[PATCH] cpusets: confine pdflush to its cpuset
[linux-2.6] / net / ipv6 / route.c
1 /*
2  *      Linux INET6 implementation
3  *      FIB front-end.
4  *
5  *      Authors:
6  *      Pedro Roque             <roque@di.fc.ul.pt>     
7  *
8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
9  *
10  *      This program is free software; you can redistribute it and/or
11  *      modify it under the terms of the GNU General Public License
12  *      as published by the Free Software Foundation; either version
13  *      2 of the License, or (at your option) any later version.
14  */
15
16 /*      Changes:
17  *
18  *      YOSHIFUJI Hideaki @USAGI
19  *              reworked default router selection.
20  *              - respect outgoing interface
21  *              - select from (probably) reachable routers (i.e.
22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
23  *              - always select the same router if it is (probably)
24  *              reachable.  otherwise, round-robin the list.
25  */
26
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
40
41 #ifdef  CONFIG_PROC_FS
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
44 #endif
45
46 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
52 #include <net/tcp.h>
53 #include <linux/rtnetlink.h>
54 #include <net/dst.h>
55 #include <net/xfrm.h>
56
57 #include <asm/uaccess.h>
58
59 #ifdef CONFIG_SYSCTL
60 #include <linux/sysctl.h>
61 #endif
62
63 /* Set to 3 to get tracing. */
64 #define RT6_DEBUG 2
65
66 #if RT6_DEBUG >= 3
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
69 #else
70 #define RDBG(x)
71 #define RT6_TRACE(x...) do { ; } while (0)
72 #endif
73
74
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
82
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void             ip6_dst_destroy(struct dst_entry *);
87 static void             ip6_dst_ifdown(struct dst_entry *,
88                                        struct net_device *dev, int how);
89 static int               ip6_dst_gc(void);
90
91 static int              ip6_pkt_discard(struct sk_buff *skb);
92 static int              ip6_pkt_discard_out(struct sk_buff *skb);
93 static void             ip6_link_failure(struct sk_buff *skb);
94 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
95
96 static struct dst_ops ip6_dst_ops = {
97         .family                 =       AF_INET6,
98         .protocol               =       __constant_htons(ETH_P_IPV6),
99         .gc                     =       ip6_dst_gc,
100         .gc_thresh              =       1024,
101         .check                  =       ip6_dst_check,
102         .destroy                =       ip6_dst_destroy,
103         .ifdown                 =       ip6_dst_ifdown,
104         .negative_advice        =       ip6_negative_advice,
105         .link_failure           =       ip6_link_failure,
106         .update_pmtu            =       ip6_rt_update_pmtu,
107         .entry_size             =       sizeof(struct rt6_info),
108 };
109
110 struct rt6_info ip6_null_entry = {
111         .u = {
112                 .dst = {
113                         .__refcnt       = ATOMIC_INIT(1),
114                         .__use          = 1,
115                         .dev            = &loopback_dev,
116                         .obsolete       = -1,
117                         .error          = -ENETUNREACH,
118                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
119                         .input          = ip6_pkt_discard,
120                         .output         = ip6_pkt_discard_out,
121                         .ops            = &ip6_dst_ops,
122                         .path           = (struct dst_entry*)&ip6_null_entry,
123                 }
124         },
125         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
126         .rt6i_metric    = ~(u32) 0,
127         .rt6i_ref       = ATOMIC_INIT(1),
128 };
129
130 struct fib6_node ip6_routing_table = {
131         .leaf           = &ip6_null_entry,
132         .fn_flags       = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
133 };
134
/*
 * Reader/writer lock protecting the whole IPv6 FIB.  Lookups in this
 * file take it for reading, insertions take it for writing.
 */
DEFINE_RWLOCK(rt6_lock);
138
139
140 /* allocate dst with ip6_dst_ops */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
142 {
143         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
144 }
145
146 static void ip6_dst_destroy(struct dst_entry *dst)
147 {
148         struct rt6_info *rt = (struct rt6_info *)dst;
149         struct inet6_dev *idev = rt->rt6i_idev;
150
151         if (idev != NULL) {
152                 rt->rt6i_idev = NULL;
153                 in6_dev_put(idev);
154         }       
155 }
156
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
158                            int how)
159 {
160         struct rt6_info *rt = (struct rt6_info *)dst;
161         struct inet6_dev *idev = rt->rt6i_idev;
162
163         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165                 if (loopback_idev != NULL) {
166                         rt->rt6i_idev = loopback_idev;
167                         in6_dev_put(idev);
168                 }
169         }
170 }
171
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
173 {
174         return (rt->rt6i_flags & RTF_EXPIRES &&
175                 time_after(jiffies, rt->rt6i_expires));
176 }
177
/*
 *	Route lookup.  The caller is expected to hold rt6_lock
 *	(either for reading or for writing).
 */
181
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
183                                                     int oif,
184                                                     int strict)
185 {
186         struct rt6_info *local = NULL;
187         struct rt6_info *sprt;
188
189         if (oif) {
190                 for (sprt = rt; sprt; sprt = sprt->u.next) {
191                         struct net_device *dev = sprt->rt6i_dev;
192                         if (dev->ifindex == oif)
193                                 return sprt;
194                         if (dev->flags & IFF_LOOPBACK) {
195                                 if (sprt->rt6i_idev == NULL ||
196                                     sprt->rt6i_idev->dev->ifindex != oif) {
197                                         if (strict && oif)
198                                                 continue;
199                                         if (local && (!oif || 
200                                                       local->rt6i_idev->dev->ifindex == oif))
201                                                 continue;
202                                 }
203                                 local = sprt;
204                         }
205                 }
206
207                 if (local)
208                         return local;
209
210                 if (strict)
211                         return &ip6_null_entry;
212         }
213         return rt;
214 }
215
/*
 *	The default router chosen by the most recent selection, or NULL,
 *	together with the spinlock guarding updates to it.
 *	rt6_reset_dflt_pointer() takes the lock with spin_lock_bh();
 *	rt6_best_dflt() uses plain spin_lock() and relies on bottom halves
 *	already being disabled by its caller.
 */
static DEFINE_SPINLOCK(rt6_dflt_lock);
static struct rt6_info *rt6_dflt_pointer;
221
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
223 {
224         spin_lock_bh(&rt6_dflt_lock);
225         if (rt == NULL || rt == rt6_dflt_pointer) {
226                 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227                 rt6_dflt_pointer = NULL;
228         }
229         spin_unlock_bh(&rt6_dflt_lock);
230 }
231
232 /* Default Router Selection (RFC 2461 6.3.6) */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
234 {
235         struct rt6_info *match = NULL;
236         struct rt6_info *sprt;
237         int mpri = 0;
238
239         for (sprt = rt; sprt; sprt = sprt->u.next) {
240                 struct neighbour *neigh;
241                 int m = 0;
242
243                 if (!oif ||
244                     (sprt->rt6i_dev &&
245                      sprt->rt6i_dev->ifindex == oif))
246                         m += 8;
247
248                 if (rt6_check_expired(sprt))
249                         continue;
250
251                 if (sprt == rt6_dflt_pointer)
252                         m += 4;
253
254                 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255                         read_lock_bh(&neigh->lock);
256                         switch (neigh->nud_state) {
257                         case NUD_REACHABLE:
258                                 m += 3;
259                                 break;
260
261                         case NUD_STALE:
262                         case NUD_DELAY:
263                         case NUD_PROBE:
264                                 m += 2;
265                                 break;
266
267                         case NUD_NOARP:
268                         case NUD_PERMANENT:
269                                 m += 1;
270                                 break;
271
272                         case NUD_INCOMPLETE:
273                         default:
274                                 read_unlock_bh(&neigh->lock);
275                                 continue;
276                         }
277                         read_unlock_bh(&neigh->lock);
278                 } else {
279                         continue;
280                 }
281
282                 if (m > mpri || m >= 12) {
283                         match = sprt;
284                         mpri = m;
285                         if (m >= 12) {
286                                 /* we choose the last default router if it
287                                  * is in (probably) reachable state.
288                                  * If route changed, we should do pmtu
289                                  * discovery. --yoshfuji
290                                  */
291                                 break;
292                         }
293                 }
294         }
295
296         spin_lock(&rt6_dflt_lock);
297         if (!match) {
298                 /*
299                  *      No default routers are known to be reachable.
300                  *      SHOULD round robin
301                  */
302                 if (rt6_dflt_pointer) {
303                         for (sprt = rt6_dflt_pointer->u.next;
304                              sprt; sprt = sprt->u.next) {
305                                 if (sprt->u.dst.obsolete <= 0 &&
306                                     sprt->u.dst.error == 0 &&
307                                     !rt6_check_expired(sprt)) {
308                                         match = sprt;
309                                         break;
310                                 }
311                         }
312                         for (sprt = rt;
313                              !match && sprt;
314                              sprt = sprt->u.next) {
315                                 if (sprt->u.dst.obsolete <= 0 &&
316                                     sprt->u.dst.error == 0 &&
317                                     !rt6_check_expired(sprt)) {
318                                         match = sprt;
319                                         break;
320                                 }
321                                 if (sprt == rt6_dflt_pointer)
322                                         break;
323                         }
324                 }
325         }
326
327         if (match) {
328                 if (rt6_dflt_pointer != match)
329                         RT6_TRACE("changed default router: %p->%p\n",
330                                   rt6_dflt_pointer, match);
331                 rt6_dflt_pointer = match;
332         }
333         spin_unlock(&rt6_dflt_lock);
334
335         if (!match) {
336                 /*
337                  * Last Resort: if no default routers found, 
338                  * use addrconf default route.
339                  * We don't record this route.
340                  */
341                 for (sprt = ip6_routing_table.leaf;
342                      sprt; sprt = sprt->u.next) {
343                         if (!rt6_check_expired(sprt) &&
344                             (sprt->rt6i_flags & RTF_DEFAULT) &&
345                             (!oif ||
346                              (sprt->rt6i_dev &&
347                               sprt->rt6i_dev->ifindex == oif))) {
348                                 match = sprt;
349                                 break;
350                         }
351                 }
352                 if (!match) {
353                         /* no default route.  give up. */
354                         match = &ip6_null_entry;
355                 }
356         }
357
358         return match;
359 }
360
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
362                             int oif, int strict)
363 {
364         struct fib6_node *fn;
365         struct rt6_info *rt;
366
367         read_lock_bh(&rt6_lock);
368         fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369         rt = rt6_device_match(fn->leaf, oif, strict);
370         dst_hold(&rt->u.dst);
371         rt->u.dst.__use++;
372         read_unlock_bh(&rt6_lock);
373
374         rt->u.dst.lastuse = jiffies;
375         if (rt->u.dst.error == 0)
376                 return rt;
377         dst_release(&rt->u.dst);
378         return NULL;
379 }
380
381 /* ip6_ins_rt is called with FREE rt6_lock.
382    It takes new route entry, the addition fails by any reason the
383    route is freed. In any case, if caller does not hold it, it may
384    be destroyed.
385  */
386
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388                 void *_rtattr, struct netlink_skb_parms *req)
389 {
390         int err;
391
392         write_lock_bh(&rt6_lock);
393         err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
394         write_unlock_bh(&rt6_lock);
395
396         return err;
397 }
398
399 /* No rt6_lock! If COW failed, the function returns dead route entry
400    with dst->error set to errno value.
401  */
402
403 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
404                                 struct in6_addr *saddr, struct netlink_skb_parms *req)
405 {
406         int err;
407         struct rt6_info *rt;
408
409         /*
410          *      Clone the route.
411          */
412
413         rt = ip6_rt_copy(ort);
414
415         if (rt) {
416                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
417
418                 if (!(rt->rt6i_flags&RTF_GATEWAY))
419                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
420
421                 rt->rt6i_dst.plen = 128;
422                 rt->rt6i_flags |= RTF_CACHE;
423                 rt->u.dst.flags |= DST_HOST;
424
425 #ifdef CONFIG_IPV6_SUBTREES
426                 if (rt->rt6i_src.plen && saddr) {
427                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
428                         rt->rt6i_src.plen = 128;
429                 }
430 #endif
431
432                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
433
434                 dst_hold(&rt->u.dst);
435
436                 err = ip6_ins_rt(rt, NULL, NULL, req);
437                 if (err == 0)
438                         return rt;
439
440                 rt->u.dst.error = err;
441
442                 return rt;
443         }
444         dst_hold(&ip6_null_entry.u.dst);
445         return &ip6_null_entry;
446 }
447
/*
 * BACKTRACK() - retry a strict lookup higher up in the tree.
 *
 * Expands in a function that has the locals "rt", "fn" and "strict" and
 * the labels "out" and "restart".  When the lookup produced
 * &ip6_null_entry and "strict" is set, walk "fn" towards the root:
 *   - at a node flagged RTN_ROOT, take a reference on rt (still the
 *     null entry) and jump to "out";
 *   - at a node flagged RTN_RTINFO, jump to "restart" to repeat the
 *     match on that node.
 * If the parent chain ends without either, or the condition does not
 * hold, execution simply continues after the macro (with "fn" NULL in
 * the former case).
 *
 * Wrapped in do { } while (0) so that "BACKTRACK();" is one statement
 * and is safe inside an unbraced if/else.
 */
#define BACKTRACK() \
do { \
	if (rt == &ip6_null_entry && strict) { \
		while ((fn = fn->parent) != NULL) { \
			if (fn->fn_flags & RTN_ROOT) { \
				dst_hold(&rt->u.dst); \
				goto out; \
			} \
			if (fn->fn_flags & RTN_RTINFO) \
				goto restart; \
		} \
	} \
} while (0)
459
460
461 void ip6_route_input(struct sk_buff *skb)
462 {
463         struct fib6_node *fn;
464         struct rt6_info *rt;
465         int strict;
466         int attempts = 3;
467
468         strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
469
470 relookup:
471         read_lock_bh(&rt6_lock);
472
473         fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
474                          &skb->nh.ipv6h->saddr);
475
476 restart:
477         rt = fn->leaf;
478
479         if ((rt->rt6i_flags & RTF_CACHE)) {
480                 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
481                 BACKTRACK();
482                 dst_hold(&rt->u.dst);
483                 goto out;
484         }
485
486         rt = rt6_device_match(rt, skb->dev->ifindex, 0);
487         BACKTRACK();
488
489         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
490                 struct rt6_info *nrt;
491                 dst_hold(&rt->u.dst);
492                 read_unlock_bh(&rt6_lock);
493
494                 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
495                               &skb->nh.ipv6h->saddr,
496                               &NETLINK_CB(skb));
497
498                 dst_release(&rt->u.dst);
499                 rt = nrt;
500
501                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
502                         goto out2;
503
504                 /* Race condition! In the gap, when rt6_lock was
505                    released someone could insert this route.  Relookup.
506                 */
507                 dst_release(&rt->u.dst);
508                 goto relookup;
509         }
510         dst_hold(&rt->u.dst);
511
512 out:
513         read_unlock_bh(&rt6_lock);
514 out2:
515         rt->u.dst.lastuse = jiffies;
516         rt->u.dst.__use++;
517         skb->dst = (struct dst_entry *) rt;
518 }
519
520 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
521 {
522         struct fib6_node *fn;
523         struct rt6_info *rt;
524         int strict;
525         int attempts = 3;
526
527         strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
528
529 relookup:
530         read_lock_bh(&rt6_lock);
531
532         fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
533
534 restart:
535         rt = fn->leaf;
536
537         if ((rt->rt6i_flags & RTF_CACHE)) {
538                 rt = rt6_device_match(rt, fl->oif, strict);
539                 BACKTRACK();
540                 dst_hold(&rt->u.dst);
541                 goto out;
542         }
543         if (rt->rt6i_flags & RTF_DEFAULT) {
544                 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
545                         rt = rt6_best_dflt(rt, fl->oif);
546         } else {
547                 rt = rt6_device_match(rt, fl->oif, strict);
548                 BACKTRACK();
549         }
550
551         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
552                 struct rt6_info *nrt;
553                 dst_hold(&rt->u.dst);
554                 read_unlock_bh(&rt6_lock);
555
556                 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
557
558                 dst_release(&rt->u.dst);
559                 rt = nrt;
560
561                 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
562                         goto out2;
563
564                 /* Race condition! In the gap, when rt6_lock was
565                    released someone could insert this route.  Relookup.
566                 */
567                 dst_release(&rt->u.dst);
568                 goto relookup;
569         }
570         dst_hold(&rt->u.dst);
571
572 out:
573         read_unlock_bh(&rt6_lock);
574 out2:
575         rt->u.dst.lastuse = jiffies;
576         rt->u.dst.__use++;
577         return &rt->u.dst;
578 }
579
580
581 /*
582  *      Destination cache support functions
583  */
584
585 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
586 {
587         struct rt6_info *rt;
588
589         rt = (struct rt6_info *) dst;
590
591         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
592                 return dst;
593
594         return NULL;
595 }
596
597 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
598 {
599         struct rt6_info *rt = (struct rt6_info *) dst;
600
601         if (rt) {
602                 if (rt->rt6i_flags & RTF_CACHE)
603                         ip6_del_rt(rt, NULL, NULL, NULL);
604                 else
605                         dst_release(dst);
606         }
607         return NULL;
608 }
609
610 static void ip6_link_failure(struct sk_buff *skb)
611 {
612         struct rt6_info *rt;
613
614         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
615
616         rt = (struct rt6_info *) skb->dst;
617         if (rt) {
618                 if (rt->rt6i_flags&RTF_CACHE) {
619                         dst_set_expires(&rt->u.dst, 0);
620                         rt->rt6i_flags |= RTF_EXPIRES;
621                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
622                         rt->rt6i_node->fn_sernum = -1;
623         }
624 }
625
626 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
627 {
628         struct rt6_info *rt6 = (struct rt6_info*)dst;
629
630         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
631                 rt6->rt6i_flags |= RTF_MODIFIED;
632                 if (mtu < IPV6_MIN_MTU) {
633                         mtu = IPV6_MIN_MTU;
634                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
635                 }
636                 dst->metrics[RTAX_MTU-1] = mtu;
637         }
638 }
639
/* Defined further down; needed by ndisc_dst_alloc(). */
static int ipv6_get_mtu(struct net_device *dev);

/*
 * Singly linked list (through dst->next) of the dst entries created by
 * ndisc_dst_alloc(), reaped by ndisc_dst_gc().  Protected by rt6_lock.
 */
static struct dst_entry *ndisc_dst_gc_list;
643
644 static inline unsigned int ipv6_advmss(unsigned int mtu)
645 {
646         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
647
648         if (mtu < ip6_rt_min_advmss)
649                 mtu = ip6_rt_min_advmss;
650
651         /*
652          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
653          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
654          * IPV6_MAXPLEN is also valid and means: "any MSS, 
655          * rely only on pmtu discovery"
656          */
657         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
658                 mtu = IPV6_MAXPLEN;
659         return mtu;
660 }
661
662 struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
663                                   struct neighbour *neigh,
664                                   struct in6_addr *addr,
665                                   int (*output)(struct sk_buff *))
666 {
667         struct rt6_info *rt;
668         struct inet6_dev *idev = in6_dev_get(dev);
669
670         if (unlikely(idev == NULL))
671                 return NULL;
672
673         rt = ip6_dst_alloc();
674         if (unlikely(rt == NULL)) {
675                 in6_dev_put(idev);
676                 goto out;
677         }
678
679         dev_hold(dev);
680         if (neigh)
681                 neigh_hold(neigh);
682         else
683                 neigh = ndisc_get_neigh(dev, addr);
684
685         rt->rt6i_dev      = dev;
686         rt->rt6i_idev     = idev;
687         rt->rt6i_nexthop  = neigh;
688         atomic_set(&rt->u.dst.__refcnt, 1);
689         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
690         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
691         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
692         rt->u.dst.output  = output;
693
694 #if 0   /* there's no chance to use these for ndisc */
695         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST 
696                                 ? DST_HOST 
697                                 : 0;
698         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
699         rt->rt6i_dst.plen = 128;
700 #endif
701
702         write_lock_bh(&rt6_lock);
703         rt->u.dst.next = ndisc_dst_gc_list;
704         ndisc_dst_gc_list = &rt->u.dst;
705         write_unlock_bh(&rt6_lock);
706
707         fib6_force_start_gc();
708
709 out:
710         return (struct dst_entry *)rt;
711 }
712
713 int ndisc_dst_gc(int *more)
714 {
715         struct dst_entry *dst, *next, **pprev;
716         int freed;
717
718         next = NULL;
719         pprev = &ndisc_dst_gc_list;
720         freed = 0;
721         while ((dst = *pprev) != NULL) {
722                 if (!atomic_read(&dst->__refcnt)) {
723                         *pprev = dst->next;
724                         dst_free(dst);
725                         freed++;
726                 } else {
727                         pprev = &dst->next;
728                         (*more)++;
729                 }
730         }
731
732         return freed;
733 }
734
735 static int ip6_dst_gc(void)
736 {
737         static unsigned expire = 30*HZ;
738         static unsigned long last_gc;
739         unsigned long now = jiffies;
740
741         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
742             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
743                 goto out;
744
745         expire++;
746         fib6_run_gc(expire);
747         last_gc = now;
748         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
749                 expire = ip6_rt_gc_timeout>>1;
750
751 out:
752         expire -= expire>>ip6_rt_gc_elasticity;
753         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
754 }
755
756 /* Clean host part of a prefix. Not necessary in radix tree,
757    but results in cleaner routing tables.
758
759    Remove it only when all the things will work!
760  */
761
762 static int ipv6_get_mtu(struct net_device *dev)
763 {
764         int mtu = IPV6_MIN_MTU;
765         struct inet6_dev *idev;
766
767         idev = in6_dev_get(dev);
768         if (idev) {
769                 mtu = idev->cnf.mtu6;
770                 in6_dev_put(idev);
771         }
772         return mtu;
773 }
774
775 int ipv6_get_hoplimit(struct net_device *dev)
776 {
777         int hoplimit = ipv6_devconf.hop_limit;
778         struct inet6_dev *idev;
779
780         idev = in6_dev_get(dev);
781         if (idev) {
782                 hoplimit = idev->cnf.hop_limit;
783                 in6_dev_put(idev);
784         }
785         return hoplimit;
786 }
787
788 /*
789  *
790  */
791
792 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, 
793                 void *_rtattr, struct netlink_skb_parms *req)
794 {
795         int err;
796         struct rtmsg *r;
797         struct rtattr **rta;
798         struct rt6_info *rt = NULL;
799         struct net_device *dev = NULL;
800         struct inet6_dev *idev = NULL;
801         int addr_type;
802
803         rta = (struct rtattr **) _rtattr;
804
805         if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
806                 return -EINVAL;
807 #ifndef CONFIG_IPV6_SUBTREES
808         if (rtmsg->rtmsg_src_len)
809                 return -EINVAL;
810 #endif
811         if (rtmsg->rtmsg_ifindex) {
812                 err = -ENODEV;
813                 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
814                 if (!dev)
815                         goto out;
816                 idev = in6_dev_get(dev);
817                 if (!idev)
818                         goto out;
819         }
820
821         if (rtmsg->rtmsg_metric == 0)
822                 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
823
824         rt = ip6_dst_alloc();
825
826         if (rt == NULL) {
827                 err = -ENOMEM;
828                 goto out;
829         }
830
831         rt->u.dst.obsolete = -1;
832         rt->rt6i_expires = clock_t_to_jiffies(rtmsg->rtmsg_info);
833         if (nlh && (r = NLMSG_DATA(nlh))) {
834                 rt->rt6i_protocol = r->rtm_protocol;
835         } else {
836                 rt->rt6i_protocol = RTPROT_BOOT;
837         }
838
839         addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
840
841         if (addr_type & IPV6_ADDR_MULTICAST)
842                 rt->u.dst.input = ip6_mc_input;
843         else
844                 rt->u.dst.input = ip6_forward;
845
846         rt->u.dst.output = ip6_output;
847
848         ipv6_addr_prefix(&rt->rt6i_dst.addr, 
849                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
850         rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
851         if (rt->rt6i_dst.plen == 128)
852                rt->u.dst.flags = DST_HOST;
853
854 #ifdef CONFIG_IPV6_SUBTREES
855         ipv6_addr_prefix(&rt->rt6i_src.addr, 
856                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
857         rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
858 #endif
859
860         rt->rt6i_metric = rtmsg->rtmsg_metric;
861
862         /* We cannot add true routes via loopback here,
863            they would result in kernel looping; promote them to reject routes
864          */
865         if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
866             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
867                 /* hold loopback dev/idev if we haven't done so. */
868                 if (dev != &loopback_dev) {
869                         if (dev) {
870                                 dev_put(dev);
871                                 in6_dev_put(idev);
872                         }
873                         dev = &loopback_dev;
874                         dev_hold(dev);
875                         idev = in6_dev_get(dev);
876                         if (!idev) {
877                                 err = -ENODEV;
878                                 goto out;
879                         }
880                 }
881                 rt->u.dst.output = ip6_pkt_discard_out;
882                 rt->u.dst.input = ip6_pkt_discard;
883                 rt->u.dst.error = -ENETUNREACH;
884                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
885                 goto install_route;
886         }
887
888         if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
889                 struct in6_addr *gw_addr;
890                 int gwa_type;
891
892                 gw_addr = &rtmsg->rtmsg_gateway;
893                 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
894                 gwa_type = ipv6_addr_type(gw_addr);
895
896                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
897                         struct rt6_info *grt;
898
899                         /* IPv6 strictly inhibits using not link-local
900                            addresses as nexthop address.
901                            Otherwise, router will not able to send redirects.
902                            It is very good, but in some (rare!) circumstances
903                            (SIT, PtP, NBMA NOARP links) it is handy to allow
904                            some exceptions. --ANK
905                          */
906                         err = -EINVAL;
907                         if (!(gwa_type&IPV6_ADDR_UNICAST))
908                                 goto out;
909
910                         grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
911
912                         err = -EHOSTUNREACH;
913                         if (grt == NULL)
914                                 goto out;
915                         if (dev) {
916                                 if (dev != grt->rt6i_dev) {
917                                         dst_release(&grt->u.dst);
918                                         goto out;
919                                 }
920                         } else {
921                                 dev = grt->rt6i_dev;
922                                 idev = grt->rt6i_idev;
923                                 dev_hold(dev);
924                                 in6_dev_hold(grt->rt6i_idev);
925                         }
926                         if (!(grt->rt6i_flags&RTF_GATEWAY))
927                                 err = 0;
928                         dst_release(&grt->u.dst);
929
930                         if (err)
931                                 goto out;
932                 }
933                 err = -EINVAL;
934                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
935                         goto out;
936         }
937
938         err = -ENODEV;
939         if (dev == NULL)
940                 goto out;
941
942         if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
943                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
944                 if (IS_ERR(rt->rt6i_nexthop)) {
945                         err = PTR_ERR(rt->rt6i_nexthop);
946                         rt->rt6i_nexthop = NULL;
947                         goto out;
948                 }
949         }
950
951         rt->rt6i_flags = rtmsg->rtmsg_flags;
952
953 install_route:
954         if (rta && rta[RTA_METRICS-1]) {
955                 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
956                 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
957
958                 while (RTA_OK(attr, attrlen)) {
959                         unsigned flavor = attr->rta_type;
960                         if (flavor) {
961                                 if (flavor > RTAX_MAX) {
962                                         err = -EINVAL;
963                                         goto out;
964                                 }
965                                 rt->u.dst.metrics[flavor-1] =
966                                         *(u32 *)RTA_DATA(attr);
967                         }
968                         attr = RTA_NEXT(attr, attrlen);
969                 }
970         }
971
972         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
973                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
974         if (!rt->u.dst.metrics[RTAX_MTU-1])
975                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
976         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
977                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
978         rt->u.dst.dev = dev;
979         rt->rt6i_idev = idev;
980         return ip6_ins_rt(rt, nlh, _rtattr, req);
981
982 out:
983         if (dev)
984                 dev_put(dev);
985         if (idev)
986                 in6_dev_put(idev);
987         if (rt)
988                 dst_free((struct dst_entry *) rt);
989         return err;
990 }
991
992 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
993 {
994         int err;
995
996         write_lock_bh(&rt6_lock);
997
998         rt6_reset_dflt_pointer(NULL);
999
1000         err = fib6_del(rt, nlh, _rtattr, req);
1001         dst_release(&rt->u.dst);
1002
1003         write_unlock_bh(&rt6_lock);
1004
1005         return err;
1006 }
1007
1008 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1009 {
1010         struct fib6_node *fn;
1011         struct rt6_info *rt;
1012         int err = -ESRCH;
1013
1014         read_lock_bh(&rt6_lock);
1015
1016         fn = fib6_locate(&ip6_routing_table,
1017                          &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1018                          &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1019         
1020         if (fn) {
1021                 for (rt = fn->leaf; rt; rt = rt->u.next) {
1022                         if (rtmsg->rtmsg_ifindex &&
1023                             (rt->rt6i_dev == NULL ||
1024                              rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1025                                 continue;
1026                         if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1027                             !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1028                                 continue;
1029                         if (rtmsg->rtmsg_metric &&
1030                             rtmsg->rtmsg_metric != rt->rt6i_metric)
1031                                 continue;
1032                         dst_hold(&rt->u.dst);
1033                         read_unlock_bh(&rt6_lock);
1034
1035                         return ip6_del_rt(rt, nlh, _rtattr, req);
1036                 }
1037         }
1038         read_unlock_bh(&rt6_lock);
1039
1040         return err;
1041 }
1042
1043 /*
1044  *      Handle redirects
1045  */
1046 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1047                   struct neighbour *neigh, u8 *lladdr, int on_link)
1048 {
1049         struct rt6_info *rt, *nrt;
1050
1051         /* Locate old route to this destination. */
1052         rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1053
1054         if (rt == NULL)
1055                 return;
1056
1057         if (neigh->dev != rt->rt6i_dev)
1058                 goto out;
1059
1060         /*
1061          * Current route is on-link; redirect is always invalid.
1062          * 
1063          * Seems, previous statement is not true. It could
1064          * be node, which looks for us as on-link (f.e. proxy ndisc)
1065          * But then router serving it might decide, that we should
1066          * know truth 8)8) --ANK (980726).
1067          */
1068         if (!(rt->rt6i_flags&RTF_GATEWAY))
1069                 goto out;
1070
1071         /*
1072          *      RFC 2461 specifies that redirects should only be
1073          *      accepted if they come from the nexthop to the target.
1074          *      Due to the way default routers are chosen, this notion
1075          *      is a bit fuzzy and one might need to check all default
1076          *      routers.
1077          */
1078         if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1079                 if (rt->rt6i_flags & RTF_DEFAULT) {
1080                         struct rt6_info *rt1;
1081
1082                         read_lock(&rt6_lock);
1083                         for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1084                                 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1085                                         dst_hold(&rt1->u.dst);
1086                                         dst_release(&rt->u.dst);
1087                                         read_unlock(&rt6_lock);
1088                                         rt = rt1;
1089                                         goto source_ok;
1090                                 }
1091                         }
1092                         read_unlock(&rt6_lock);
1093                 }
1094                 if (net_ratelimit())
1095                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1096                                "for redirect target\n");
1097                 goto out;
1098         }
1099
1100 source_ok:
1101
1102         /*
1103          *      We have finally decided to accept it.
1104          */
1105
1106         neigh_update(neigh, lladdr, NUD_STALE, 
1107                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
1108                      NEIGH_UPDATE_F_OVERRIDE|
1109                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1110                                      NEIGH_UPDATE_F_ISROUTER))
1111                      );
1112
1113         /*
1114          * Redirect received -> path was valid.
1115          * Look, redirects are sent only in response to data packets,
1116          * so that this nexthop apparently is reachable. --ANK
1117          */
1118         dst_confirm(&rt->u.dst);
1119
1120         /* Duplicate redirect: silently ignore. */
1121         if (neigh == rt->u.dst.neighbour)
1122                 goto out;
1123
1124         nrt = ip6_rt_copy(rt);
1125         if (nrt == NULL)
1126                 goto out;
1127
1128         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1129         if (on_link)
1130                 nrt->rt6i_flags &= ~RTF_GATEWAY;
1131
1132         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1133         nrt->rt6i_dst.plen = 128;
1134         nrt->u.dst.flags |= DST_HOST;
1135
1136         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1137         nrt->rt6i_nexthop = neigh_clone(neigh);
1138         /* Reset pmtu, it may be better */
1139         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1140         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1141
1142         if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1143                 goto out;
1144
1145         if (rt->rt6i_flags&RTF_CACHE) {
1146                 ip6_del_rt(rt, NULL, NULL, NULL);
1147                 return;
1148         }
1149
1150 out:
1151         dst_release(&rt->u.dst);
1152         return;
1153 }
1154
1155 /*
1156  *      Handle ICMP "packet too big" messages
1157  *      i.e. Path MTU discovery
1158  */
1159
1160 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1161                         struct net_device *dev, u32 pmtu)
1162 {
1163         struct rt6_info *rt, *nrt;
1164         int allfrag = 0;
1165
1166         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1167         if (rt == NULL)
1168                 return;
1169
1170         if (pmtu >= dst_mtu(&rt->u.dst))
1171                 goto out;
1172
1173         if (pmtu < IPV6_MIN_MTU) {
1174                 /*
1175                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link 
1176                  * MTU (1280) and a fragment header should always be included
1177                  * after a node receiving Too Big message reporting PMTU is
1178                  * less than the IPv6 Minimum Link MTU.
1179                  */
1180                 pmtu = IPV6_MIN_MTU;
1181                 allfrag = 1;
1182         }
1183
1184         /* New mtu received -> path was valid.
1185            They are sent only in response to data packets,
1186            so that this nexthop apparently is reachable. --ANK
1187          */
1188         dst_confirm(&rt->u.dst);
1189
1190         /* Host route. If it is static, it would be better
1191            not to override it, but add new one, so that
1192            when cache entry will expire old pmtu
1193            would return automatically.
1194          */
1195         if (rt->rt6i_flags & RTF_CACHE) {
1196                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1197                 if (allfrag)
1198                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1199                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1200                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1201                 goto out;
1202         }
1203
1204         /* Network route.
1205            Two cases are possible:
1206            1. It is connected route. Action: COW
1207            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1208          */
1209         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1210                 nrt = rt6_cow(rt, daddr, saddr, NULL);
1211                 if (!nrt->u.dst.error) {
1212                         nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1213                         if (allfrag)
1214                                 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1215                         /* According to RFC 1981, detecting PMTU increase shouldn't be
1216                            happened within 5 mins, the recommended timer is 10 mins.
1217                            Here this route expiration time is set to ip6_rt_mtu_expires
1218                            which is 10 mins. After 10 mins the decreased pmtu is expired
1219                            and detecting PMTU increase will be automatically happened.
1220                          */
1221                         dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1222                         nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1223                 }
1224                 dst_release(&nrt->u.dst);
1225         } else {
1226                 nrt = ip6_rt_copy(rt);
1227                 if (nrt == NULL)
1228                         goto out;
1229                 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1230                 nrt->rt6i_dst.plen = 128;
1231                 nrt->u.dst.flags |= DST_HOST;
1232                 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1233                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1234                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1235                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1236                 if (allfrag)
1237                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1238                 ip6_ins_rt(nrt, NULL, NULL, NULL);
1239         }
1240
1241 out:
1242         dst_release(&rt->u.dst);
1243 }
1244
1245 /*
1246  *      Misc support functions
1247  */
1248
1249 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1250 {
1251         struct rt6_info *rt = ip6_dst_alloc();
1252
1253         if (rt) {
1254                 rt->u.dst.input = ort->u.dst.input;
1255                 rt->u.dst.output = ort->u.dst.output;
1256
1257                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1258                 rt->u.dst.dev = ort->u.dst.dev;
1259                 if (rt->u.dst.dev)
1260                         dev_hold(rt->u.dst.dev);
1261                 rt->rt6i_idev = ort->rt6i_idev;
1262                 if (rt->rt6i_idev)
1263                         in6_dev_hold(rt->rt6i_idev);
1264                 rt->u.dst.lastuse = jiffies;
1265                 rt->rt6i_expires = 0;
1266
1267                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1268                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1269                 rt->rt6i_metric = 0;
1270
1271                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1272 #ifdef CONFIG_IPV6_SUBTREES
1273                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1274 #endif
1275         }
1276         return rt;
1277 }
1278
1279 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1280 {       
1281         struct rt6_info *rt;
1282         struct fib6_node *fn;
1283
1284         fn = &ip6_routing_table;
1285
1286         write_lock_bh(&rt6_lock);
1287         for (rt = fn->leaf; rt; rt=rt->u.next) {
1288                 if (dev == rt->rt6i_dev &&
1289                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
1290                         break;
1291         }
1292         if (rt)
1293                 dst_hold(&rt->u.dst);
1294         write_unlock_bh(&rt6_lock);
1295         return rt;
1296 }
1297
1298 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1299                                      struct net_device *dev)
1300 {
1301         struct in6_rtmsg rtmsg;
1302
1303         memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1304         rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1305         ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1306         rtmsg.rtmsg_metric = 1024;
1307         rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1308
1309         rtmsg.rtmsg_ifindex = dev->ifindex;
1310
1311         ip6_route_add(&rtmsg, NULL, NULL, NULL);
1312         return rt6_get_dflt_router(gwaddr, dev);
1313 }
1314
1315 void rt6_purge_dflt_routers(void)
1316 {
1317         struct rt6_info *rt;
1318
1319 restart:
1320         read_lock_bh(&rt6_lock);
1321         for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1322                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1323                         dst_hold(&rt->u.dst);
1324
1325                         rt6_reset_dflt_pointer(NULL);
1326
1327                         read_unlock_bh(&rt6_lock);
1328
1329                         ip6_del_rt(rt, NULL, NULL, NULL);
1330
1331                         goto restart;
1332                 }
1333         }
1334         read_unlock_bh(&rt6_lock);
1335 }
1336
1337 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1338 {
1339         struct in6_rtmsg rtmsg;
1340         int err;
1341
1342         switch(cmd) {
1343         case SIOCADDRT:         /* Add a route */
1344         case SIOCDELRT:         /* Delete a route */
1345                 if (!capable(CAP_NET_ADMIN))
1346                         return -EPERM;
1347                 err = copy_from_user(&rtmsg, arg,
1348                                      sizeof(struct in6_rtmsg));
1349                 if (err)
1350                         return -EFAULT;
1351                         
1352                 rtnl_lock();
1353                 switch (cmd) {
1354                 case SIOCADDRT:
1355                         err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1356                         break;
1357                 case SIOCDELRT:
1358                         err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1359                         break;
1360                 default:
1361                         err = -EINVAL;
1362                 }
1363                 rtnl_unlock();
1364
1365                 return err;
1366         };
1367
1368         return -EINVAL;
1369 }
1370
1371 /*
1372  *      Drop the packet on the floor
1373  */
1374
1375 static int ip6_pkt_discard(struct sk_buff *skb)
1376 {
1377         IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1378         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1379         kfree_skb(skb);
1380         return 0;
1381 }
1382
1383 static int ip6_pkt_discard_out(struct sk_buff *skb)
1384 {
1385         skb->dev = skb->dst->dev;
1386         return ip6_pkt_discard(skb);
1387 }
1388
1389 /*
1390  *      Allocate a dst for local (unicast / anycast) address.
1391  */
1392
1393 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1394                                     const struct in6_addr *addr,
1395                                     int anycast)
1396 {
1397         struct rt6_info *rt = ip6_dst_alloc();
1398
1399         if (rt == NULL)
1400                 return ERR_PTR(-ENOMEM);
1401
1402         dev_hold(&loopback_dev);
1403         in6_dev_hold(idev);
1404
1405         rt->u.dst.flags = DST_HOST;
1406         rt->u.dst.input = ip6_input;
1407         rt->u.dst.output = ip6_output;
1408         rt->rt6i_dev = &loopback_dev;
1409         rt->rt6i_idev = idev;
1410         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1411         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1412         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1413         rt->u.dst.obsolete = -1;
1414
1415         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1416         if (!anycast)
1417                 rt->rt6i_flags |= RTF_LOCAL;
1418         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1419         if (rt->rt6i_nexthop == NULL) {
1420                 dst_free((struct dst_entry *) rt);
1421                 return ERR_PTR(-ENOMEM);
1422         }
1423
1424         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1425         rt->rt6i_dst.plen = 128;
1426
1427         atomic_set(&rt->u.dst.__refcnt, 1);
1428
1429         return rt;
1430 }
1431
1432 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1433 {
1434         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1435             rt != &ip6_null_entry) {
1436                 RT6_TRACE("deleted by ifdown %p\n", rt);
1437                 return -1;
1438         }
1439         return 0;
1440 }
1441
1442 void rt6_ifdown(struct net_device *dev)
1443 {
1444         write_lock_bh(&rt6_lock);
1445         fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1446         write_unlock_bh(&rt6_lock);
1447 }
1448
/* Arguments rt6_mtu_change() passes to rt6_mtu_change_route(). */
struct rt6_mtu_change_arg {
	struct net_device *dev;	/* device the change applies to */
	unsigned mtu;		/* MTU value to propagate */
};
1454
1455 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1456 {
1457         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1458         struct inet6_dev *idev;
1459
1460         /* In IPv6 pmtu discovery is not optional,
1461            so that RTAX_MTU lock cannot disable it.
1462            We still use this lock to block changes
1463            caused by addrconf/ndisc.
1464         */
1465
1466         idev = __in6_dev_get(arg->dev);
1467         if (idev == NULL)
1468                 return 0;
1469
1470         /* For administrative MTU increase, there is no way to discover
1471            IPv6 PMTU increase, so PMTU increase should be updated here.
1472            Since RFC 1981 doesn't include administrative MTU increase
1473            update PMTU increase is a MUST. (i.e. jumbo frame)
1474          */
1475         /*
1476            If new MTU is less than route PMTU, this new MTU will be the
1477            lowest MTU in the path, update the route PMTU to reflect PMTU
1478            decreases; if new MTU is greater than route PMTU, and the
1479            old MTU is the lowest MTU in the path, update the route PMTU
1480            to reflect the increase. In this case if the other nodes' MTU
1481            also have the lowest MTU, TOO BIG MESSAGE will be lead to
1482            PMTU discouvery.
1483          */
1484         if (rt->rt6i_dev == arg->dev &&
1485             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1486             (dst_mtu(&rt->u.dst) > arg->mtu ||
1487              (dst_mtu(&rt->u.dst) < arg->mtu &&
1488               dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1489                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1490         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1491         return 0;
1492 }
1493
1494 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1495 {
1496         struct rt6_mtu_change_arg arg;
1497
1498         arg.dev = dev;
1499         arg.mtu = mtu;
1500         read_lock_bh(&rt6_lock);
1501         fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1502         read_unlock_bh(&rt6_lock);
1503 }
1504
1505 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1506                               struct in6_rtmsg *rtmsg)
1507 {
1508         memset(rtmsg, 0, sizeof(*rtmsg));
1509
1510         rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1511         rtmsg->rtmsg_src_len = r->rtm_src_len;
1512         rtmsg->rtmsg_flags = RTF_UP;
1513         if (r->rtm_type == RTN_UNREACHABLE)
1514                 rtmsg->rtmsg_flags |= RTF_REJECT;
1515
1516         if (rta[RTA_GATEWAY-1]) {
1517                 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1518                         return -EINVAL;
1519                 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1520                 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1521         }
1522         if (rta[RTA_DST-1]) {
1523                 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1524                         return -EINVAL;
1525                 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1526         }
1527         if (rta[RTA_SRC-1]) {
1528                 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1529                         return -EINVAL;
1530                 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1531         }
1532         if (rta[RTA_OIF-1]) {
1533                 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1534                         return -EINVAL;
1535                 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1536         }
1537         if (rta[RTA_PRIORITY-1]) {
1538                 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1539                         return -EINVAL;
1540                 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1541         }
1542         return 0;
1543 }
1544
1545 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1546 {
1547         struct rtmsg *r = NLMSG_DATA(nlh);
1548         struct in6_rtmsg rtmsg;
1549
1550         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1551                 return -EINVAL;
1552         return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1553 }
1554
1555 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1556 {
1557         struct rtmsg *r = NLMSG_DATA(nlh);
1558         struct in6_rtmsg rtmsg;
1559
1560         if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1561                 return -EINVAL;
1562         return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1563 }
1564
/* Per-call context that inet6_dump_fib() hands to the tree walker. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump is written into */
	struct netlink_callback *cb;	/* netlink dump state of the request */
};
1570
1571 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1572                          struct in6_addr *dst, struct in6_addr *src,
1573                          int iif, int type, u32 pid, u32 seq,
1574                          int prefix, unsigned int flags)
1575 {
1576         struct rtmsg *rtm;
1577         struct nlmsghdr  *nlh;
1578         unsigned char    *b = skb->tail;
1579         struct rta_cacheinfo ci;
1580
1581         if (prefix) {   /* user wants prefix routes only */
1582                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1583                         /* success since this is not a prefix route */
1584                         return 1;
1585                 }
1586         }
1587
1588         nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1589         rtm = NLMSG_DATA(nlh);
1590         rtm->rtm_family = AF_INET6;
1591         rtm->rtm_dst_len = rt->rt6i_dst.plen;
1592         rtm->rtm_src_len = rt->rt6i_src.plen;
1593         rtm->rtm_tos = 0;
1594         rtm->rtm_table = RT_TABLE_MAIN;
1595         if (rt->rt6i_flags&RTF_REJECT)
1596                 rtm->rtm_type = RTN_UNREACHABLE;
1597         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1598                 rtm->rtm_type = RTN_LOCAL;
1599         else
1600                 rtm->rtm_type = RTN_UNICAST;
1601         rtm->rtm_flags = 0;
1602         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1603         rtm->rtm_protocol = rt->rt6i_protocol;
1604         if (rt->rt6i_flags&RTF_DYNAMIC)
1605                 rtm->rtm_protocol = RTPROT_REDIRECT;
1606         else if (rt->rt6i_flags & RTF_ADDRCONF)
1607                 rtm->rtm_protocol = RTPROT_KERNEL;
1608         else if (rt->rt6i_flags&RTF_DEFAULT)
1609                 rtm->rtm_protocol = RTPROT_RA;
1610
1611         if (rt->rt6i_flags&RTF_CACHE)
1612                 rtm->rtm_flags |= RTM_F_CLONED;
1613
1614         if (dst) {
1615                 RTA_PUT(skb, RTA_DST, 16, dst);
1616                 rtm->rtm_dst_len = 128;
1617         } else if (rtm->rtm_dst_len)
1618                 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1619 #ifdef CONFIG_IPV6_SUBTREES
1620         if (src) {
1621                 RTA_PUT(skb, RTA_SRC, 16, src);
1622                 rtm->rtm_src_len = 128;
1623         } else if (rtm->rtm_src_len)
1624                 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1625 #endif
1626         if (iif)
1627                 RTA_PUT(skb, RTA_IIF, 4, &iif);
1628         else if (dst) {
1629                 struct in6_addr saddr_buf;
1630                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1631                         RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1632         }
1633         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1634                 goto rtattr_failure;
1635         if (rt->u.dst.neighbour)
1636                 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1637         if (rt->u.dst.dev)
1638                 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1639         RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1640         ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1641         if (rt->rt6i_expires)
1642                 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1643         else
1644                 ci.rta_expires = 0;
1645         ci.rta_used = rt->u.dst.__use;
1646         ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1647         ci.rta_error = rt->u.dst.error;
1648         ci.rta_id = 0;
1649         ci.rta_ts = 0;
1650         ci.rta_tsage = 0;
1651         RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1652         nlh->nlmsg_len = skb->tail - b;
1653         return skb->len;
1654
1655 nlmsg_failure:
1656 rtattr_failure:
1657         skb_trim(skb, b - skb->data);
1658         return -1;
1659 }
1660
1661 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1662 {
1663         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1664         int prefix;
1665
1666         if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1667                 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1668                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1669         } else
1670                 prefix = 0;
1671
1672         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1673                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1674                      prefix, NLM_F_MULTI);
1675 }
1676
1677 static int fib6_dump_node(struct fib6_walker_t *w)
1678 {
1679         int res;
1680         struct rt6_info *rt;
1681
1682         for (rt = w->leaf; rt; rt = rt->u.next) {
1683                 res = rt6_dump_route(rt, w->args);
1684                 if (res < 0) {
1685                         /* Frame is full, suspend walking */
1686                         w->leaf = rt;
1687                         return 1;
1688                 }
1689                 BUG_TRAP(res!=0);
1690         }
1691         w->leaf = NULL;
1692         return 0;
1693 }
1694
1695 static void fib6_dump_end(struct netlink_callback *cb)
1696 {
1697         struct fib6_walker_t *w = (void*)cb->args[0];
1698
1699         if (w) {
1700                 cb->args[0] = 0;
1701                 fib6_walker_unlink(w);
1702                 kfree(w);
1703         }
1704         if (cb->args[1]) {
1705                 cb->done = (void*)cb->args[1];
1706                 cb->args[1] = 0;
1707         }
1708 }
1709
1710 static int fib6_dump_done(struct netlink_callback *cb)
1711 {
1712         fib6_dump_end(cb);
1713         return cb->done(cb);
1714 }
1715
1716 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1717 {
1718         struct rt6_rtnl_dump_arg arg;
1719         struct fib6_walker_t *w;
1720         int res;
1721
1722         arg.skb = skb;
1723         arg.cb = cb;
1724
1725         w = (void*)cb->args[0];
1726         if (w == NULL) {
1727                 /* New dump:
1728                  * 
1729                  * 1. hook callback destructor.
1730                  */
1731                 cb->args[1] = (long)cb->done;
1732                 cb->done = fib6_dump_done;
1733
1734                 /*
1735                  * 2. allocate and initialize walker.
1736                  */
1737                 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1738                 if (w == NULL)
1739                         return -ENOMEM;
1740                 RT6_TRACE("dump<%p", w);
1741                 memset(w, 0, sizeof(*w));
1742                 w->root = &ip6_routing_table;
1743                 w->func = fib6_dump_node;
1744                 w->args = &arg;
1745                 cb->args[0] = (long)w;
1746                 read_lock_bh(&rt6_lock);
1747                 res = fib6_walk(w);
1748                 read_unlock_bh(&rt6_lock);
1749         } else {
1750                 w->args = &arg;
1751                 read_lock_bh(&rt6_lock);
1752                 res = fib6_walk_continue(w);
1753                 read_unlock_bh(&rt6_lock);
1754         }
1755 #if RT6_DEBUG >= 3
1756         if (res <= 0 && skb->len == 0)
1757                 RT6_TRACE("%p>dump end\n", w);
1758 #endif
1759         res = res < 0 ? res : skb->len;
1760         /* res < 0 is an error. (really, impossible)
1761            res == 0 means that dump is complete, but skb still can contain data.
1762            res > 0 dump is not complete, but frame is full.
1763          */
1764         /* Destroy walker, if dump of this table is complete. */
1765         if (res <= 0)
1766                 fib6_dump_end(cb);
1767         return res;
1768 }
1769
1770 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1771 {
1772         struct rtattr **rta = arg;
1773         int iif = 0;
1774         int err = -ENOBUFS;
1775         struct sk_buff *skb;
1776         struct flowi fl;
1777         struct rt6_info *rt;
1778
1779         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1780         if (skb == NULL)
1781                 goto out;
1782
1783         /* Reserve room for dummy headers, this skb can pass
1784            through good chunk of routing engine.
1785          */
1786         skb->mac.raw = skb->data;
1787         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1788
1789         memset(&fl, 0, sizeof(fl));
1790         if (rta[RTA_SRC-1])
1791                 ipv6_addr_copy(&fl.fl6_src,
1792                                (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1793         if (rta[RTA_DST-1])
1794                 ipv6_addr_copy(&fl.fl6_dst,
1795                                (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1796
1797         if (rta[RTA_IIF-1])
1798                 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1799
1800         if (iif) {
1801                 struct net_device *dev;
1802                 dev = __dev_get_by_index(iif);
1803                 if (!dev) {
1804                         err = -ENODEV;
1805                         goto out_free;
1806                 }
1807         }
1808
1809         fl.oif = 0;
1810         if (rta[RTA_OIF-1])
1811                 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1812
1813         rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1814
1815         skb->dst = &rt->u.dst;
1816
1817         NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1818         err = rt6_fill_node(skb, rt, 
1819                             &fl.fl6_dst, &fl.fl6_src,
1820                             iif,
1821                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1822                             nlh->nlmsg_seq, 0, 0);
1823         if (err < 0) {
1824                 err = -EMSGSIZE;
1825                 goto out_free;
1826         }
1827
1828         err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1829         if (err > 0)
1830                 err = 0;
1831 out:
1832         return err;
1833 out_free:
1834         kfree_skb(skb);
1835         goto out;       
1836 }
1837
1838 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, 
1839                         struct netlink_skb_parms *req)
1840 {
1841         struct sk_buff *skb;
1842         int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1843         u32 pid = current->pid;
1844         u32 seq = 0;
1845
1846         if (req)
1847                 pid = req->pid;
1848         if (nlh)
1849                 seq = nlh->nlmsg_seq;
1850         
1851         skb = alloc_skb(size, gfp_any());
1852         if (!skb) {
1853                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1854                 return;
1855         }
1856         if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1857                 kfree_skb(skb);
1858                 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1859                 return;
1860         }
1861         NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1862         netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1863 }
1864
1865 /*
1866  *      /proc
1867  */
1868
1869 #ifdef CONFIG_PROC_FS
1870
/*
 * Nominal length in characters of one record written by rt6_info_route();
 * rt6_proc_info() uses it to translate a read offset into a number of
 * records to skip.
 */
#define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)

/* Cursor shared between rt6_proc_info() and rt6_info_route(). */
struct rt6_proc_arg {
	char *buffer;	/* output buffer being filled */
	int offset;	/* read offset requested by the caller */
	int length;	/* number of bytes requested by the caller */
	int skip;	/* records skipped so far */
	int len;	/* number of characters written to buffer so far */
};
1881
1882 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1883 {
1884         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1885         int i;
1886
1887         if (arg->skip < arg->offset / RT6_INFO_LEN) {
1888                 arg->skip++;
1889                 return 0;
1890         }
1891
1892         if (arg->len >= arg->length)
1893                 return 0;
1894
1895         for (i=0; i<16; i++) {
1896                 sprintf(arg->buffer + arg->len, "%02x",
1897                         rt->rt6i_dst.addr.s6_addr[i]);
1898                 arg->len += 2;
1899         }
1900         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1901                             rt->rt6i_dst.plen);
1902
1903 #ifdef CONFIG_IPV6_SUBTREES
1904         for (i=0; i<16; i++) {
1905                 sprintf(arg->buffer + arg->len, "%02x",
1906                         rt->rt6i_src.addr.s6_addr[i]);
1907                 arg->len += 2;
1908         }
1909         arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1910                             rt->rt6i_src.plen);
1911 #else
1912         sprintf(arg->buffer + arg->len,
1913                 "00000000000000000000000000000000 00 ");
1914         arg->len += 36;
1915 #endif
1916
1917         if (rt->rt6i_nexthop) {
1918                 for (i=0; i<16; i++) {
1919                         sprintf(arg->buffer + arg->len, "%02x",
1920                                 rt->rt6i_nexthop->primary_key[i]);
1921                         arg->len += 2;
1922                 }
1923         } else {
1924                 sprintf(arg->buffer + arg->len,
1925                         "00000000000000000000000000000000");
1926                 arg->len += 32;
1927         }
1928         arg->len += sprintf(arg->buffer + arg->len,
1929                             " %08x %08x %08x %08x %8s\n",
1930                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1931                             rt->u.dst.__use, rt->rt6i_flags, 
1932                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
1933         return 0;
1934 }
1935
1936 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1937 {
1938         struct rt6_proc_arg arg;
1939         arg.buffer = buffer;
1940         arg.offset = offset;
1941         arg.length = length;
1942         arg.skip = 0;
1943         arg.len = 0;
1944
1945         read_lock_bh(&rt6_lock);
1946         fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1947         read_unlock_bh(&rt6_lock);
1948
1949         *start = buffer;
1950         if (offset)
1951                 *start += offset % RT6_INFO_LEN;
1952
1953         arg.len -= offset % RT6_INFO_LEN;
1954
1955         if (arg.len > length)
1956                 arg.len = length;
1957         if (arg.len < 0)
1958                 arg.len = 0;
1959
1960         return arg.len;
1961 }
1962
1963 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1964 {
1965         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1966                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1967                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1968                       rt6_stats.fib_rt_cache,
1969                       atomic_read(&ip6_dst_ops.entries),
1970                       rt6_stats.fib_discarded_routes);
1971
1972         return 0;
1973 }
1974
1975 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1976 {
1977         return single_open(file, rt6_stats_seq_show, NULL);
1978 }
1979
1980 static struct file_operations rt6_stats_seq_fops = {
1981         .owner   = THIS_MODULE,
1982         .open    = rt6_stats_seq_open,
1983         .read    = seq_read,
1984         .llseek  = seq_lseek,
1985         .release = single_release,
1986 };
1987 #endif  /* CONFIG_PROC_FS */
1988
1989 #ifdef CONFIG_SYSCTL
1990
1991 static int flush_delay;
1992
1993 static
1994 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1995                               void __user *buffer, size_t *lenp, loff_t *ppos)
1996 {
1997         if (write) {
1998                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1999                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2000                 return 0;
2001         } else
2002                 return -EINVAL;
2003 }
2004
2005 ctl_table ipv6_route_table[] = {
2006         {
2007                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH, 
2008                 .procname       =       "flush",
2009                 .data           =       &flush_delay,
2010                 .maxlen         =       sizeof(int),
2011                 .mode           =       0200,
2012                 .proc_handler   =       &ipv6_sysctl_rtcache_flush
2013         },
2014         {
2015                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,
2016                 .procname       =       "gc_thresh",
2017                 .data           =       &ip6_dst_ops.gc_thresh,
2018                 .maxlen         =       sizeof(int),
2019                 .mode           =       0644,
2020                 .proc_handler   =       &proc_dointvec,
2021         },
2022         {
2023                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,
2024                 .procname       =       "max_size",
2025                 .data           =       &ip6_rt_max_size,
2026                 .maxlen         =       sizeof(int),
2027                 .mode           =       0644,
2028                 .proc_handler   =       &proc_dointvec,
2029         },
2030         {
2031                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2032                 .procname       =       "gc_min_interval",
2033                 .data           =       &ip6_rt_gc_min_interval,
2034                 .maxlen         =       sizeof(int),
2035                 .mode           =       0644,
2036                 .proc_handler   =       &proc_dointvec_jiffies,
2037                 .strategy       =       &sysctl_jiffies,
2038         },
2039         {
2040                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,
2041                 .procname       =       "gc_timeout",
2042                 .data           =       &ip6_rt_gc_timeout,
2043                 .maxlen         =       sizeof(int),
2044                 .mode           =       0644,
2045                 .proc_handler   =       &proc_dointvec_jiffies,
2046                 .strategy       =       &sysctl_jiffies,
2047         },
2048         {
2049                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,
2050                 .procname       =       "gc_interval",
2051                 .data           =       &ip6_rt_gc_interval,
2052                 .maxlen         =       sizeof(int),
2053                 .mode           =       0644,
2054                 .proc_handler   =       &proc_dointvec_jiffies,
2055                 .strategy       =       &sysctl_jiffies,
2056         },
2057         {
2058                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,
2059                 .procname       =       "gc_elasticity",
2060                 .data           =       &ip6_rt_gc_elasticity,
2061                 .maxlen         =       sizeof(int),
2062                 .mode           =       0644,
2063                 .proc_handler   =       &proc_dointvec_jiffies,
2064                 .strategy       =       &sysctl_jiffies,
2065         },
2066         {
2067                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,
2068                 .procname       =       "mtu_expires",
2069                 .data           =       &ip6_rt_mtu_expires,
2070                 .maxlen         =       sizeof(int),
2071                 .mode           =       0644,
2072                 .proc_handler   =       &proc_dointvec_jiffies,
2073                 .strategy       =       &sysctl_jiffies,
2074         },
2075         {
2076                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,
2077                 .procname       =       "min_adv_mss",
2078                 .data           =       &ip6_rt_min_advmss,
2079                 .maxlen         =       sizeof(int),
2080                 .mode           =       0644,
2081                 .proc_handler   =       &proc_dointvec_jiffies,
2082                 .strategy       =       &sysctl_jiffies,
2083         },
2084         {
2085                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2086                 .procname       =       "gc_min_interval_ms",
2087                 .data           =       &ip6_rt_gc_min_interval,
2088                 .maxlen         =       sizeof(int),
2089                 .mode           =       0644,
2090                 .proc_handler   =       &proc_dointvec_ms_jiffies,
2091                 .strategy       =       &sysctl_ms_jiffies,
2092         },
2093         { .ctl_name = 0 }
2094 };
2095
2096 #endif
2097
2098 void __init ip6_route_init(void)
2099 {
2100         struct proc_dir_entry *p;
2101
2102         ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2103                                                      sizeof(struct rt6_info),
2104                                                      0, SLAB_HWCACHE_ALIGN,
2105                                                      NULL, NULL);
2106         if (!ip6_dst_ops.kmem_cachep)
2107                 panic("cannot create ip6_dst_cache");
2108
2109         fib6_init();
2110 #ifdef  CONFIG_PROC_FS
2111         p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2112         if (p)
2113                 p->owner = THIS_MODULE;
2114
2115         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2116 #endif
2117 #ifdef CONFIG_XFRM
2118         xfrm6_init();
2119 #endif
2120 }
2121
2122 void ip6_route_cleanup(void)
2123 {
2124 #ifdef CONFIG_PROC_FS
2125         proc_net_remove("ipv6_route");
2126         proc_net_remove("rt6_stats");
2127 #endif
2128 #ifdef CONFIG_XFRM
2129         xfrm6_fini();
2130 #endif
2131         rt6_ifdown(NULL);
2132         fib6_gc_cleanup();
2133         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2134 }