2  *      Linux INET6 implementation
 
   6  *      Pedro Roque             <roque@di.fc.ul.pt>
 
   8  *      $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
 
  10  *      This program is free software; you can redistribute it and/or
 
  11  *      modify it under the terms of the GNU General Public License
 
  12  *      as published by the Free Software Foundation; either version
 
  13  *      2 of the License, or (at your option) any later version.
 
  18  *      YOSHIFUJI Hideaki @USAGI
 
  19  *              reworked default router selection.
 
  20  *              - respect outgoing interface
 
  21  *              - select from (probably) reachable routers (i.e.
 
  22  *              routers in REACHABLE, STALE, DELAY or PROBE states).
 
  23  *              - always select the same router if it is (probably)
 
  24  *              reachable.  otherwise, round-robin the list.
 
  26  *              Fixed routing subtrees.
 
  29 #include <linux/capability.h>
 
  30 #include <linux/errno.h>
 
  31 #include <linux/types.h>
 
  32 #include <linux/times.h>
 
  33 #include <linux/socket.h>
 
  34 #include <linux/sockios.h>
 
  35 #include <linux/net.h>
 
  36 #include <linux/route.h>
 
  37 #include <linux/netdevice.h>
 
  38 #include <linux/in6.h>
 
  39 #include <linux/init.h>
 
  40 #include <linux/if_arp.h>
 
  43 #include <linux/proc_fs.h>
 
  44 #include <linux/seq_file.h>
 
  49 #include <net/ip6_fib.h>
 
  50 #include <net/ip6_route.h>
 
  51 #include <net/ndisc.h>
 
  52 #include <net/addrconf.h>
 
  54 #include <linux/rtnetlink.h>
 
  57 #include <net/netevent.h>
 
  58 #include <net/netlink.h>
 
  60 #include <asm/uaccess.h>
 
  63 #include <linux/sysctl.h>
 
  66 /* Set to 3 to get tracing. */
 
  70 #define RDBG(x) printk x
 
  71 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
 
  74 #define RT6_TRACE(x...) do { ; } while (0)
 
  77 #define CLONE_OFFLINK_ROUTE 0
 
   /*
    * Tunables for the IPv6 routing cache.  Uses visible in this file:
    *  - ip6_rt_max_size, ip6_rt_gc_min_interval, ip6_rt_gc_timeout and
    *    ip6_rt_gc_elasticity are read by ip6_dst_gc();
    *  - ip6_rt_min_advmss is the lower bound applied in ipv6_advmss().
    * The interval/timeout values are expressed as multiples of HZ;
    * ip6_dst_gc() compares ip6_rt_gc_min_interval against jiffies.
    * ip6_rt_gc_interval is the only one with external linkage.
    * NOTE(review): the "- 20 - 40" in ip6_rt_min_advmss presumably stands
    * for the TCP and IPv6 header sizes -- confirm.
    */
   79 static int ip6_rt_max_size = 4096;
 
   80 static int ip6_rt_gc_min_interval = HZ / 2;
 
   81 static int ip6_rt_gc_timeout = 60*HZ;
 
   82 int ip6_rt_gc_interval = 30*HZ;
 
   83 static int ip6_rt_gc_elasticity = 9;
 
   84 static int ip6_rt_mtu_expires = 10*60*HZ;
 
   85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
 
  87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
 
  88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
 
  89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
 
  90 static void             ip6_dst_destroy(struct dst_entry *);
 
  91 static void             ip6_dst_ifdown(struct dst_entry *,
 
  92                                        struct net_device *dev, int how);
 
  93 static int               ip6_dst_gc(void);
 
  95 static int              ip6_pkt_discard(struct sk_buff *skb);
 
  96 static int              ip6_pkt_discard_out(struct sk_buff *skb);
 
  97 static void             ip6_link_failure(struct sk_buff *skb);
 
  98 static void             ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
 
 100 #ifdef CONFIG_IPV6_ROUTE_INFO
 
 101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
 
 102                                            struct in6_addr *gwaddr, int ifindex,
 
 104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
 
 105                                            struct in6_addr *gwaddr, int ifindex);
 
  /*
   * dst operations table for ordinary IPv6 routes.  ip6_dst_alloc() passes
   * this table to dst_alloc(), and ip6_dst_gc() reads its .entries and
   * .gc_thresh members.  Each entry is sized as a struct rt6_info.
   * NOTE(review): some initializers of this table are not present in this
   * listing.
   */
  108 static struct dst_ops ip6_dst_ops = {
 
  110         .protocol               =       __constant_htons(ETH_P_IPV6),
 
  113         .check                  =       ip6_dst_check,
 
  114         .destroy                =       ip6_dst_destroy,
 
  115         .ifdown                 =       ip6_dst_ifdown,
 
  116         .negative_advice        =       ip6_negative_advice,
 
  117         .link_failure           =       ip6_link_failure,
 
  118         .update_pmtu            =       ip6_rt_update_pmtu,
 
  119         .entry_size             =       sizeof(struct rt6_info),
 
 122 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
 
  /*
   * dst operations table for blackhole routes; ip6_dst_blackhole() allocates
   * its entries from this table.  It shares .destroy and .check with
   * ip6_dst_ops but uses its own PMTU handler, and sets no .ifdown,
   * .negative_advice or .link_failure in the lines visible here.
   */
  126 static struct dst_ops ip6_dst_blackhole_ops = {
 
  128         .protocol               =       __constant_htons(ETH_P_IPV6),
 
  129         .destroy                =       ip6_dst_destroy,
 
  130         .check                  =       ip6_dst_check,
 
  131         .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
 
  132         .entry_size             =       sizeof(struct rt6_info),
 
  /*
   * Statically allocated "no route" entry.  Lookup helpers in this file
   * (rt6_device_match(), rt6_select()) return its address when nothing
   * matches, and callers compare against &ip6_null_entry to detect that.
   *
   * It is initialised as a reject route: error -ENETUNREACH, discard
   * handlers for input and output, RTF_REJECT | RTF_NONEXTHOP, an all-ones
   * metric, a hop-limit metric of 255, both reference counters starting
   * at 1, bound to loopback_dev, and with .path pointing back at itself.
   */
  135 struct rt6_info ip6_null_entry = {
 
  138                         .__refcnt       = ATOMIC_INIT(1),
 
  140                         .dev            = &loopback_dev,
 
  142                         .error          = -ENETUNREACH,
 
  143                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
 
  144                         .input          = ip6_pkt_discard,
 
  145                         .output         = ip6_pkt_discard_out,
 
  147                         .path           = (struct dst_entry*)&ip6_null_entry,
 
  150         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 
  151         .rt6i_metric    = ~(u32) 0,
 
  152         .rt6i_ref       = ATOMIC_INIT(1),
 
 155 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
 157 static int ip6_pkt_prohibit(struct sk_buff *skb);
 
 158 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
 
 159 static int ip6_pkt_blk_hole(struct sk_buff *skb);
 
 161 struct rt6_info ip6_prohibit_entry = {
 
 164                         .__refcnt       = ATOMIC_INIT(1),
 
 166                         .dev            = &loopback_dev,
 
 169                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
 
 170                         .input          = ip6_pkt_prohibit,
 
 171                         .output         = ip6_pkt_prohibit_out,
 
 173                         .path           = (struct dst_entry*)&ip6_prohibit_entry,
 
 176         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 
 177         .rt6i_metric    = ~(u32) 0,
 
 178         .rt6i_ref       = ATOMIC_INIT(1),
 
 181 struct rt6_info ip6_blk_hole_entry = {
 
 184                         .__refcnt       = ATOMIC_INIT(1),
 
 186                         .dev            = &loopback_dev,
 
 189                         .metrics        = { [RTAX_HOPLIMIT - 1] = 255, },
 
 190                         .input          = ip6_pkt_blk_hole,
 
 191                         .output         = ip6_pkt_blk_hole,
 
 193                         .path           = (struct dst_entry*)&ip6_blk_hole_entry,
 
 196         .rt6i_flags     = (RTF_REJECT | RTF_NONEXTHOP),
 
 197         .rt6i_metric    = ~(u32) 0,
 
 198         .rt6i_ref       = ATOMIC_INIT(1),
 
  203 /*
   * Allocate a dst entry from ip6_dst_ops and hand it back typed as
   * struct rt6_info.  The dst_alloc() result is cast and returned as-is;
   * there is no NULL check here, so callers test the result themselves
   * (as ndisc_dst_alloc() does).
   */
 
  204 static __inline__ struct rt6_info *ip6_dst_alloc(void)
 
  206         return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
 
 209 static void ip6_dst_destroy(struct dst_entry *dst)
 
 211         struct rt6_info *rt = (struct rt6_info *)dst;
 
 212         struct inet6_dev *idev = rt->rt6i_idev;
 
 215                 rt->rt6i_idev = NULL;
 
 220 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
 
 223         struct rt6_info *rt = (struct rt6_info *)dst;
 
 224         struct inet6_dev *idev = rt->rt6i_idev;
 
 226         if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
 
 227                 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
 
 228                 if (loopback_idev != NULL) {
 
 229                         rt->rt6i_idev = loopback_idev;
 
  /*
   * Tell whether a route has outlived its expiry time.
   *
   * Returns nonzero only when the route carries RTF_EXPIRES and the current
   * jiffies value is after rt->rt6i_expires (compared with time_after());
   * routes without RTF_EXPIRES are never reported as expired.  The route is
   * only read, never modified.
   */
  235 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
 
  237         return (rt->rt6i_flags & RTF_EXPIRES &&
 
  238                 time_after(jiffies, rt->rt6i_expires));
 
  /*
   * Returns the IPV6_ADDR_MULTICAST / IPV6_ADDR_LINKLOCAL bits of the
   * address type of daddr, i.e. nonzero when ipv6_addr_type() reports the
   * address as multicast or link-local.  ip6_route_input() and
   * ip6_route_output() add RT6_LOOKUP_F_IFACE to their lookup flags when
   * this is nonzero.
   */
  241 static inline int rt6_need_strict(struct in6_addr *daddr)
 
  243         return (ipv6_addr_type(daddr) &
 
  244                 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
 
 248  *      Route lookup. Any table->tb6_lock is implied.
 
 251 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
 
 255         struct rt6_info *local = NULL;
 
 256         struct rt6_info *sprt;
 
 259                 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
 
 260                         struct net_device *dev = sprt->rt6i_dev;
 
 261                         if (dev->ifindex == oif)
 
 263                         if (dev->flags & IFF_LOOPBACK) {
 
 264                                 if (sprt->rt6i_idev == NULL ||
 
 265                                     sprt->rt6i_idev->dev->ifindex != oif) {
 
 268                                         if (local && (!oif ||
 
 269                                                       local->rt6i_idev->dev->ifindex == oif))
 
 280                         return &ip6_null_entry;
 
 285 #ifdef CONFIG_IPV6_ROUTER_PREF
 
 286 static void rt6_probe(struct rt6_info *rt)
 
 288         struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
 
 290          * Okay, this does not seem to be appropriate
 
 291          * for now, however, we need to check if it
 
 292          * is really so; aka Router Reachability Probing.
 
 294          * Router Reachability Probe MUST be rate-limited
 
 295          * to no more than one per minute.
 
 297         if (!neigh || (neigh->nud_state & NUD_VALID))
 
 299         read_lock_bh(&neigh->lock);
 
 300         if (!(neigh->nud_state & NUD_VALID) &&
 
 301             time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
 
 302                 struct in6_addr mcaddr;
 
 303                 struct in6_addr *target;
 
 305                 neigh->updated = jiffies;
 
 306                 read_unlock_bh(&neigh->lock);
 
 308                 target = (struct in6_addr *)&neigh->primary_key;
 
 309                 addrconf_addr_solict_mult(target, &mcaddr);
 
 310                 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
 
 312                 read_unlock_bh(&neigh->lock);
 
 315 static inline void rt6_probe(struct rt6_info *rt)
 
 322  * Default Router Selection (RFC 2461 6.3.6)
 
 324 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
 
 326         struct net_device *dev = rt->rt6i_dev;
 
 327         if (!oif || dev->ifindex == oif)
 
 329         if ((dev->flags & IFF_LOOPBACK) &&
 
 330             rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
 
 335 static inline int rt6_check_neigh(struct rt6_info *rt)
 
 337         struct neighbour *neigh = rt->rt6i_nexthop;
 
 339         if (rt->rt6i_flags & RTF_NONEXTHOP ||
 
 340             !(rt->rt6i_flags & RTF_GATEWAY))
 
 343                 read_lock_bh(&neigh->lock);
 
 344                 if (neigh->nud_state & NUD_VALID)
 
 346                 else if (!(neigh->nud_state & NUD_FAILED))
 
 348                 read_unlock_bh(&neigh->lock);
 
 353 static int rt6_score_route(struct rt6_info *rt, int oif,
 
 358         m = rt6_check_dev(rt, oif);
 
 359         if (!m && (strict & RT6_LOOKUP_F_IFACE))
 
 361 #ifdef CONFIG_IPV6_ROUTER_PREF
 
 362         m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
 
 364         n = rt6_check_neigh(rt);
 
 365         if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
 
 370 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
 
 371                                    int *mpri, struct rt6_info *match)
 
 375         if (rt6_check_expired(rt))
 
 378         m = rt6_score_route(rt, oif, strict);
 
 383                 if (strict & RT6_LOOKUP_F_REACHABLE)
 
 387         } else if (strict & RT6_LOOKUP_F_REACHABLE) {
 
 395 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
 
 396                                      struct rt6_info *rr_head,
 
 397                                      u32 metric, int oif, int strict)
 
 399         struct rt6_info *rt, *match;
 
 403         for (rt = rr_head; rt && rt->rt6i_metric == metric;
 
 404              rt = rt->u.dst.rt6_next)
 
 405                 match = find_match(rt, oif, strict, &mpri, match);
 
 406         for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
 
 407              rt = rt->u.dst.rt6_next)
 
 408                 match = find_match(rt, oif, strict, &mpri, match);
 
 413 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
 
 415         struct rt6_info *match, *rt0;
 
 417         RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
 
 418                   __FUNCTION__, fn->leaf, oif);
 
 422                 fn->rr_ptr = rt0 = fn->leaf;
 
 424         match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
 
 427             (strict & RT6_LOOKUP_F_REACHABLE)) {
 
 428                 struct rt6_info *next = rt0->u.dst.rt6_next;
 
 430                 /* no entries matched; do round-robin */
 
 431                 if (!next || next->rt6i_metric != rt0->rt6i_metric)
 
 438         RT6_TRACE("%s() => %p\n",
 
 439                   __FUNCTION__, match);
 
 441         return (match ? match : &ip6_null_entry);
 
 444 #ifdef CONFIG_IPV6_ROUTE_INFO
 
 445 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 
 446                   struct in6_addr *gwaddr)
 
 448         struct route_info *rinfo = (struct route_info *) opt;
 
 449         struct in6_addr prefix_buf, *prefix;
 
 454         if (len < sizeof(struct route_info)) {
 
 458         /* Sanity check for prefix_len and length */
 
 459         if (rinfo->length > 3) {
 
 461         } else if (rinfo->prefix_len > 128) {
 
 463         } else if (rinfo->prefix_len > 64) {
 
 464                 if (rinfo->length < 2) {
 
 467         } else if (rinfo->prefix_len > 0) {
 
 468                 if (rinfo->length < 1) {
 
 473         pref = rinfo->route_pref;
 
 474         if (pref == ICMPV6_ROUTER_PREF_INVALID)
 
 475                 pref = ICMPV6_ROUTER_PREF_MEDIUM;
 
 477         lifetime = ntohl(rinfo->lifetime);
 
 478         if (lifetime == 0xffffffff) {
 
 480         } else if (lifetime > 0x7fffffff/HZ) {
 
 481                 /* Avoid arithmetic overflow */
 
 482                 lifetime = 0x7fffffff/HZ - 1;
 
 485         if (rinfo->length == 3)
 
 486                 prefix = (struct in6_addr *)rinfo->prefix;
 
 488                 /* this function is safe */
 
 489                 ipv6_addr_prefix(&prefix_buf,
 
 490                                  (struct in6_addr *)rinfo->prefix,
 
 492                 prefix = &prefix_buf;
 
 495         rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
 
 497         if (rt && !lifetime) {
 
 503                 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
 
 506                 rt->rt6i_flags = RTF_ROUTEINFO |
 
 507                                  (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
 
 510                 if (lifetime == 0xffffffff) {
 
 511                         rt->rt6i_flags &= ~RTF_EXPIRES;
 
 513                         rt->rt6i_expires = jiffies + HZ * lifetime;
 
 514                         rt->rt6i_flags |= RTF_EXPIRES;
 
 516                 dst_release(&rt->u.dst);
 
 522 #define BACKTRACK(saddr) \
 
 524         if (rt == &ip6_null_entry) { \
 
 525                 struct fib6_node *pn; \
 
 527                         if (fn->fn_flags & RTN_TL_ROOT) \
 
 530                         if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
 
 531                                 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
 
 534                         if (fn->fn_flags & RTN_RTINFO) \
 
 540 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
 
 541                                              struct flowi *fl, int flags)
 
 543         struct fib6_node *fn;
 
 546         read_lock_bh(&table->tb6_lock);
 
 547         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 550         rt = rt6_device_match(rt, fl->oif, flags);
 
 551         BACKTRACK(&fl->fl6_src);
 
 553         dst_hold(&rt->u.dst);
 
 554         read_unlock_bh(&table->tb6_lock);
 
 556         rt->u.dst.lastuse = jiffies;
 
 563 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
 
 574         struct dst_entry *dst;
 
 575         int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
 
 578                 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
 
 579                 flags |= RT6_LOOKUP_F_HAS_SADDR;
 
 582         dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup);
 
 584                 return (struct rt6_info *) dst;
 
 591 EXPORT_SYMBOL(rt6_lookup);
 
  593 /* ip6_ins_rt must be called with table->tb6_lock NOT held.
 
  594    It takes a new route entry; if the addition fails for any reason, the
 
  595    route is freed. In any case, if the caller does not hold it, it may
 
  /*
   * Insert rt into the FIB table it already points at (rt->rt6i_table).
   *
   * fib6_add() is called with the table's tb6_lock write-locked; the lock
   * is taken and dropped here with the _bh variants, so the caller must not
   * already hold it.  info is passed through to fib6_add() untouched.
   *
   * NOTE(review): the declaration of err and the return statement are not
   * present in this listing; presumably the fib6_add() result is what gets
   * returned -- confirm against the full file.
   */
  599 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
 
  602         struct fib6_table *table;
 
  604         table = rt->rt6i_table;
 
  605         write_lock_bh(&table->tb6_lock);
 
  606         err = fib6_add(&table->tb6_root, rt, info);
 
  607         write_unlock_bh(&table->tb6_lock);
 
  /*
   * Externally visible wrapper around __ip6_ins_rt(): inserts rt with a
   * NULL nl_info and returns whatever __ip6_ins_rt() returns.
   */
  612 int ip6_ins_rt(struct rt6_info *rt)
 
  614         return __ip6_ins_rt(rt, NULL);
 
 617 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
 
 618                                       struct in6_addr *saddr)
 
 626         rt = ip6_rt_copy(ort);
 
 629                 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
 
 630                         if (rt->rt6i_dst.plen != 128 &&
 
 631                             ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
 
 632                                 rt->rt6i_flags |= RTF_ANYCAST;
 
 633                         ipv6_addr_copy(&rt->rt6i_gateway, daddr);
 
 636                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 
 637                 rt->rt6i_dst.plen = 128;
 
 638                 rt->rt6i_flags |= RTF_CACHE;
 
 639                 rt->u.dst.flags |= DST_HOST;
 
 641 #ifdef CONFIG_IPV6_SUBTREES
 
 642                 if (rt->rt6i_src.plen && saddr) {
 
 643                         ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
 
 644                         rt->rt6i_src.plen = 128;
 
 648                 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 
  /*
   * Duplicate ort with ip6_rt_copy() and turn the copy into a host route
   * for daddr: the destination becomes daddr with prefix length 128,
   * RTF_CACHE and DST_HOST are set, and the nexthop is obtained with
   * neigh_clone() from the original's nexthop.
   *
   * NOTE(review): the indentation suggests the field updates sit under a
   * check of rt, and the function must return the copy, but neither the
   * check nor the return is present in this listing -- confirm.
   */
  655 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
 
  657         struct rt6_info *rt = ip6_rt_copy(ort);
 
  659                 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
 
  660                 rt->rt6i_dst.plen = 128;
 
  661                 rt->rt6i_flags |= RTF_CACHE;
 
  662                 rt->u.dst.flags |= DST_HOST;
 
  663                 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
 
 668 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
 
 669                                             struct flowi *fl, int flags)
 
 671         struct fib6_node *fn;
 
 672         struct rt6_info *rt, *nrt;
 
 676         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 
 678         strict |= flags & RT6_LOOKUP_F_IFACE;
 
 681         read_lock_bh(&table->tb6_lock);
 
 684         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 687         rt = rt6_select(fn, fl->iif, strict | reachable);
 
 688         BACKTRACK(&fl->fl6_src);
 
 689         if (rt == &ip6_null_entry ||
 
 690             rt->rt6i_flags & RTF_CACHE)
 
 693         dst_hold(&rt->u.dst);
 
 694         read_unlock_bh(&table->tb6_lock);
 
 696         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 
 697                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
 
 699 #if CLONE_OFFLINK_ROUTE
 
 700                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
 
 706         dst_release(&rt->u.dst);
 
 707         rt = nrt ? : &ip6_null_entry;
 
 709         dst_hold(&rt->u.dst);
 
 711                 err = ip6_ins_rt(nrt);
 
 720          * Race condition! In the gap, when table->tb6_lock was
 
 721          * released someone could insert this route.  Relookup.
 
 723         dst_release(&rt->u.dst);
 
 731         dst_hold(&rt->u.dst);
 
 732         read_unlock_bh(&table->tb6_lock);
 
 734         rt->u.dst.lastuse = jiffies;
 
  /*
   * Resolve the route for a received packet and store it in skb->dst.
   *
   * A flow key is filled from the packet: the incoming interface index
   * comes from skb->dev, the flow label is the first 32 bits of the IPv6
   * header masked with IPV6_FLOWINFO_MASK, and the protocol is the header's
   * nexthdr.  RT6_LOOKUP_F_HAS_SADDR is always set; RT6_LOOKUP_F_IFACE is
   * added when rt6_need_strict() is nonzero for the destination address.
   * The lookup goes through fib6_rule_lookup() with ip6_pol_route_input as
   * the per-table callback.
   *
   * NOTE(review): the declaration of the flow key (fl) and some of its
   * initializers are not present in this listing.
   */
  740 void ip6_route_input(struct sk_buff *skb)
 
  742         struct ipv6hdr *iph = ipv6_hdr(skb);
 
  743         int flags = RT6_LOOKUP_F_HAS_SADDR;
 
  745                 .iif = skb->dev->ifindex,
 
  750                                 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
 
  754                 .proto = iph->nexthdr,
 
  757         if (rt6_need_strict(&iph->daddr))
 
  758                 flags |= RT6_LOOKUP_F_IFACE;
 
  760         skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input);
 
 763 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
 
 764                                              struct flowi *fl, int flags)
 
 766         struct fib6_node *fn;
 
 767         struct rt6_info *rt, *nrt;
 
 771         int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
 
 773         strict |= flags & RT6_LOOKUP_F_IFACE;
 
 776         read_lock_bh(&table->tb6_lock);
 
 779         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
 782         rt = rt6_select(fn, fl->oif, strict | reachable);
 
 783         BACKTRACK(&fl->fl6_src);
 
 784         if (rt == &ip6_null_entry ||
 
 785             rt->rt6i_flags & RTF_CACHE)
 
 788         dst_hold(&rt->u.dst);
 
 789         read_unlock_bh(&table->tb6_lock);
 
 791         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 
 792                 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
 
 794 #if CLONE_OFFLINK_ROUTE
 
 795                 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
 
 801         dst_release(&rt->u.dst);
 
 802         rt = nrt ? : &ip6_null_entry;
 
 804         dst_hold(&rt->u.dst);
 
 806                 err = ip6_ins_rt(nrt);
 
 815          * Race condition! In the gap, when table->tb6_lock was
 
 816          * released someone could insert this route.  Relookup.
 
 818         dst_release(&rt->u.dst);
 
 826         dst_hold(&rt->u.dst);
 
 827         read_unlock_bh(&table->tb6_lock);
 
 829         rt->u.dst.lastuse = jiffies;
 
  /*
   * Resolve the output route for flow fl.
   *
   * RT6_LOOKUP_F_IFACE is added when rt6_need_strict() is nonzero for the
   * flow's destination, and RT6_LOOKUP_F_HAS_SADDR when the flow's source
   * address is not the unspecified address.  Returns the result of
   * fib6_rule_lookup() with ip6_pol_route_output as the per-table callback.
   * sk is not referenced in the lines visible here.
   *
   * NOTE(review): the declaration and initial value of flags are not
   * present in this listing.
   */
  834 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
 
  838         if (rt6_need_strict(&fl->fl6_dst))
 
  839                 flags |= RT6_LOOKUP_F_IFACE;
 
  841         if (!ipv6_addr_any(&fl->fl6_src))
 
  842                 flags |= RT6_LOOKUP_F_HAS_SADDR;
 
  844         return fib6_rule_lookup(fl, flags, ip6_pol_route_output);
 
 847 EXPORT_SYMBOL(ip6_route_output);
 
 849 static int ip6_blackhole_output(struct sk_buff *skb)
 
  /*
   * Build a blackhole counterpart of the route in *dstp.
   *
   * A new entry is allocated from ip6_dst_blackhole_ops.  Its input and
   * output handlers are both ip6_blackhole_output; metrics, device pointer,
   * inet6 device, gateway, destination key (and source key when
   * CONFIG_IPV6_SUBTREES is set) are copied from the original route.  The
   * flags are copied with RTF_EXPIRES cleared and the expiry is reset to 0.
   *
   * Returns 0 when new is non-NULL, -ENOMEM otherwise.
   *
   * NOTE(review): the lines that assign new, guard the copy, take the
   * device reference and update *dstp are not present in this listing; sk
   * and fl are not referenced in the visible lines.
   */
  855 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
 
  857         struct rt6_info *ort = (struct rt6_info *) *dstp;
 
  858         struct rt6_info *rt = (struct rt6_info *)
 
  859                 dst_alloc(&ip6_dst_blackhole_ops);
 
  860         struct dst_entry *new = NULL;
 
  865                 atomic_set(&new->__refcnt, 1);
 
  867                 new->input = ip6_blackhole_output;
 
  868                 new->output = ip6_blackhole_output;
 
  870                 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
 
  871                 new->dev = ort->u.dst.dev;
 
  874                 rt->rt6i_idev = ort->rt6i_idev;
 
  876                         in6_dev_hold(rt->rt6i_idev);
 
  877                 rt->rt6i_expires = 0;
 
  879                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 
  880                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 
  883                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 
  884 #ifdef CONFIG_IPV6_SUBTREES
 
  885                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 
  893         return (new ? 0 : -ENOMEM);
 
 895 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
 
 898  *      Destination cache support functions
 
  /*
   * .check handler of ip6_dst_ops and ip6_dst_blackhole_ops.
   *
   * The visible test treats dst as current when it is non-NULL, attached to
   * a FIB node (rt6i_node) and that node's fn_sernum equals cookie.
   *
   * NOTE(review): the declaration of rt and both return statements are not
   * present in this listing; presumably dst is returned when the test
   * passes and NULL otherwise -- confirm.
   */
  901 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
 
  905         rt = (struct rt6_info *) dst;
 
  907         if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
 
 913 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
 
 915         struct rt6_info *rt = (struct rt6_info *) dst;
 
 918                 if (rt->rt6i_flags & RTF_CACHE)
 
  /*
   * .link_failure handler of ip6_dst_ops.
   *
   * Sends an ICMPv6 destination-unreachable / address-unreachable error for
   * skb, then invalidates the route attached to it: a cached route
   * (RTF_CACHE) gets its expiry set via dst_set_expires(..., 0) and is
   * flagged RTF_EXPIRES; otherwise, a default route that is attached to a
   * FIB node has that node's fn_sernum overwritten with -1.
   *
   * NOTE(review): the declaration of rt and the guard around the
   * invalidation are not present in this listing.  Overwriting fn_sernum
   * presumably makes the sernum comparison in ip6_dst_check() fail for
   * previously issued cookies -- confirm.
   */
  926 static void ip6_link_failure(struct sk_buff *skb)
 
  930         icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
 
  932         rt = (struct rt6_info *) skb->dst;
 
  934                 if (rt->rt6i_flags&RTF_CACHE) {
 
  935                         dst_set_expires(&rt->u.dst, 0);
 
  936                         rt->rt6i_flags |= RTF_EXPIRES;
 
  937                 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
 
  938                         rt->rt6i_node->fn_sernum = -1;
 
  /*
   * .update_pmtu handler of ip6_dst_ops.
   *
   * Acts only when the new mtu is smaller than the current dst_mtu() and
   * the route is a host route (destination prefix length 128).  In that
   * case the route is flagged RTF_MODIFIED, the MTU metric is overwritten
   * with mtu, and NETEVENT_PMTU_UPDATE notifiers are called.  When mtu is
   * below IPV6_MIN_MTU the RTAX_FEATURE_ALLFRAG feature bit is also set.
   *
   * NOTE(review): one line inside the "mtu < IPV6_MIN_MTU" branch is not
   * present in this listing; presumably it raises mtu to IPV6_MIN_MTU
   * before the metric is stored -- confirm.
   */
  942 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
 
  944         struct rt6_info *rt6 = (struct rt6_info*)dst;
 
  946         if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
 
  947                 rt6->rt6i_flags |= RTF_MODIFIED;
 
  948                 if (mtu < IPV6_MIN_MTU) {
 
  950                         dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 
  952                 dst->metrics[RTAX_MTU-1] = mtu;
 
  953                 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
 
 957 static int ipv6_get_mtu(struct net_device *dev);
 
 959 static inline unsigned int ipv6_advmss(unsigned int mtu)
 
 961         mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
 
 963         if (mtu < ip6_rt_min_advmss)
 
 964                 mtu = ip6_rt_min_advmss;
 
 967          * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
 
 968          * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
 
 969          * IPV6_MAXPLEN is also valid and means: "any MSS,
 
 970          * rely only on pmtu discovery"
 
 972         if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
 
 977 static struct dst_entry *ndisc_dst_gc_list;
 
 978 static DEFINE_SPINLOCK(ndisc_lock);
 
 980 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
 
 981                                   struct neighbour *neigh,
 
 982                                   struct in6_addr *addr,
 
 983                                   int (*output)(struct sk_buff *))
 
 986         struct inet6_dev *idev = in6_dev_get(dev);
 
 988         if (unlikely(idev == NULL))
 
 991         rt = ip6_dst_alloc();
 
 992         if (unlikely(rt == NULL)) {
 
1001                 neigh = ndisc_get_neigh(dev, addr);
 
1004         rt->rt6i_idev     = idev;
 
1005         rt->rt6i_nexthop  = neigh;
 
1006         atomic_set(&rt->u.dst.__refcnt, 1);
 
1007         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
 
1008         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
 
1009         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
 
1010         rt->u.dst.output  = output;
 
1012 #if 0   /* there's no chance to use these for ndisc */
 
1013         rt->u.dst.flags   = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
 
1016         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 
1017         rt->rt6i_dst.plen = 128;
 
1020         spin_lock_bh(&ndisc_lock);
 
1021         rt->u.dst.next = ndisc_dst_gc_list;
 
1022         ndisc_dst_gc_list = &rt->u.dst;
 
1023         spin_unlock_bh(&ndisc_lock);
 
1025         fib6_force_start_gc();
 
1031 int ndisc_dst_gc(int *more)
 
1033         struct dst_entry *dst, *next, **pprev;
 
1039         spin_lock_bh(&ndisc_lock);
 
1040         pprev = &ndisc_dst_gc_list;
 
1042         while ((dst = *pprev) != NULL) {
 
1043                 if (!atomic_read(&dst->__refcnt)) {
 
1053         spin_unlock_bh(&ndisc_lock);
 
 /*
  * Garbage-collect the IPv6 routing cache.
  *
  * expire and last_gc are function-local statics, so they persist across
  * calls.  The first test is true while less than ip6_rt_gc_min_interval
  * has passed since last_gc AND the entry count is still within
  * ip6_rt_max_size.  fib6_run_gc() is invoked with the current expire
  * value.  When the entry count is below gc_thresh afterwards, expire is
  * reset to half of ip6_rt_gc_timeout; expire is then reduced by
  * expire >> ip6_rt_gc_elasticity.
  *
  * Returns nonzero when the entry count still exceeds ip6_rt_max_size.
  *
  * NOTE(review): the statement controlled by the first test, the update
  * of last_gc and any label are not present in this listing; presumably
  * the first test skips the fib6_run_gc() call -- confirm.
  */
 1058 static int ip6_dst_gc(void)
 
 1060         static unsigned expire = 30*HZ;
 
 1061         static unsigned long last_gc;
 
 1062         unsigned long now = jiffies;
 
 1064         if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
 
 1065             atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
 
 1069         fib6_run_gc(expire);
 
 1071         if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
 
 1072                 expire = ip6_rt_gc_timeout>>1;
 
 1075         expire -= expire>>ip6_rt_gc_elasticity;
 
 1076         return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
 
1079 /* Clean host part of a prefix. Not necessary in radix tree,
 
1080    but results in cleaner routing tables.
 
1082    Remove it only when all the things will work!
 
 /*
  * Look up the IPv6 MTU configured for dev.
  *
  * Starts from IPV6_MIN_MTU and replaces it with the device's cnf.mtu6
  * taken from the inet6_dev obtained via in6_dev_get().
  *
  * NOTE(review): the check on idev, the release of the reference
  * presumably taken by in6_dev_get(), and the return statement are not
  * present in this listing -- confirm.
  */
 1085 static int ipv6_get_mtu(struct net_device *dev)
 
 1087         int mtu = IPV6_MIN_MTU;
 
 1088         struct inet6_dev *idev;
 
 1090         idev = in6_dev_get(dev);
 
 1092                 mtu = idev->cnf.mtu6;
 
 /*
  * Look up the hop limit configured for dev.
  *
  * Starts from the global ipv6_devconf.hop_limit and replaces it with the
  * device's cnf.hop_limit taken from the inet6_dev obtained via
  * in6_dev_get().
  *
  * NOTE(review): the check on idev, the release of the reference
  * presumably taken by in6_dev_get(), and the return statement are not
  * present in this listing -- confirm.
  */
 1098 int ipv6_get_hoplimit(struct net_device *dev)
 
 1100         int hoplimit = ipv6_devconf.hop_limit;
 
 1101         struct inet6_dev *idev;
 
 1103         idev = in6_dev_get(dev);
 
 1105                 hoplimit = idev->cnf.hop_limit;
 
1115 int ip6_route_add(struct fib6_config *cfg)
 
1118         struct rt6_info *rt = NULL;
 
1119         struct net_device *dev = NULL;
 
1120         struct inet6_dev *idev = NULL;
 
1121         struct fib6_table *table;
 
1124         if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
 
1126 #ifndef CONFIG_IPV6_SUBTREES
 
1127         if (cfg->fc_src_len)
 
1130         if (cfg->fc_ifindex) {
 
1132                 dev = dev_get_by_index(cfg->fc_ifindex);
 
1135                 idev = in6_dev_get(dev);
 
1140         if (cfg->fc_metric == 0)
 
1141                 cfg->fc_metric = IP6_RT_PRIO_USER;
 
1143         table = fib6_new_table(cfg->fc_table);
 
1144         if (table == NULL) {
 
1149         rt = ip6_dst_alloc();
 
1156         rt->u.dst.obsolete = -1;
 
1157         rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
 
1159         if (cfg->fc_protocol == RTPROT_UNSPEC)
 
1160                 cfg->fc_protocol = RTPROT_BOOT;
 
1161         rt->rt6i_protocol = cfg->fc_protocol;
 
1163         addr_type = ipv6_addr_type(&cfg->fc_dst);
 
1165         if (addr_type & IPV6_ADDR_MULTICAST)
 
1166                 rt->u.dst.input = ip6_mc_input;
 
1168                 rt->u.dst.input = ip6_forward;
 
1170         rt->u.dst.output = ip6_output;
 
1172         ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
 
1173         rt->rt6i_dst.plen = cfg->fc_dst_len;
 
1174         if (rt->rt6i_dst.plen == 128)
 
1175                rt->u.dst.flags = DST_HOST;
 
1177 #ifdef CONFIG_IPV6_SUBTREES
 
1178         ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
 
1179         rt->rt6i_src.plen = cfg->fc_src_len;
 
1182         rt->rt6i_metric = cfg->fc_metric;
 
1184         /* We cannot add true routes via loopback here,
 
1185            they would result in kernel looping; promote them to reject routes
 
1187         if ((cfg->fc_flags & RTF_REJECT) ||
 
1188             (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
 
1189                 /* hold loopback dev/idev if we haven't done so. */
 
1190                 if (dev != &loopback_dev) {
 
1195                         dev = &loopback_dev;
 
1197                         idev = in6_dev_get(dev);
 
1203                 rt->u.dst.output = ip6_pkt_discard_out;
 
1204                 rt->u.dst.input = ip6_pkt_discard;
 
1205                 rt->u.dst.error = -ENETUNREACH;
 
1206                 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
 
1210         if (cfg->fc_flags & RTF_GATEWAY) {
 
1211                 struct in6_addr *gw_addr;
 
1214                 gw_addr = &cfg->fc_gateway;
 
1215                 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
 
1216                 gwa_type = ipv6_addr_type(gw_addr);
 
1218                 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
 
1219                         struct rt6_info *grt;
 
1221                         /* IPv6 strictly inhibits using not link-local
 
1222                            addresses as nexthop address.
 
1223                            Otherwise, router will not be able to send redirects.
 
1224                            It is very good, but in some (rare!) circumstances
 
1225                            (SIT, PtP, NBMA NOARP links) it is handy to allow
 
1226                            some exceptions. --ANK
 
1229                         if (!(gwa_type&IPV6_ADDR_UNICAST))
 
1232                         grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
 
1234                         err = -EHOSTUNREACH;
 
1238                                 if (dev != grt->rt6i_dev) {
 
1239                                         dst_release(&grt->u.dst);
 
1243                                 dev = grt->rt6i_dev;
 
1244                                 idev = grt->rt6i_idev;
 
1246                                 in6_dev_hold(grt->rt6i_idev);
 
1248                         if (!(grt->rt6i_flags&RTF_GATEWAY))
 
1250                         dst_release(&grt->u.dst);
 
1256                 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
 
1264         if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
 
1265                 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
 
1266                 if (IS_ERR(rt->rt6i_nexthop)) {
 
1267                         err = PTR_ERR(rt->rt6i_nexthop);
 
1268                         rt->rt6i_nexthop = NULL;
 
1273         rt->rt6i_flags = cfg->fc_flags;
 
1280                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
 
1281                         int type = nla->nla_type;
 
1284                                 if (type > RTAX_MAX) {
 
1289                                 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
 
1294         if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
 
1295                 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
 
1296         if (!rt->u.dst.metrics[RTAX_MTU-1])
 
1297                 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
 
1298         if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
 
1299                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
 
1300         rt->u.dst.dev = dev;
 
1301         rt->rt6i_idev = idev;
 
1302         rt->rt6i_table = table;
 
1303         return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
 
1311                 dst_free(&rt->u.dst);
 
/*
 * Delete @rt from the FIB table it is linked into (rt->rt6i_table).
 *
 * fib6_del() is called with the table write lock held, and one reference
 * on the route's dst entry is dropped inside the same locked region.
 * ip6_route_del() in this file takes that reference with dst_hold() just
 * before calling in.  @info is handed to fib6_del() unchanged.
 */
1315 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
 
1318         struct fib6_table *table;
 
             /* ip6_null_entry is tested for before any table access. */
1320         if (rt == &ip6_null_entry)
 
1323         table = rt->rt6i_table;
 
1324         write_lock_bh(&table->tb6_lock);
 
1326         err = fib6_del(rt, info);
 
1327         dst_release(&rt->u.dst);
 
1329         write_unlock_bh(&table->tb6_lock);
 
/*
 * Delete @rt through __ip6_del_rt() with a NULL nl_info and return that
 * function's result.
 */
1334 int ip6_del_rt(struct rt6_info *rt)
 
1336         return __ip6_del_rt(rt, NULL);
 
/*
 * Delete a route selected by @cfg.
 *
 * The table named by cfg->fc_table is fetched with fib6_get_table() and,
 * under the table read lock, fib6_locate() is asked for the node that
 * holds cfg's destination and source prefixes.  Every route on that
 * node's leaf list is tested against the optional selectors:
 *   - cfg->fc_ifindex, when non-zero, against the route device's ifindex
 *     (a route without a device does not match);
 *   - cfg->fc_gateway, when RTF_GATEWAY is set in cfg->fc_flags;
 *   - cfg->fc_metric, when non-zero, against rt6i_metric.
 * NOTE(review): a failed test presumably moves on to the next route.
 *
 * A route that reaches the end of the tests is pinned with dst_hold(),
 * the read lock is released, and the route is handed to __ip6_del_rt()
 * together with cfg->fc_nlinfo; that call's result is returned.
 */
1339 static int ip6_route_del(struct fib6_config *cfg)
 
1341         struct fib6_table *table;
 
1342         struct fib6_node *fn;
 
1343         struct rt6_info *rt;
 
1346         table = fib6_get_table(cfg->fc_table);
 
1350         read_lock_bh(&table->tb6_lock);
 
1352         fn = fib6_locate(&table->tb6_root,
 
1353                          &cfg->fc_dst, cfg->fc_dst_len,
 
1354                          &cfg->fc_src, cfg->fc_src_len);
 
1357                 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
 
1358                         if (cfg->fc_ifindex &&
 
1359                             (rt->rt6i_dev == NULL ||
 
1360                              rt->rt6i_dev->ifindex != cfg->fc_ifindex))
 
1362                         if (cfg->fc_flags & RTF_GATEWAY &&
 
1363                             !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
 
1365                         if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
 
                             /*
                              * Pin the route before dropping the read lock;
                              * __ip6_del_rt() releases this reference.
                              */
1367                         dst_hold(&rt->u.dst);
 
1368                         read_unlock_bh(&table->tb6_lock);
 
1370                         return __ip6_del_rt(rt, &cfg->fc_nlinfo);
 
1373         read_unlock_bh(&table->tb6_lock);
 
/*
 * Flow key used for redirect lookups.  ip6_route_redirect() fills one in
 * and passes it to fib6_rule_lookup() cast to struct flowi *;
 * __ip6_route_redirect() casts the pointer back to read the gateway
 * member.
 */
1381 struct ip6rd_flowi {
 
             /* Compared against rt6i_gateway in __ip6_route_redirect(). */
1383         struct in6_addr gateway;
 
/*
 * Per-table lookup routine that ip6_route_redirect() hands to
 * fib6_rule_lookup().
 *
 * @fl is treated as a struct ip6rd_flowi so that the extra gateway
 * address can be read.  Under the table read lock, fib6_lookup() finds the
 * node for fl6_dst/fl6_src and the node's leaf list is walked with four
 * tests per route: rt6_check_expired(), RTF_GATEWAY set in rt6i_flags,
 * the route device's ifindex equal to fl->oif, and rt6i_gateway equal to
 * rdfl->gateway.
 *
 * NOTE(review): rt may be replaced by &ip6_null_entry before BACKTRACK();
 * the guarding condition should be confirmed.  A reference is taken on
 * the selected route with dst_hold() before the read lock is released.
 */
1386 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
 
1390         struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
 
1391         struct rt6_info *rt;
 
1392         struct fib6_node *fn;
 
1395          * Get the "current" route for this destination and
 
1396          * check if the redirect has come from approriate router.
 
1398          * RFC 2461 specifies that redirects should only be
 
1399          * accepted if they come from the nexthop to the target.
 
1400          * Due to the way the routes are chosen, this notion
 
1401          * is a bit fuzzy and one might need to check all possible
 
1405         read_lock_bh(&table->tb6_lock);
 
1406         fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
 
1408         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
 
1410                  * Current route is on-link; redirect is always invalid.
 
1412                  * Seems, previous statement is not true. It could
 
1413                  * be node, which looks for us as on-link (f.e. proxy ndisc)
 
1414                  * But then router serving it might decide, that we should
 
1415                  * know truth 8)8) --ANK (980726).
 
1417                 if (rt6_check_expired(rt))
 
1419                 if (!(rt->rt6i_flags & RTF_GATEWAY))
 
1421                 if (fl->oif != rt->rt6i_dev->ifindex)
 
1423                 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
 
1429                 rt = &ip6_null_entry;
 
1430         BACKTRACK(&fl->fl6_src);
 
1432         dst_hold(&rt->u.dst);
 
1434         read_unlock_bh(&table->tb6_lock);
 
/*
 * Look up the route that a redirect for @dest must be checked against.
 *
 * Builds an ip6rd_flowi carrying @dev's ifindex as the output interface
 * and a copy of @gateway, starts from the RT6_LOOKUP_F_HAS_SADDR lookup
 * flag, adds RT6_LOOKUP_F_IFACE when rt6_need_strict(dest) is true, and
 * runs __ip6_route_redirect() through fib6_rule_lookup().  The result of
 * that call is returned cast to struct rt6_info *.
 */
1439 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
 
1440                                            struct in6_addr *src,
 
1441                                            struct in6_addr *gateway,
 
1442                                            struct net_device *dev)
 
1444         int flags = RT6_LOOKUP_F_HAS_SADDR;
 
1445         struct ip6rd_flowi rdfl = {
 
1447                         .oif = dev->ifindex,
 
1455                 .gateway = *gateway,
 
1458         if (rt6_need_strict(dest))
 
1459                 flags |= RT6_LOOKUP_F_IFACE;
 
             /* The flow key is passed as a plain struct flowi pointer. */
1461         return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect);
 
/*
 * Act on a redirect for @dest whose new next hop is @neigh.
 *
 * @saddr is passed to ip6_route_redirect() as the gateway the current
 * route must use, and neigh->dev as the device.  When that lookup yields
 * &ip6_null_entry a rate-limited debug message is logged.
 *
 * Otherwise the visible steps are:
 *   - neigh_update() moves @neigh to NUD_STALE with @lladdr; the router
 *     flags are only included in the update when @on_link is zero;
 *   - the old route is confirmed with dst_confirm();
 *   - if @neigh already is the old route's neighbour the redirect is a
 *     duplicate;
 *   - the old route is copied with ip6_rt_copy() and the copy is turned
 *     into a /128 DST_HOST entry for @dest with
 *     RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE, gateway and nexthop taken
 *     from @neigh, and MTU/ADVMSS metrics recomputed from neigh->dev;
 *   - the copy is inserted with ip6_ins_rt() and NETEVENT_REDIRECT
 *     notifiers are called with the old and new dst entries.
 *
 * NOTE(review): RTF_GATEWAY is cleared again on the copy under a
 * condition that should be confirmed (presumably @on_link), as should
 * what is done to an old RTF_CACHE route.  The reference on the
 * looked-up route is dropped with dst_release() at the end.
 */
1464 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
 
1465                   struct in6_addr *saddr,
 
1466                   struct neighbour *neigh, u8 *lladdr, int on_link)
 
1468         struct rt6_info *rt, *nrt = NULL;
 
1469         struct netevent_redirect netevent;
 
1471         rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
 
1473         if (rt == &ip6_null_entry) {
 
1474                 if (net_ratelimit())
 
1475                         printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
 
1476                                "for redirect target\n");
 
1481          *      We have finally decided to accept it.
 
1484         neigh_update(neigh, lladdr, NUD_STALE,
 
1485                      NEIGH_UPDATE_F_WEAK_OVERRIDE|
 
1486                      NEIGH_UPDATE_F_OVERRIDE|
 
1487                      (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
 
1488                                      NEIGH_UPDATE_F_ISROUTER))
 
1492          * Redirect received -> path was valid.
 
1493          * Look, redirects are sent only in response to data packets,
 
1494          * so that this nexthop apparently is reachable. --ANK
 
1496         dst_confirm(&rt->u.dst);
 
1498         /* Duplicate redirect: silently ignore. */
 
1499         if (neigh == rt->u.dst.neighbour)
 
1502         nrt = ip6_rt_copy(rt);
 
1506         nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
 
1508                 nrt->rt6i_flags &= ~RTF_GATEWAY;
 
1510         ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
 
1511         nrt->rt6i_dst.plen = 128;
 
1512         nrt->u.dst.flags |= DST_HOST;
 
1514         ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
 
1515         nrt->rt6i_nexthop = neigh_clone(neigh);
 
1516         /* Reset pmtu, it may be better */
 
1517         nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
 
1518         nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
 
1520         if (ip6_ins_rt(nrt))
 
1523         netevent.old = &rt->u.dst;
 
1524         netevent.new = &nrt->u.dst;
 
1525         call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
 
1527         if (rt->rt6i_flags&RTF_CACHE) {
 
1533         dst_release(&rt->u.dst);
 
1538  *      Handle ICMP "packet too big" messages
 
1539  *      i.e. Path MTU discovery
 
/*
 * Record a new, smaller path MTU @pmtu for the path @saddr -> @daddr
 * reached through @dev.
 *
 * The route is found with rt6_lookup() on dev->ifindex.  @pmtu is first
 * compared with the route's current dst_mtu() (NOTE(review): presumably a
 * value that is not smaller is ignored), and a value below IPV6_MIN_MTU
 * is raised to IPV6_MIN_MTU.  The route is then confirmed with
 * dst_confirm().
 *
 * For an RTF_CACHE route the MTU metric is overwritten in place, the
 * expiry is set to ip6_rt_mtu_expires and RTF_MODIFIED|RTF_EXPIRES are
 * added to its flags.  Otherwise a new route is allocated:
 * rt6_alloc_cow() when the route has no nexthop and lacks RTF_NONEXTHOP,
 * rt6_alloc_clone() in the other case.  The new route receives @pmtu,
 * the same expiry and RTF_DYNAMIC|RTF_EXPIRES.
 *
 * NOTE(review): RTAX_FEATURE_ALLFRAG is set in both branches under a
 * condition that should be confirmed.  The reference obtained from
 * rt6_lookup() is dropped with dst_release() at the end.
 */
1542 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
 
1543                         struct net_device *dev, u32 pmtu)
 
1545         struct rt6_info *rt, *nrt;
 
1548         rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
 
1552         if (pmtu >= dst_mtu(&rt->u.dst))
 
1555         if (pmtu < IPV6_MIN_MTU) {
 
1557                  * According to RFC2460, PMTU is set to the IPv6 Minimum Link
 
1558                  * MTU (1280) and a fragment header should always be included
 
1559                  * after a node receiving Too Big message reporting PMTU is
 
1560                  * less than the IPv6 Minimum Link MTU.
 
1562                 pmtu = IPV6_MIN_MTU;
 
1566         /* New mtu received -> path was valid.
 
1567            They are sent only in response to data packets,
 
1568            so that this nexthop apparently is reachable. --ANK
 
1570         dst_confirm(&rt->u.dst);
 
1572         /* Host route. If it is static, it would be better
 
1573            not to override it, but add new one, so that
 
1574            when cache entry will expire old pmtu
 
1575            would return automatically.
 
1577         if (rt->rt6i_flags & RTF_CACHE) {
 
1578                 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 
1580                         rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 
1581                 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
 
1582                 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
 
1587            Two cases are possible:
 
1588            1. It is connected route. Action: COW
 
1589            2. It is gatewayed route or NONEXTHOP route. Action: clone it.
 
1591         if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
 
1592                 nrt = rt6_alloc_cow(rt, daddr, saddr);
 
1594                 nrt = rt6_alloc_clone(rt, daddr);
 
1597                 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
 
1599                         nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
 
1601                 /* According to RFC 1981, detecting PMTU increase shouldn't be
 
1602                  * happened within 5 mins, the recommended timer is 10 mins.
 
1603                  * Here this route expiration time is set to ip6_rt_mtu_expires
 
1604                  * which is 10 mins. After 10 mins the decreased pmtu is expired
 
1605                  * and detecting PMTU increase will be automatically happened.
 
1607                 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
 
1608                 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
 
1613         dst_release(&rt->u.dst);
 
1617  *      Misc support functions
 
/*
 * Allocate a new route with ip6_dst_alloc() and initialise it from @ort.
 *
 * Copied as-is: input/output handlers, the RTAX_MAX metrics, the dst
 * error, the device and inet6_dev pointers (each with a reference taken),
 * the gateway, the destination key, the source key when
 * CONFIG_IPV6_SUBTREES is set, and the table pointer.
 *
 * Deliberately not inherited: lastuse is set to the current jiffies,
 * rt6i_expires is cleared together with the RTF_EXPIRES flag, and
 * rt6i_metric is reset to 0.
 */
1620 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
 
1622         struct rt6_info *rt = ip6_dst_alloc();
 
1625                 rt->u.dst.input = ort->u.dst.input;
 
1626                 rt->u.dst.output = ort->u.dst.output;
 
1628                 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
 
1629                 rt->u.dst.error = ort->u.dst.error;
 
1630                 rt->u.dst.dev = ort->u.dst.dev;
 
1632                         dev_hold(rt->u.dst.dev);
 
1633                 rt->rt6i_idev = ort->rt6i_idev;
 
1635                         in6_dev_hold(rt->rt6i_idev);
 
1636                 rt->u.dst.lastuse = jiffies;
 
1637                 rt->rt6i_expires = 0;
 
1639                 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
 
                     /* The copy starts without an expiry. */
1640                 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
 
1641                 rt->rt6i_metric = 0;
 
1643                 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
 
1644 #ifdef CONFIG_IPV6_SUBTREES
 
1645                 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
 
1647                 rt->rt6i_table = ort->rt6i_table;
 
1652 #ifdef CONFIG_IPV6_ROUTE_INFO
 
/*
 * Find the route-information route for @prefix/@prefixlen via @gwaddr on
 * interface @ifindex in table RT6_TABLE_INFO.
 *
 * With the table lock taken through write_lock_bh(), fib6_locate() is
 * used to find the node for the prefix (no source prefix), and the node's
 * leaf list is scanned for a route whose device has ifindex @ifindex,
 * whose flags contain both RTF_ROUTEINFO and RTF_GATEWAY, and whose
 * gateway equals @gwaddr.  A reference is taken on the match with
 * dst_hold() before the lock is released.
 */
1653 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
 
1654                                            struct in6_addr *gwaddr, int ifindex)
 
1656         struct fib6_node *fn;
 
1657         struct rt6_info *rt = NULL;
 
1658         struct fib6_table *table;
 
1660         table = fib6_get_table(RT6_TABLE_INFO);
 
1664         write_lock_bh(&table->tb6_lock);
 
1665         fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
 
1669         for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
 
1670                 if (rt->rt6i_dev->ifindex != ifindex)
 
                     /* Both flags must be set, not just one of them. */
1672                 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
 
1674                 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
 
1676                 dst_hold(&rt->u.dst);
 
1680         write_unlock_bh(&table->tb6_lock);
 
/*
 * Install a route-information route for @prefix/@prefixlen via @gwaddr on
 * @ifindex in table RT6_TABLE_INFO and return it.
 *
 * The route is described by a fib6_config with flags
 * RTF_GATEWAY|RTF_ADDRCONF|RTF_ROUTEINFO|RTF_UP plus RTF_PREF(pref);
 * RTF_DEFAULT may be added as well (see the prefix-length comment below).
 * The return value of ip6_route_add() is not examined: the function
 * returns whatever rt6_get_route_info() finds afterwards.
 */
1684 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
 
1685                                            struct in6_addr *gwaddr, int ifindex,
 
1688         struct fib6_config cfg = {
 
1689                 .fc_table       = RT6_TABLE_INFO,
 
1691                 .fc_ifindex     = ifindex,
 
1692                 .fc_dst_len     = prefixlen,
 
1693                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
 
1694                                   RTF_UP | RTF_PREF(pref),
 
1697         ipv6_addr_copy(&cfg.fc_dst, prefix);
 
1698         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
1700         /* We should treat it as a default route if prefix length is 0. */
 
1702                 cfg.fc_flags |= RTF_DEFAULT;
 
1704         ip6_route_add(&cfg);
 
             /* Look the route up again to obtain a held pointer to it. */
1706         return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
 
/*
 * Find the autoconfigured default route through router @addr on @dev in
 * table RT6_TABLE_DFLT.
 *
 * With the table lock taken through write_lock_bh(), the routes hanging
 * off the table root are scanned for one whose device is @dev, whose flags
 * contain both RTF_ADDRCONF and RTF_DEFAULT, and whose gateway equals
 * @addr.  dst_hold() takes a reference before the lock is released.
 * NOTE(review): confirm the guard in front of dst_hold() for the
 * no-match case.
 */
1710 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
 
1712         struct rt6_info *rt;
 
1713         struct fib6_table *table;
 
1715         table = fib6_get_table(RT6_TABLE_DFLT);
 
1719         write_lock_bh(&table->tb6_lock);
 
1720         for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
 
1721                 if (dev == rt->rt6i_dev &&
 
1722                     ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
 
1723                     ipv6_addr_equal(&rt->rt6i_gateway, addr))
 
1727                 dst_hold(&rt->u.dst);
 
1728         write_unlock_bh(&table->tb6_lock);
 
/*
 * Install a default route through router @gwaddr on @dev in table
 * RT6_TABLE_DFLT and return it.
 *
 * The route is created with flags
 * RTF_GATEWAY|RTF_ADDRCONF|RTF_DEFAULT|RTF_UP|RTF_EXPIRES plus
 * RTF_PREF(pref).  The return value of ip6_route_add() is not examined:
 * the function returns whatever rt6_get_dflt_router() finds afterwards.
 */
1732 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
 
1733                                      struct net_device *dev,
 
1736         struct fib6_config cfg = {
 
1737                 .fc_table       = RT6_TABLE_DFLT,
 
1739                 .fc_ifindex     = dev->ifindex,
 
1740                 .fc_flags       = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
 
1741                                   RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
 
1744         ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
 
1746         ip6_route_add(&cfg);
 
1748         return rt6_get_dflt_router(gwaddr, dev);
 
/*
 * Walk the routes hanging off the root of table RT6_TABLE_DFLT and act on
 * the autoconfigured default routes.
 *
 * The scan runs under the table read lock.  A matching route is pinned
 * with dst_hold() and the lock is dropped before further processing.
 * NOTE(review): presumably the route is then deleted and the scan
 * restarted - confirm.
 *
 * NOTE(review): the flag test below is true when either RTF_DEFAULT or
 * RTF_ADDRCONF is set, whereas rt6_get_dflt_router() requires both flags;
 * confirm this difference is intended.
 */
1751 void rt6_purge_dflt_routers(void)
 
1753         struct rt6_info *rt;
 
1754         struct fib6_table *table;
 
1756         /* NOTE: Keep consistent with rt6_get_dflt_router */
 
1757         table = fib6_get_table(RT6_TABLE_DFLT);
 
1762         read_lock_bh(&table->tb6_lock);
 
1763         for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
 
1764                 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
 
1765                         dst_hold(&rt->u.dst);
 
1766                         read_unlock_bh(&table->tb6_lock);
 
1771         read_unlock_bh(&table->tb6_lock);
 
/*
 * Translate an ioctl-style struct in6_rtmsg into a struct fib6_config.
 *
 * @cfg is zeroed first, so every field not assigned below stays 0.  The
 * table is always RT6_TABLE_MAIN; interface index, metric, prefix
 * lengths, flags and the three addresses are copied from @rtmsg, and
 * rtmsg_info is stored as the expiry value.
 */
1774 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
 
1775                                  struct fib6_config *cfg)
 
1777         memset(cfg, 0, sizeof(*cfg));
 
1779         cfg->fc_table = RT6_TABLE_MAIN;
 
1780         cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
 
1781         cfg->fc_metric = rtmsg->rtmsg_metric;
 
1782         cfg->fc_expires = rtmsg->rtmsg_info;
 
1783         cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
 
1784         cfg->fc_src_len = rtmsg->rtmsg_src_len;
 
1785         cfg->fc_flags = rtmsg->rtmsg_flags;
 
1787         ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
 
1788         ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
 
1789         ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
 
/*
 * Handle the SIOCADDRT and SIOCDELRT route ioctls.
 *
 * The caller must have CAP_NET_ADMIN.  The user-supplied struct
 * in6_rtmsg at @arg is copied in with copy_from_user(), converted with
 * rtmsg_to_fib6_config(), and then passed to ip6_route_add() or
 * ip6_route_del().
 * NOTE(review): presumably SIOCADDRT selects the add and SIOCDELRT the
 * delete; the selection and the error codes should be confirmed.
 */
1792 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
 
1794         struct fib6_config cfg;
 
1795         struct in6_rtmsg rtmsg;
 
1799         case SIOCADDRT:         /* Add a route */
 
1800         case SIOCDELRT:         /* Delete a route */
 
1801                 if (!capable(CAP_NET_ADMIN))
 
1803                 err = copy_from_user(&rtmsg, arg,
 
1804                                      sizeof(struct in6_rtmsg));
 
1808                 rtmsg_to_fib6_config(&rtmsg, &cfg);
 
1813                         err = ip6_route_add(&cfg);
 
1816                         err = ip6_route_del(&cfg);
 
1830  *      Drop the packet on the floor
 
/*
 * Common helper for the discard/prohibit dst handlers in this file.
 *
 * @code is the ICMPv6 destination-unreachable code to report and
 * @ipstats_mib_noroutes selects the statistics counter.  For
 * IPSTATS_MIB_INNOROUTES the destination address type is inspected
 * first: an address of type IPV6_ADDR_ANY or IPV6_ADDR_RESERVED is
 * counted as IPSTATS_MIB_INADDRERRORS.  Under the
 * IPSTATS_MIB_OUTNOROUTES label the counter named by
 * @ipstats_mib_noroutes itself is incremented.  An ICMPv6
 * ICMPV6_DEST_UNREACH message with @code is then sent for the packet.
 * NOTE(review): how the two switch cases connect, and what finally
 * happens to @skb, should be confirmed.
 */
1833 static inline int ip6_pkt_drop(struct sk_buff *skb, int code,
 
1834                                int ipstats_mib_noroutes)
 
1837         switch (ipstats_mib_noroutes) {
 
1838         case IPSTATS_MIB_INNOROUTES:
 
1839                 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
 
1840                 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
 
1841                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
 
1845         case IPSTATS_MIB_OUTNOROUTES:
 
1846                 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
 
1849         icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
 
/*
 * dst input handler installed on reject routes by ip6_route_add(): drops
 * via ip6_pkt_drop() with ICMPV6_NOROUTE, counted as
 * IPSTATS_MIB_INNOROUTES.
 */
1854 static int ip6_pkt_discard(struct sk_buff *skb)
 
1856         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
 
/*
 * dst output handler installed on reject routes by ip6_route_add(): drops
 * via ip6_pkt_drop() with ICMPV6_NOROUTE, counted as
 * IPSTATS_MIB_OUTNOROUTES.
 */
1859 static int ip6_pkt_discard_out(struct sk_buff *skb)
 
             /* ip6_pkt_drop() passes skb->dev to icmpv6_send(). */
1861         skb->dev = skb->dst->dev;
 
1862         return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
 
1865 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 
/*
 * Drop via ip6_pkt_drop() with ICMPV6_ADM_PROHIBITED, counted as
 * IPSTATS_MIB_INNOROUTES.  Only built with CONFIG_IPV6_MULTIPLE_TABLES.
 */
1867 static int ip6_pkt_prohibit(struct sk_buff *skb)
 
1869         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
 
/*
 * Output-side counterpart of ip6_pkt_prohibit(): drops via ip6_pkt_drop()
 * with ICMPV6_ADM_PROHIBITED, counted as IPSTATS_MIB_OUTNOROUTES.
 */
1872 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
 
             /* ip6_pkt_drop() passes skb->dev to icmpv6_send(). */
1874         skb->dev = skb->dst->dev;
 
1875         return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
 
1878 static int ip6_pkt_blk_hole(struct sk_buff *skb)
 
1887  *      Allocate a dst for local (unicast / anycast) address.
 
/*
 * Build the host route for one of this node's own addresses @addr.
 *
 * The route is bound to the loopback device (a device reference is
 * taken) while rt6i_idev is set to the caller's @idev.  It is a /128
 * DST_HOST entry with ip6_input/ip6_output as handlers, MTU and ADVMSS
 * metrics derived from the loopback device, hop limit metric -1,
 * obsolete set to -1, flags RTF_UP|RTF_NONEXTHOP plus either RTF_ANYCAST
 * or RTF_LOCAL, table RT6_TABLE_LOCAL, and a reference count of 1.
 * NOTE(review): the ANYCAST/LOCAL choice presumably depends on a further
 * parameter of this function - confirm.
 *
 * Returns ERR_PTR(-ENOMEM) when the route cannot be allocated or when
 * ndisc_get_neigh() yields no neighbour entry; in the latter case the
 * route is released with dst_free() first.
 */
1890 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
 
1891                                     const struct in6_addr *addr,
 
1894         struct rt6_info *rt = ip6_dst_alloc();
 
1897                 return ERR_PTR(-ENOMEM);
 
1899         dev_hold(&loopback_dev);
 
1902         rt->u.dst.flags = DST_HOST;
 
1903         rt->u.dst.input = ip6_input;
 
1904         rt->u.dst.output = ip6_output;
 
1905         rt->rt6i_dev = &loopback_dev;
 
1906         rt->rt6i_idev = idev;
 
1907         rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
 
1908         rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
 
1909         rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
 
1910         rt->u.dst.obsolete = -1;
 
1912         rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
 
1914                 rt->rt6i_flags |= RTF_ANYCAST;
 
1916                 rt->rt6i_flags |= RTF_LOCAL;
 
1917         rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
 
1918         if (rt->rt6i_nexthop == NULL) {
 
1919                 dst_free(&rt->u.dst);
 
1920                 return ERR_PTR(-ENOMEM);
 
1923         ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
 
1924         rt->rt6i_dst.plen = 128;
 
1925         rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL);
 
1927         atomic_set(&rt->u.dst.__refcnt, 1);
 
/*
 * fib6_clean_all() callback used by rt6_ifdown().
 *
 * @arg is the device going down, or NULL to match every device.  A route
 * matches when its device equals @arg (or @arg is NULL) and it is not
 * ip6_null_entry; a trace message is emitted for each match.
 * NOTE(review): the value returned to fib6_clean_all() should be
 * confirmed.
 */
1932 static int fib6_ifdown(struct rt6_info *rt, void *arg)
 
1934         if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
 
1935             rt != &ip6_null_entry) {
 
1936                 RT6_TRACE("deleted by ifdown %p\n", rt);
 
/*
 * Run fib6_ifdown() over all routes via fib6_clean_all(), passing @dev as
 * the callback argument (fib6_ifdown() treats a NULL @dev as "any
 * device").
 */
1942 void rt6_ifdown(struct net_device *dev)
 
1944         fib6_clean_all(fib6_ifdown, 0, dev);
 
/*
 * Argument block passed from rt6_mtu_change() to rt6_mtu_change_route()
 * through fib6_clean_all().  rt6_mtu_change_route() reads arg->dev and
 * arg->mtu from it.
 */
1947 struct rt6_mtu_change_arg
 
             /* Device whose MTU changed; only routes on it are updated. */
1949         struct net_device *dev;
 
/*
 * fib6_clean_all() callback used by rt6_mtu_change(); @p_arg points to a
 * struct rt6_mtu_change_arg carrying the device and its new MTU.
 *
 * The route's MTU metric is rewritten to the new MTU, and its ADVMSS
 * metric recomputed with ipv6_advmss(), when all of the following hold:
 *   - the route uses the device named in the argument block;
 *   - the RTAX_MTU metric is not locked;
 *   - either the route MTU is larger than the new MTU, or it is smaller
 *     than the new MTU and equal to idev->cnf.mtu6 of the device.
 */
1953 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
 
1955         struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
 
1956         struct inet6_dev *idev;
 
1958         /* In IPv6 pmtu discovery is not optional,
 
1959            so that RTAX_MTU lock cannot disable it.
 
1960            We still use this lock to block changes
 
1961            caused by addrconf/ndisc.
 
1964         idev = __in6_dev_get(arg->dev);
 
1968         /* For administrative MTU increase, there is no way to discover
 
1969            IPv6 PMTU increase, so PMTU increase should be updated here.
 
1970            Since RFC 1981 doesn't include administrative MTU increase
 
1971            update PMTU increase is a MUST. (i.e. jumbo frame)
 
1974            If new MTU is less than route PMTU, this new MTU will be the
 
1975            lowest MTU in the path, update the route PMTU to reflect PMTU
 
1976            decreases; if new MTU is greater than route PMTU, and the
 
1977            old MTU is the lowest MTU in the path, update the route PMTU
 
1978            to reflect the increase. In this case if the other nodes' MTU
 
1979            also have the lowest MTU, TOO BIG MESSAGE will be lead to
 
1982         if (rt->rt6i_dev == arg->dev &&
 
1983             !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
 
1984             (dst_mtu(&rt->u.dst) > arg->mtu ||
 
1985              (dst_mtu(&rt->u.dst) < arg->mtu &&
 
1986               dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
 
1987                 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
 
1988                 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
 
/*
 * Propagate a device MTU change to the routing tables by running
 * rt6_mtu_change_route() over all routes via fib6_clean_all(), with an
 * rt6_mtu_change_arg as the callback argument.
 */
1993 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
 
1995         struct rt6_mtu_change_arg arg = {
 
2000         fib6_clean_all(rt6_mtu_change_route, 0, &arg);
 
/*
 * Netlink attribute policy passed to nlmsg_parse() by
 * rtm_to_fib6_config() and inet6_rtm_getroute().  RTA_GATEWAY is
 * described by length (the size of an IPv6 address); the interface
 * indexes and the priority are 32-bit values, and RTA_METRICS is a nested
 * attribute.
 */
2003 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
 
2004         [RTA_GATEWAY]           = { .len = sizeof(struct in6_addr) },
 
2005         [RTA_OIF]               = { .type = NLA_U32 },
 
2006         [RTA_IIF]               = { .type = NLA_U32 },
 
2007         [RTA_PRIORITY]          = { .type = NLA_U32 },
 
2008         [RTA_METRICS]           = { .type = NLA_NESTED },
 
/*
 * Translate an rtnetlink route message into a struct fib6_config.
 *
 * The message is parsed against rtm_ipv6_policy, @cfg is zeroed, and the
 * fixed header supplies the table, prefix lengths and protocol.  Flags
 * start as RTF_UP; RTF_REJECT is added for RTN_UNREACHABLE and
 * RTF_GATEWAY when an RTA_GATEWAY attribute is present.  The sender pid
 * and the message header are stored in fc_nlinfo.
 *
 * For the destination and source prefixes only the number of whole bytes
 * needed for the prefix length, (len + 7) >> 3, is required from the
 * attribute and copied; the rest of the address stays zero from the
 * memset.  RTA_OIF, RTA_PRIORITY and RTA_METRICS fill in the interface
 * index, the metric and the metrics blob (pointer and length into the
 * message, not a copy).  An RTA_TABLE attribute overrides the table taken
 * from the header.
 */
2011 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
 
2012                               struct fib6_config *cfg)
 
2015         struct nlattr *tb[RTA_MAX+1];
 
2018         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
 
2023         rtm = nlmsg_data(nlh);
 
2024         memset(cfg, 0, sizeof(*cfg));
 
2026         cfg->fc_table = rtm->rtm_table;
 
2027         cfg->fc_dst_len = rtm->rtm_dst_len;
 
2028         cfg->fc_src_len = rtm->rtm_src_len;
 
2029         cfg->fc_flags = RTF_UP;
 
2030         cfg->fc_protocol = rtm->rtm_protocol;
 
2032         if (rtm->rtm_type == RTN_UNREACHABLE)
 
2033                 cfg->fc_flags |= RTF_REJECT;
 
2035         cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
 
2036         cfg->fc_nlinfo.nlh = nlh;
 
2038         if (tb[RTA_GATEWAY]) {
 
2039                 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
 
2040                 cfg->fc_flags |= RTF_GATEWAY;
 
                     /* Bytes needed to hold rtm_dst_len prefix bits. */
2044                 int plen = (rtm->rtm_dst_len + 7) >> 3;
 
2046                 if (nla_len(tb[RTA_DST]) < plen)
 
2049                 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
 
                     /* Bytes needed to hold rtm_src_len prefix bits. */
2053                 int plen = (rtm->rtm_src_len + 7) >> 3;
 
2055                 if (nla_len(tb[RTA_SRC]) < plen)
 
2058                 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
 
2062                 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
 
2064         if (tb[RTA_PRIORITY])
 
2065                 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
 
2067         if (tb[RTA_METRICS]) {
 
2068                 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
 
2069                 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
 
2073                 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
 
/*
 * rtnetlink handler for route deletion: converts the message with
 * rtm_to_fib6_config() and passes the result to ip6_route_del().
 */
2080 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
2082         struct fib6_config cfg;
 
2085         err = rtm_to_fib6_config(skb, nlh, &cfg);
 
2089         return ip6_route_del(&cfg);
 
/*
 * rtnetlink handler for route creation: converts the message with
 * rtm_to_fib6_config() and passes the result to ip6_route_add().
 */
2092 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
 
2094         struct fib6_config cfg;
 
2097         err = rtm_to_fib6_config(skb, nlh, &cfg);
 
2101         return ip6_route_add(&cfg);
 
/*
 * Upper bound on the size of one route message built by rt6_fill_node():
 * the aligned rtmsg header plus room for each attribute listed below.
 * inet6_rt_notify() sizes its skb with this value and warns when
 * rt6_fill_node() still reports -EMSGSIZE.
 */
2104 static inline size_t rt6_nlmsg_size(void)
 
2106         return NLMSG_ALIGN(sizeof(struct rtmsg))
 
2107                + nla_total_size(16) /* RTA_SRC */
 
2108                + nla_total_size(16) /* RTA_DST */
 
2109                + nla_total_size(16) /* RTA_GATEWAY */
 
2110                + nla_total_size(16) /* RTA_PREFSRC */
 
2111                + nla_total_size(4) /* RTA_TABLE */
 
2112                + nla_total_size(4) /* RTA_IIF */
 
2113                + nla_total_size(4) /* RTA_OIF */
 
2114                + nla_total_size(4) /* RTA_PRIORITY */
 
2115                + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
 
2116                + nla_total_size(sizeof(struct rta_cacheinfo));
 
/*
 * Append one rtnetlink route message describing @rt to @skb.
 *
 * @dst/@src: when given, reported as /128 destination/source instead of
 *            the route's own keys (used by inet6_rtm_getroute()).
 * @iif:      input interface to report, if any.
 * @type, @pid, @seq, @flags: passed to nlmsg_put() for the header.
 * @prefix:   non-zero when only RTF_PREFIX_RT routes are wanted.
 *
 * The message type is RTN_UNREACHABLE for RTF_REJECT routes, RTN_LOCAL
 * for routes on a loopback device and RTN_UNICAST otherwise.  The
 * protocol starts as rt6i_protocol and is overridden by route flags:
 * RTF_DYNAMIC gives RTPROT_REDIRECT, else RTF_ADDRCONF gives
 * RTPROT_KERNEL, else RTF_DEFAULT gives RTPROT_RA.  RTF_CACHE routes are
 * marked RTM_F_CLONED.
 *
 * Returns the result of nlmsg_end() on success.  On the failure path the
 * partially built message is removed again with nlmsg_cancel().
 * NOTE(review): the NLA_PUT*() macros presumably jump to nla_put_failure
 * when @skb runs out of room - confirm.
 */
2119 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
 
2120                          struct in6_addr *dst, struct in6_addr *src,
 
2121                          int iif, int type, u32 pid, u32 seq,
 
2122                          int prefix, unsigned int flags)
 
2125         struct nlmsghdr *nlh;
 
2129         if (prefix) {   /* user wants prefix routes only */
 
2130                 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
 
2131                         /* success since this is not a prefix route */
 
2136         nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
 
2140         rtm = nlmsg_data(nlh);
 
2141         rtm->rtm_family = AF_INET6;
 
2142         rtm->rtm_dst_len = rt->rt6i_dst.plen;
 
2143         rtm->rtm_src_len = rt->rt6i_src.plen;
 
2146                 table = rt->rt6i_table->tb6_id;
 
2148                 table = RT6_TABLE_UNSPEC;
 
             /* The table id goes into the header and into RTA_TABLE. */
2149         rtm->rtm_table = table;
 
2150         NLA_PUT_U32(skb, RTA_TABLE, table);
 
2151         if (rt->rt6i_flags&RTF_REJECT)
 
2152                 rtm->rtm_type = RTN_UNREACHABLE;
 
2153         else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
 
2154                 rtm->rtm_type = RTN_LOCAL;
 
2156                 rtm->rtm_type = RTN_UNICAST;
 
2158         rtm->rtm_scope = RT_SCOPE_UNIVERSE;
 
2159         rtm->rtm_protocol = rt->rt6i_protocol;
 
2160         if (rt->rt6i_flags&RTF_DYNAMIC)
 
2161                 rtm->rtm_protocol = RTPROT_REDIRECT;
 
2162         else if (rt->rt6i_flags & RTF_ADDRCONF)
 
2163                 rtm->rtm_protocol = RTPROT_KERNEL;
 
2164         else if (rt->rt6i_flags&RTF_DEFAULT)
 
2165                 rtm->rtm_protocol = RTPROT_RA;
 
2167         if (rt->rt6i_flags&RTF_CACHE)
 
2168                 rtm->rtm_flags |= RTM_F_CLONED;
 
2171                 NLA_PUT(skb, RTA_DST, 16, dst);
 
2172                 rtm->rtm_dst_len = 128;
 
2173         } else if (rtm->rtm_dst_len)
 
2174                 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
 
2175 #ifdef CONFIG_IPV6_SUBTREES
 
2177                 NLA_PUT(skb, RTA_SRC, 16, src);
 
2178                 rtm->rtm_src_len = 128;
 
2179         } else if (rtm->rtm_src_len)
 
2180                 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
 
2183                 NLA_PUT_U32(skb, RTA_IIF, iif);
 
2185                 struct in6_addr saddr_buf;
 
                     /* RTA_PREFSRC is only added when a source address is found. */
2186                 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
 
2187                         NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
 
2190         if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
 
2191                 goto nla_put_failure;
 
             /* The gateway is reported from the attached neighbour entry. */
2193         if (rt->u.dst.neighbour)
 
2194                 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
 
2197                 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
 
2199         NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
 
             /* Remaining lifetime relative to jiffies; 0 means no expiry set. */
2201         expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
 
2202         if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
 
2203                                expires, rt->u.dst.error) < 0)
 
2204                 goto nla_put_failure;
 
2206         return nlmsg_end(skb, nlh);
 
2209         nlmsg_cancel(skb, nlh);
 
/*
 * Dump callback: emit @rt as an RTM_NEWROUTE message with NLM_F_MULTI
 * into the dump skb carried in @p_arg (a struct rt6_rtnl_dump_arg).
 *
 * When the request that started the dump is long enough to carry a
 * struct rtmsg, its RTM_F_PREFIX flag selects the prefix-only mode of
 * rt6_fill_node().  The pid and sequence number come from the dump
 * request.
 */
2213 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
 
2215         struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
 
2218         if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
 
2219                 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
 
2220                 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
 
2224         return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
 
2225                      NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
 
2226                      prefix, NLM_F_MULTI);
 
/*
 * rtnetlink handler answering a single route query.
 *
 * The request is parsed against rtm_ipv6_policy and turned into a flow:
 * RTA_SRC and RTA_DST must each carry a full IPv6 address and become
 * fl6_src/fl6_dst, RTA_OIF becomes the output interface, and RTA_IIF is
 * remembered and resolved with __dev_get_by_index().  A reply skb of
 * NLMSG_GOODSIZE is allocated with header room reserved, the route is
 * resolved with ip6_route_output() and attached to the skb, and
 * rt6_fill_node() formats it as RTM_NEWROUTE using the requester's pid
 * and sequence number.  The reply is sent with rtnl_unicast().
 */
2229 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
 
2231         struct nlattr *tb[RTA_MAX+1];
 
2232         struct rt6_info *rt;
 
2233         struct sk_buff *skb;
 
2238         err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
 
2243         memset(&fl, 0, sizeof(fl));
 
2246                 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
 
2249                 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
 
2253                 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
 
2256                 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
 
2260                 iif = nla_get_u32(tb[RTA_IIF]);
 
2263                 fl.oif = nla_get_u32(tb[RTA_OIF]);
 
2266                 struct net_device *dev;
 
2267                 dev = __dev_get_by_index(iif);
 
2274         skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
 
2280         /* Reserve room for dummy headers, this skb can pass
 
2281            through good chunk of routing engine.
 
2283         skb_reset_mac_header(skb);
 
2284         skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
 
2286         rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
 
2287         skb->dst = &rt->u.dst;
 
2289         err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
 
2290                             RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
 
2291                             nlh->nlmsg_seq, 0, 0);
 
2297         err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid);
 
/*
 * Broadcast a route change (@event) for @rt to the RTNLGRP_IPV6_ROUTE
 * netlink group.
 *
 * pid and sequence number default to 0 and may be taken from @info.  The
 * message is built with rt6_fill_node() in an skb sized by
 * rt6_nlmsg_size() and sent with rtnl_notify(); errors are reported to
 * listeners through rtnl_set_sk_err().
 * NOTE(review): the conditions for reading @info and for the error
 * report should be confirmed.
 */
2302 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
 
2304         struct sk_buff *skb;
 
2305         u32 pid = 0, seq = 0;
 
2306         struct nlmsghdr *nlh = NULL;
 
2313                         seq = nlh->nlmsg_seq;
 
2316         skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
 
2320         err = rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0);
 
2322                 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
 
2323                 WARN_ON(err == -EMSGSIZE);
 
2327         err = rtnl_notify(skb, pid, RTNLGRP_IPV6_ROUTE, nlh, gfp_any());
 
2330                 rtnl_set_sk_err(RTNLGRP_IPV6_ROUTE, err);
 
2337 #ifdef CONFIG_PROC_FS
 
/*
 * Size in bytes (150) of one text record produced by rt6_info_route().
 * rt6_info_route() and rt6_proc_info() divide/modulo the read offset by this
 * value, so every record must have exactly this length.  That holds as long
 * as each address prints as 32 characters and the device name is at most
 * 8 characters.
 * NOTE(review): the final "%8s" is a minimum width only; a longer device name
 * would make a record longer than RT6_INFO_LEN -- confirm whether that can
 * happen.
 */
2339 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
 
/*
 * rt6_info_route - format one route as a text record for rt6_proc_info().
 * @rt:    route to print.
 * @p_arg: struct rt6_proc_arg holding the output buffer, the running length
 *         (len), the caller's limit (length) and the offset/skip bookkeeping.
 *
 * rt6_proc_info() passes this function to fib6_clean_all().  Each record is
 * appended at arg->buffer + arg->len and arg->len is advanced by the number
 * of characters sprintf() wrote.
 */
2350 static int rt6_info_route(struct rt6_info *rt, void *p_arg)

2352         struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;

/*
 * arg->offset / RT6_INFO_LEN is the number of whole records that precede the
 * requested offset; arg->skip is compared against it.
 */
2354         if (arg->skip < arg->offset / RT6_INFO_LEN) {

/* Compare what has been written so far with the caller's requested length. */
2359         if (arg->len >= arg->length)

/*
 * Destination address followed by a two-digit hex field (its argument is not
 * shown in this listing; presumably the prefix length).
 */
2362         arg->len += sprintf(arg->buffer + arg->len,

2363                             NIP6_SEQFMT " %02x ",

2364                             NIP6(rt->rt6i_dst.addr),

/*
 * With CONFIG_IPV6_SUBTREES the source address is printed in the same
 * layout; the all-zero literal below has the same field widths (presumably
 * the #else branch).
 */
2367 #ifdef CONFIG_IPV6_SUBTREES

2368         arg->len += sprintf(arg->buffer + arg->len,

2369                             NIP6_SEQFMT " %02x ",

2370                             NIP6(rt->rt6i_src.addr),

2373         arg->len += sprintf(arg->buffer + arg->len,

2374                             "00000000000000000000000000000000 00 ");

/*
 * Next hop: rt6i_nexthop's primary_key reinterpreted as an IPv6 address, or
 * 32 zeros when the route has no rt6i_nexthop.
 */
2377         if (rt->rt6i_nexthop) {

2378                 arg->len += sprintf(arg->buffer + arg->len,

2380                                     NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));

2382                 arg->len += sprintf(arg->buffer + arg->len,

2383                                     "00000000000000000000000000000000");

/*
 * Metric, dst reference count, dst use counter, route flags and the device
 * name ("" when the route has no device), terminated by a newline.
 */
2385         arg->len += sprintf(arg->buffer + arg->len,

2386                             " %08x %08x %08x %08x %8s\n",

2387                             rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),

2388                             rt->u.dst.__use, rt->rt6i_flags,

2389                             rt->rt6i_dev ? rt->rt6i_dev->name : "");
 
/*
 * rt6_proc_info - read handler for the "ipv6_route" proc entry (registered
 * in ip6_route_init()).
 * @buffer: output buffer.
 * @start:  advanced to the first byte the reader has not yet consumed.
 * @offset: byte offset of this read within the whole listing.
 * @length: maximum number of bytes the caller wants.
 *
 * Runs rt6_info_route() over the FIB via fib6_clean_all() and then corrects
 * for an offset that falls in the middle of a fixed-size record.
 * The initializer of arg is not shown in this listing; presumably it stores
 * the four parameters above (TODO confirm).
 */
2393 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)

2395         struct rt6_proc_arg arg = {

/* rt6_info_route() appends one record per route it is called for. */
2401         fib6_clean_all(rt6_info_route, 0, &arg);

/*
 * Records are RT6_INFO_LEN bytes each, so offset % RT6_INFO_LEN is how far
 * into the first emitted record the reader already is.
 */
2405                 *start += offset % RT6_INFO_LEN;

2407         arg.len -= offset % RT6_INFO_LEN;

/* Guard against reporting more than the caller asked for. */
2409         if (arg.len > length)
 
/*
 * rt6_stats_seq_show - seq_file show callback for the "rt6_stats" proc entry.
 * @seq: seq_file to print into.
 * @v:   not used by the lines visible here.
 *
 * Emits a single line of seven hexadecimal counters (minimum four digits
 * each), in this order: fib_nodes, fib_route_nodes, fib_rt_alloc,
 * fib_rt_entries, fib_rt_cache, the current ip6_dst_ops entry count and
 * fib_discarded_routes.
 */
2417 static int rt6_stats_seq_show(struct seq_file *seq, void *v)

2419         seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",

2420                       rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,

2421                       rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,

2422                       rt6_stats.fib_rt_cache,

/* ip6_dst_ops.entries is an atomic counter, hence atomic_read(). */
2423                       atomic_read(&ip6_dst_ops.entries),

2424                       rt6_stats.fib_discarded_routes);
 
/*
 * rt6_stats_seq_open - open callback for the "rt6_stats" proc entry.
 * @inode: not used by the lines visible here.
 * @file:  file being opened.
 *
 * Binds rt6_stats_seq_show() to the file with single_open(), passing no
 * private data (NULL), and returns single_open()'s result.
 */
2429 static int rt6_stats_seq_open(struct inode *inode, struct file *file)

2431         return single_open(file, rt6_stats_seq_show, NULL);
 
/*
 * File operations for the "rt6_stats" proc entry; ip6_route_init() passes
 * this structure to proc_net_fops_create().
 */
2434 static const struct file_operations rt6_stats_seq_fops = {

2435         .owner   = THIS_MODULE,

2436         .open    = rt6_stats_seq_open,

2438         .llseek  = seq_lseek,

/* Counterpart of the single_open() call made in rt6_stats_seq_open(). */
2439         .release = single_release,
 
2441 #endif  /* CONFIG_PROC_FS */
 
2443 #ifdef CONFIG_SYSCTL
 
/*
 * Backing storage for the "flush" sysctl entry (its .data pointer).
 * ipv6_sysctl_rtcache_flush() reads it to choose the fib6_run_gc() argument.
 */
2445 static int flush_delay;
 
/*
 * ipv6_sysctl_rtcache_flush - proc handler of the "flush" sysctl entry.
 * @ctl, @write, @filp, @buffer, @lenp, @ppos: standard proc handler
 *      arguments, forwarded unchanged to proc_dointvec().
 *
 * Lets proc_dointvec() process the request for the entry (whose .data is
 * &flush_delay) and then runs the FIB garbage collector.
 * The surrounding conditions and the return statements are not part of this
 * listing.
 */
2448 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,

2449                               void __user *buffer, size_t *lenp, loff_t *ppos)

/*
 * NOTE(review): the result of proc_dointvec() is discarded, so the garbage
 * collector below would still run with the previous flush_delay if parsing
 * failed -- confirm whether that is intended.
 */
2452                 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);

/* A non-positive delay is passed on as ~0UL, otherwise the delay itself. */
2453                 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
 
/*
 * sysctl table for the IPv6 routing tunables.  Each entry binds a procname
 * to the variable in .data and to the handler that parses and prints it.
 *
 * Handler choice:
 *   - time intervals kept in jiffies use proc_dointvec_jiffies (with
 *     sysctl_jiffies as the strategy), or the _ms_ variants when the value
 *     is exposed in milliseconds;
 *   - plain integers use proc_dointvec and no strategy.
 */
2459 ctl_table ipv6_route_table[] = {

/* "flush": handled by ipv6_sysctl_rtcache_flush(), stored in flush_delay. */
2461                 .ctl_name       =       NET_IPV6_ROUTE_FLUSH,

2462                 .procname       =       "flush",

2463                 .data           =       &flush_delay,

2464                 .maxlen         =       sizeof(int),

2466                 .proc_handler   =       &ipv6_sysctl_rtcache_flush

/* "gc_thresh": plain integer. */
2469                 .ctl_name       =       NET_IPV6_ROUTE_GC_THRESH,

2470                 .procname       =       "gc_thresh",

2471                 .data           =       &ip6_dst_ops.gc_thresh,

2472                 .maxlen         =       sizeof(int),

2474                 .proc_handler   =       &proc_dointvec,

/* "max_size": plain integer. */
2477                 .ctl_name       =       NET_IPV6_ROUTE_MAX_SIZE,

2478                 .procname       =       "max_size",

2479                 .data           =       &ip6_rt_max_size,

2480                 .maxlen         =       sizeof(int),

2482                 .proc_handler   =       &proc_dointvec,

/* "gc_min_interval": time interval, converted by the jiffies handler. */
2485                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL,

2486                 .procname       =       "gc_min_interval",

2487                 .data           =       &ip6_rt_gc_min_interval,

2488                 .maxlen         =       sizeof(int),

2490                 .proc_handler   =       &proc_dointvec_jiffies,

2491                 .strategy       =       &sysctl_jiffies,

/* "gc_timeout": time interval. */
2494                 .ctl_name       =       NET_IPV6_ROUTE_GC_TIMEOUT,

2495                 .procname       =       "gc_timeout",

2496                 .data           =       &ip6_rt_gc_timeout,

2497                 .maxlen         =       sizeof(int),

2499                 .proc_handler   =       &proc_dointvec_jiffies,

2500                 .strategy       =       &sysctl_jiffies,

/* "gc_interval": time interval. */
2503                 .ctl_name       =       NET_IPV6_ROUTE_GC_INTERVAL,

2504                 .procname       =       "gc_interval",

2505                 .data           =       &ip6_rt_gc_interval,

2506                 .maxlen         =       sizeof(int),

2508                 .proc_handler   =       &proc_dointvec_jiffies,

2509                 .strategy       =       &sysctl_jiffies,

/*
 * "gc_elasticity": a plain integer, not a time interval.  It must not go
 * through the jiffies handlers, which would scale the value by HZ on every
 * read and write; use proc_dointvec with no strategy, like "gc_thresh".
 */
2512                 .ctl_name       =       NET_IPV6_ROUTE_GC_ELASTICITY,

2513                 .procname       =       "gc_elasticity",

2514                 .data           =       &ip6_rt_gc_elasticity,

2515                 .maxlen         =       sizeof(int),

2517                 .proc_handler   =       &proc_dointvec,

/* "mtu_expires": time interval. */
2521                 .ctl_name       =       NET_IPV6_ROUTE_MTU_EXPIRES,

2522                 .procname       =       "mtu_expires",

2523                 .data           =       &ip6_rt_mtu_expires,

2524                 .maxlen         =       sizeof(int),

2526                 .proc_handler   =       &proc_dointvec_jiffies,

2527                 .strategy       =       &sysctl_jiffies,

/*
 * "min_adv_mss": a size, not a time interval, so it is a plain integer as
 * well and uses proc_dointvec with no strategy.
 */
2530                 .ctl_name       =       NET_IPV6_ROUTE_MIN_ADVMSS,

2531                 .procname       =       "min_adv_mss",

2532                 .data           =       &ip6_rt_min_advmss,

2533                 .maxlen         =       sizeof(int),

2535                 .proc_handler   =       &proc_dointvec,

/*
 * "gc_min_interval_ms": same storage as "gc_min_interval", exposed in
 * milliseconds through the ms/jiffies handlers.
 */
2539                 .ctl_name       =       NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,

2540                 .procname       =       "gc_min_interval_ms",

2541                 .data           =       &ip6_rt_gc_min_interval,

2542                 .maxlen         =       sizeof(int),

2544                 .proc_handler   =       &proc_dointvec_ms_jiffies,

2545                 .strategy       =       &sysctl_ms_jiffies,
 
/*
 * ip6_route_init - initialisation of the IPv6 routing code (marked __init).
 *
 * From the lines visible in this listing it:
 *   - creates the "ip6_dst_cache" slab for struct rt6_info and shares it
 *     with ip6_dst_blackhole_ops;
 *   - with CONFIG_PROC_FS, creates the "ipv6_route" and "rt6_stats" proc
 *     entries;
 *   - registers the PF_INET6 handlers for RTM_NEWROUTE, RTM_DELROUTE and
 *     RTM_GETROUTE.
 */
2552 void __init ip6_route_init(void)

2554 #ifdef  CONFIG_PROC_FS

2555         struct proc_dir_entry *p;

/*
 * SLAB_PANIC makes the kernel panic if the cache cannot be created, which
 * is why the returned pointer is used without a NULL check.
 */
2557         ip6_dst_ops.kmem_cachep =

2558                 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,

2559                                   SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

/* Blackhole dst entries are allocated from the same slab cache. */
2560         ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;

/*
 * "ipv6_route" is served by rt6_proc_info(); "rt6_stats" by the seq_file
 * operations in rt6_stats_seq_fops.
 */
2563 #ifdef  CONFIG_PROC_FS

2564         p = proc_net_create("ipv6_route", 0, rt6_proc_info);

2566                 p->owner = THIS_MODULE;

2568         proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);

2573 #ifdef CONFIG_IPV6_MULTIPLE_TABLES

/* Last argument NULL: no dump callback for these message types. */
2577         __rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL);

2578         __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL);

2579         __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL);
 
/*
 * ip6_route_cleanup - undo the setup done by ip6_route_init().
 *
 * From the lines visible in this listing it calls fib6_rules_cleanup()
 * when CONFIG_IPV6_MULTIPLE_TABLES is set, removes the "ipv6_route" and
 * "rt6_stats" proc entries when CONFIG_PROC_FS is set, and destroys the
 * dst slab cache.
 */
2582 void ip6_route_cleanup(void)

2584 #ifdef CONFIG_IPV6_MULTIPLE_TABLES

2585         fib6_rules_cleanup();

/* Same names that ip6_route_init() registered. */
2587 #ifdef CONFIG_PROC_FS

2588         proc_net_remove("ipv6_route");

2589         proc_net_remove("rt6_stats");

/* Releases the "ip6_dst_cache" slab created in ip6_route_init(). */
2596         kmem_cache_destroy(ip6_dst_ops.kmem_cachep);