2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable; otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
63 /* Set RT6_DEBUG to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
74 #define CLONE_OFFLINK_ROUTE 0
76 #define RT6_SELECT_F_IFACE 0x1
77 #define RT6_SELECT_F_REACHABLE 0x2
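/*
 * When CLONE_OFFLINK_ROUTE is 0, off-link (non-gateway) routes are not
 * cloned into per-destination cache entries.  RT6_SELECT_F_IFACE requires
 * the selected route to use the requested outgoing interface, and
 * RT6_SELECT_F_REACHABLE requires the next-hop router to be (probably)
 * reachable; both are enforced in rt6_score_route().
 */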
79 static int ip6_rt_max_size = 4096;
80 static int ip6_rt_gc_min_interval = HZ / 2;
81 static int ip6_rt_gc_timeout = 60*HZ;
82 int ip6_rt_gc_interval = 30*HZ;
83 static int ip6_rt_gc_elasticity = 9;
84 static int ip6_rt_mtu_expires = 10*60*HZ;
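/* Smallest MSS we will advertise: the IPv6 minimum MTU (1280) minus the
 * TCP header (20) and the IPv6 header (40), i.e. 1220 bytes. */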
85 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
87 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
88 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
89 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
90 static void ip6_dst_destroy(struct dst_entry *);
91 static void ip6_dst_ifdown(struct dst_entry *,
92 struct net_device *dev, int how);
93 static int ip6_dst_gc(void);
95 static int ip6_pkt_discard(struct sk_buff *skb);
96 static int ip6_pkt_discard_out(struct sk_buff *skb);
97 static void ip6_link_failure(struct sk_buff *skb);
98 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
100 #ifdef CONFIG_IPV6_ROUTE_INFO
101 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
102 struct in6_addr *gwaddr, int ifindex,
104 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
105 struct in6_addr *gwaddr, int ifindex);
108 static struct dst_ops ip6_dst_ops = {
110 .protocol = __constant_htons(ETH_P_IPV6),
113 .check = ip6_dst_check,
114 .destroy = ip6_dst_destroy,
115 .ifdown = ip6_dst_ifdown,
116 .negative_advice = ip6_negative_advice,
117 .link_failure = ip6_link_failure,
118 .update_pmtu = ip6_rt_update_pmtu,
119 .entry_size = sizeof(struct rt6_info),
122 struct rt6_info ip6_null_entry = {
125 .__refcnt = ATOMIC_INIT(1),
127 .dev = &loopback_dev,
129 .error = -ENETUNREACH,
130 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
131 .input = ip6_pkt_discard,
132 .output = ip6_pkt_discard_out,
134 .path = (struct dst_entry*)&ip6_null_entry,
137 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
138 .rt6i_metric = ~(u32) 0,
139 .rt6i_ref = ATOMIC_INIT(1),
142 struct fib6_node ip6_routing_table = {
143 .leaf = &ip6_null_entry,
144 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
147 /* Protects all the ip6 fib */
149 DEFINE_RWLOCK(rt6_lock);
152 /* allocate dst with ip6_dst_ops */
153 static __inline__ struct rt6_info *ip6_dst_alloc(void)
155 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
158 static void ip6_dst_destroy(struct dst_entry *dst)
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
164 rt->rt6i_idev = NULL;
169 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
172 struct rt6_info *rt = (struct rt6_info *)dst;
173 struct inet6_dev *idev = rt->rt6i_idev;
175 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
176 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
177 if (loopback_idev != NULL) {
178 rt->rt6i_idev = loopback_idev;
184 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
186 return (rt->rt6i_flags & RTF_EXPIRES &&
187 time_after(jiffies, rt->rt6i_expires));
191 * Route lookup. The caller is assumed to hold rt6_lock.
194 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
198 struct rt6_info *local = NULL;
199 struct rt6_info *sprt;
202 for (sprt = rt; sprt; sprt = sprt->u.next) {
203 struct net_device *dev = sprt->rt6i_dev;
204 if (dev->ifindex == oif)
206 if (dev->flags & IFF_LOOPBACK) {
207 if (sprt->rt6i_idev == NULL ||
208 sprt->rt6i_idev->dev->ifindex != oif) {
211 if (local && (!oif ||
212 local->rt6i_idev->dev->ifindex == oif))
223 return &ip6_null_entry;
228 #ifdef CONFIG_IPV6_ROUTER_PREF
229 static void rt6_probe(struct rt6_info *rt)
231 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
233 * Okay, this does not seem appropriate for now;
234 * however, we need to check whether it really is.
235 * This is the so-called Router Reachability Probing.
237 * Router Reachability Probe MUST be rate-limited
238 * to no more than one per minute.
240 if (!neigh || (neigh->nud_state & NUD_VALID))
242 read_lock_bh(&neigh->lock);
243 if (!(neigh->nud_state & NUD_VALID) &&
244 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
245 struct in6_addr mcaddr;
246 struct in6_addr *target;
248 neigh->updated = jiffies;
249 read_unlock_bh(&neigh->lock);
251 target = (struct in6_addr *)&neigh->primary_key;
252 addrconf_addr_solict_mult(target, &mcaddr);
253 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
255 read_unlock_bh(&neigh->lock);
258 static inline void rt6_probe(struct rt6_info *rt)
265 * Default Router Selection (RFC 2461 6.3.6)
267 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
269 struct net_device *dev = rt->rt6i_dev;
270 if (!oif || dev->ifindex == oif)
272 if ((dev->flags & IFF_LOOPBACK) &&
273 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
278 static inline int rt6_check_neigh(struct rt6_info *rt)
280 struct neighbour *neigh = rt->rt6i_nexthop;
282 if (rt->rt6i_flags & RTF_NONEXTHOP ||
283 !(rt->rt6i_flags & RTF_GATEWAY))
286 read_lock_bh(&neigh->lock);
287 if (neigh->nud_state & NUD_VALID)
289 read_unlock_bh(&neigh->lock);
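/*
 * Score one candidate route for rt6_select(): rt6_check_dev() contributes
 * interface-match points, the RA preference bits add more when
 * CONFIG_IPV6_ROUTER_PREF is enabled, and rt6_check_neigh() rewards a next
 * hop whose neighbour entry is in a valid NUD state.  A route that violates
 * the strict RT6_SELECT_F_* requirements is rejected outright.
 */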
294 static int rt6_score_route(struct rt6_info *rt, int oif,
299 m = rt6_check_dev(rt, oif);
300 if (!m && (strict & RT6_SELECT_F_IFACE))
302 #ifdef CONFIG_IPV6_ROUTER_PREF
303 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
305 n = rt6_check_neigh(rt);
308 else if (!n && strict & RT6_SELECT_F_REACHABLE)
313 static struct rt6_info *rt6_select(struct rt6_info **head, int oif,
316 struct rt6_info *match = NULL, *last = NULL;
317 struct rt6_info *rt, *rt0 = *head;
321 RT6_TRACE("%s(head=%p(*head=%p), oif=%d)\n",
322 __FUNCTION__, head, head ? *head : NULL, oif);
324 for (rt = rt0, metric = rt0->rt6i_metric;
325 rt && rt->rt6i_metric == metric && (!last || rt != rt0);
329 if (rt6_check_expired(rt))
334 m = rt6_score_route(rt, oif, strict);
348 (strict & RT6_SELECT_F_REACHABLE) &&
349 last && last != rt0) {
350 /* no entries matched; do round-robin */
351 static DEFINE_SPINLOCK(lock);
354 rt0->u.next = last->u.next;
359 RT6_TRACE("%s() => %p, score=%d\n",
360 __FUNCTION__, match, mpri);
362 return (match ? match : &ip6_null_entry);
365 #ifdef CONFIG_IPV6_ROUTE_INFO
366 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
367 struct in6_addr *gwaddr)
369 struct route_info *rinfo = (struct route_info *) opt;
370 struct in6_addr prefix_buf, *prefix;
375 if (len < sizeof(struct route_info)) {
379 /* Sanity check for prefix_len and length */
380 if (rinfo->length > 3) {
382 } else if (rinfo->prefix_len > 128) {
384 } else if (rinfo->prefix_len > 64) {
385 if (rinfo->length < 2) {
388 } else if (rinfo->prefix_len > 0) {
389 if (rinfo->length < 1) {
394 pref = rinfo->route_pref;
395 if (pref == ICMPV6_ROUTER_PREF_INVALID)
396 pref = ICMPV6_ROUTER_PREF_MEDIUM;
398 lifetime = ntohl(rinfo->lifetime);
399 if (lifetime == 0xffffffff) {
401 } else if (lifetime > 0x7fffffff/HZ) {
402 /* Avoid arithmetic overflow */
403 lifetime = 0x7fffffff/HZ - 1;
406 if (rinfo->length == 3)
407 prefix = (struct in6_addr *)rinfo->prefix;
409 /* ipv6_addr_prefix() reads only prefix_len bits of the (possibly truncated) prefix and zero-fills the rest, so this is safe. */
410 ipv6_addr_prefix(&prefix_buf,
411 (struct in6_addr *)rinfo->prefix,
413 prefix = &prefix_buf;
416 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
418 if (rt && !lifetime) {
419 ip6_del_rt(rt, NULL, NULL, NULL);
424 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
427 rt->rt6i_flags = RTF_ROUTEINFO |
428 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
431 if (lifetime == 0xffffffff) {
432 rt->rt6i_flags &= ~RTF_EXPIRES;
434 rt->rt6i_expires = jiffies + HZ * lifetime;
435 rt->rt6i_flags |= RTF_EXPIRES;
437 dst_release(&rt->u.dst);
443 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
446 struct fib6_node *fn;
449 read_lock_bh(&rt6_lock);
450 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
451 rt = rt6_device_match(fn->leaf, oif, strict);
452 dst_hold(&rt->u.dst);
454 read_unlock_bh(&rt6_lock);
456 rt->u.dst.lastuse = jiffies;
457 if (rt->u.dst.error == 0)
459 dst_release(&rt->u.dst);
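/*
 * Illustrative (not from this file) use of rt6_lookup(): the entry is
 * returned with a reference held, so the caller must drop it, e.g.
 *
 *	rt = rt6_lookup(&daddr, NULL, skb->dev->ifindex, 0);
 *	if (rt) {
 *		... use rt ...
 *		dst_release(&rt->u.dst);
 *	}
 */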
463 /* ip6_ins_rt is called with rt6_lock NOT held.
464 It takes ownership of the new route entry; if the addition fails for
465 any reason, the route is freed. If the caller does not hold a reference, the route may be destroyed.
469 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
470 void *_rtattr, struct netlink_skb_parms *req)
474 write_lock_bh(&rt6_lock);
475 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
476 write_unlock_bh(&rt6_lock);
481 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
482 struct in6_addr *saddr)
490 rt = ip6_rt_copy(ort);
493 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
494 if (rt->rt6i_dst.plen != 128 &&
495 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
496 rt->rt6i_flags |= RTF_ANYCAST;
497 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
500 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
501 rt->rt6i_dst.plen = 128;
502 rt->rt6i_flags |= RTF_CACHE;
503 rt->u.dst.flags |= DST_HOST;
505 #ifdef CONFIG_IPV6_SUBTREES
506 if (rt->rt6i_src.plen && saddr) {
507 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
508 rt->rt6i_src.plen = 128;
512 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
519 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
521 struct rt6_info *rt = ip6_rt_copy(ort);
523 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
524 rt->rt6i_dst.plen = 128;
525 rt->rt6i_flags |= RTF_CACHE;
526 if (rt->rt6i_flags & RTF_REJECT)
527 rt->u.dst.error = ort->u.dst.error;
528 rt->u.dst.flags |= DST_HOST;
529 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
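/*
 * BACKTRACK(): when the lookup landed on ip6_null_entry, walk back up the
 * fib tree towards the root and retry at the first ancestor node that
 * carries route information (RTN_RTINFO), so less specific prefixes get a
 * chance to match.
 */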
534 #define BACKTRACK() \
535 if (rt == &ip6_null_entry) { \
536 while ((fn = fn->parent) != NULL) { \
537 if (fn->fn_flags & RTN_ROOT) { \
540 if (fn->fn_flags & RTN_RTINFO) \
546 void ip6_route_input(struct sk_buff *skb)
548 struct fib6_node *fn;
549 struct rt6_info *rt, *nrt;
553 int reachable = RT6_SELECT_F_REACHABLE;
555 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
558 read_lock_bh(&rt6_lock);
561 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
562 &skb->nh.ipv6h->saddr);
565 rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable);
567 if (rt == &ip6_null_entry ||
568 rt->rt6i_flags & RTF_CACHE)
571 dst_hold(&rt->u.dst);
572 read_unlock_bh(&rt6_lock);
574 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
575 nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr);
577 #if CLONE_OFFLINK_ROUTE
578 nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr);
584 dst_release(&rt->u.dst);
585 rt = nrt ? : &ip6_null_entry;
587 dst_hold(&rt->u.dst);
589 err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb));
598 * Race condition! While rt6_lock was released,
599 * someone else could have inserted this route. Relookup.
601 dst_release(&rt->u.dst);
609 dst_hold(&rt->u.dst);
610 read_unlock_bh(&rt6_lock);
612 rt->u.dst.lastuse = jiffies;
614 skb->dst = (struct dst_entry *) rt;
618 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
620 struct fib6_node *fn;
621 struct rt6_info *rt, *nrt;
625 int reachable = RT6_SELECT_F_REACHABLE;
627 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0;
630 read_lock_bh(&rt6_lock);
633 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
636 rt = rt6_select(&fn->leaf, fl->oif, strict | reachable);
638 if (rt == &ip6_null_entry ||
639 rt->rt6i_flags & RTF_CACHE)
642 dst_hold(&rt->u.dst);
643 read_unlock_bh(&rt6_lock);
645 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
646 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
648 #if CLONE_OFFLINK_ROUTE
649 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
655 dst_release(&rt->u.dst);
656 rt = nrt ? : &ip6_null_entry;
658 dst_hold(&rt->u.dst);
660 err = ip6_ins_rt(nrt, NULL, NULL, NULL);
669 * Race condition! While rt6_lock was released,
670 * someone else could have inserted this route. Relookup.
672 dst_release(&rt->u.dst);
680 dst_hold(&rt->u.dst);
681 read_unlock_bh(&rt6_lock);
683 rt->u.dst.lastuse = jiffies;
690 * Destination cache support functions
693 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
697 rt = (struct rt6_info *) dst;
699 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
705 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
707 struct rt6_info *rt = (struct rt6_info *) dst;
710 if (rt->rt6i_flags & RTF_CACHE)
711 ip6_del_rt(rt, NULL, NULL, NULL);
718 static void ip6_link_failure(struct sk_buff *skb)
722 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
724 rt = (struct rt6_info *) skb->dst;
726 if (rt->rt6i_flags&RTF_CACHE) {
727 dst_set_expires(&rt->u.dst, 0);
728 rt->rt6i_flags |= RTF_EXPIRES;
729 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
730 rt->rt6i_node->fn_sernum = -1;
734 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
736 struct rt6_info *rt6 = (struct rt6_info*)dst;
738 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
739 rt6->rt6i_flags |= RTF_MODIFIED;
740 if (mtu < IPV6_MIN_MTU) {
742 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
744 dst->metrics[RTAX_MTU-1] = mtu;
748 /* Protected by rt6_lock. */
749 static struct dst_entry *ndisc_dst_gc_list;
750 static int ipv6_get_mtu(struct net_device *dev);
752 static inline unsigned int ipv6_advmss(unsigned int mtu)
754 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
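/* e.g. a standard 1500-byte Ethernet MTU leaves 1500 - 40 - 20 = 1440
 * bytes for the advertised MSS. */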
756 if (mtu < ip6_rt_min_advmss)
757 mtu = ip6_rt_min_advmss;
760 * The maximal non-jumbo IPv6 payload is IPV6_MAXPLEN, and the
761 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
762 * An MSS of IPV6_MAXPLEN is also valid and means: "any MSS,
763 * rely only on pmtu discovery".
765 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
770 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
771 struct neighbour *neigh,
772 struct in6_addr *addr,
773 int (*output)(struct sk_buff *))
776 struct inet6_dev *idev = in6_dev_get(dev);
778 if (unlikely(idev == NULL))
781 rt = ip6_dst_alloc();
782 if (unlikely(rt == NULL)) {
791 neigh = ndisc_get_neigh(dev, addr);
794 rt->rt6i_idev = idev;
795 rt->rt6i_nexthop = neigh;
796 atomic_set(&rt->u.dst.__refcnt, 1);
797 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
798 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
799 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
800 rt->u.dst.output = output;
802 #if 0 /* there's no chance to use these for ndisc */
803 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
806 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
807 rt->rt6i_dst.plen = 128;
810 write_lock_bh(&rt6_lock);
811 rt->u.dst.next = ndisc_dst_gc_list;
812 ndisc_dst_gc_list = &rt->u.dst;
813 write_unlock_bh(&rt6_lock);
815 fib6_force_start_gc();
818 return (struct dst_entry *)rt;
821 int ndisc_dst_gc(int *more)
823 struct dst_entry *dst, *next, **pprev;
827 pprev = &ndisc_dst_gc_list;
829 while ((dst = *pprev) != NULL) {
830 if (!atomic_read(&dst->__refcnt)) {
843 static int ip6_dst_gc(void)
845 static unsigned expire = 30*HZ;
846 static unsigned long last_gc;
847 unsigned long now = jiffies;
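/*
 * GC strategy: skip the pass while we are still inside gc_min_interval and
 * the table has not outgrown max_size; otherwise expire cached routes using
 * the adaptive 'expire' timeout.  'expire' is pulled back towards
 * gc_timeout/2 while the table stays below gc_thresh, and is reduced by
 * expire >> ip6_rt_gc_elasticity so that a table under pressure is
 * collected more aggressively.
 */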
849 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
850 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
856 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
857 expire = ip6_rt_gc_timeout>>1;
860 expire -= expire>>ip6_rt_gc_elasticity;
861 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
864 /* Clean the host part of a prefix. Not necessary with a radix tree,
865 but it results in cleaner routing tables.
867 Remove this only once everything else is known to work!
870 static int ipv6_get_mtu(struct net_device *dev)
872 int mtu = IPV6_MIN_MTU;
873 struct inet6_dev *idev;
875 idev = in6_dev_get(dev);
877 mtu = idev->cnf.mtu6;
883 int ipv6_get_hoplimit(struct net_device *dev)
885 int hoplimit = ipv6_devconf.hop_limit;
886 struct inet6_dev *idev;
888 idev = in6_dev_get(dev);
890 hoplimit = idev->cnf.hop_limit;
900 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
901 void *_rtattr, struct netlink_skb_parms *req)
906 struct rt6_info *rt = NULL;
907 struct net_device *dev = NULL;
908 struct inet6_dev *idev = NULL;
911 rta = (struct rtattr **) _rtattr;
913 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
915 #ifndef CONFIG_IPV6_SUBTREES
916 if (rtmsg->rtmsg_src_len)
919 if (rtmsg->rtmsg_ifindex) {
921 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
924 idev = in6_dev_get(dev);
929 if (rtmsg->rtmsg_metric == 0)
930 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
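/* A metric of 0 means "unspecified"; such routes get the default
 * user-route priority IP6_RT_PRIO_USER (1024). */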
932 rt = ip6_dst_alloc();
939 rt->u.dst.obsolete = -1;
940 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
941 if (nlh && (r = NLMSG_DATA(nlh))) {
942 rt->rt6i_protocol = r->rtm_protocol;
944 rt->rt6i_protocol = RTPROT_BOOT;
947 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
949 if (addr_type & IPV6_ADDR_MULTICAST)
950 rt->u.dst.input = ip6_mc_input;
952 rt->u.dst.input = ip6_forward;
954 rt->u.dst.output = ip6_output;
956 ipv6_addr_prefix(&rt->rt6i_dst.addr,
957 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
958 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
959 if (rt->rt6i_dst.plen == 128)
960 rt->u.dst.flags = DST_HOST;
962 #ifdef CONFIG_IPV6_SUBTREES
963 ipv6_addr_prefix(&rt->rt6i_src.addr,
964 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
965 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
968 rt->rt6i_metric = rtmsg->rtmsg_metric;
970 /* We cannot add true routes via loopback here,
971 as they would result in kernel looping; promote them to reject routes
973 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
974 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
975 /* hold loopback dev/idev if we haven't done so. */
976 if (dev != &loopback_dev) {
983 idev = in6_dev_get(dev);
989 rt->u.dst.output = ip6_pkt_discard_out;
990 rt->u.dst.input = ip6_pkt_discard;
991 rt->u.dst.error = -ENETUNREACH;
992 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
996 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
997 struct in6_addr *gw_addr;
1000 gw_addr = &rtmsg->rtmsg_gateway;
1001 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
1002 gwa_type = ipv6_addr_type(gw_addr);
1004 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1005 struct rt6_info *grt;
1007 /* IPv6 strictly forbids using non-link-local
1008 addresses as the nexthop address.
1009 Otherwise, the router will not be able to send redirects.
1010 This is very good, but in some (rare!) circumstances
1011 (SIT, PtP, NBMA NOARP links) it is handy to allow
1012 some exceptions. --ANK
1015 if (!(gwa_type&IPV6_ADDR_UNICAST))
1018 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
1020 err = -EHOSTUNREACH;
1024 if (dev != grt->rt6i_dev) {
1025 dst_release(&grt->u.dst);
1029 dev = grt->rt6i_dev;
1030 idev = grt->rt6i_idev;
1032 in6_dev_hold(grt->rt6i_idev);
1034 if (!(grt->rt6i_flags&RTF_GATEWAY))
1036 dst_release(&grt->u.dst);
1042 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1050 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
1051 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1052 if (IS_ERR(rt->rt6i_nexthop)) {
1053 err = PTR_ERR(rt->rt6i_nexthop);
1054 rt->rt6i_nexthop = NULL;
1059 rt->rt6i_flags = rtmsg->rtmsg_flags;
1062 if (rta && rta[RTA_METRICS-1]) {
1063 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
1064 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
1066 while (RTA_OK(attr, attrlen)) {
1067 unsigned flavor = attr->rta_type;
1069 if (flavor > RTAX_MAX) {
1073 rt->u.dst.metrics[flavor-1] =
1074 *(u32 *)RTA_DATA(attr);
1076 attr = RTA_NEXT(attr, attrlen);
1080 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1081 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1082 if (!rt->u.dst.metrics[RTAX_MTU-1])
1083 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1084 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1085 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1086 rt->u.dst.dev = dev;
1087 rt->rt6i_idev = idev;
1088 return ip6_ins_rt(rt, nlh, _rtattr, req);
1096 dst_free((struct dst_entry *) rt);
1100 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1104 write_lock_bh(&rt6_lock);
1106 err = fib6_del(rt, nlh, _rtattr, req);
1107 dst_release(&rt->u.dst);
1109 write_unlock_bh(&rt6_lock);
1114 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1116 struct fib6_node *fn;
1117 struct rt6_info *rt;
1120 read_lock_bh(&rt6_lock);
1122 fn = fib6_locate(&ip6_routing_table,
1123 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1124 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1127 for (rt = fn->leaf; rt; rt = rt->u.next) {
1128 if (rtmsg->rtmsg_ifindex &&
1129 (rt->rt6i_dev == NULL ||
1130 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1132 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1133 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1135 if (rtmsg->rtmsg_metric &&
1136 rtmsg->rtmsg_metric != rt->rt6i_metric)
1138 dst_hold(&rt->u.dst);
1139 read_unlock_bh(&rt6_lock);
1141 return ip6_del_rt(rt, nlh, _rtattr, req);
1144 read_unlock_bh(&rt6_lock);
1152 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1153 struct neighbour *neigh, u8 *lladdr, int on_link)
1155 struct rt6_info *rt, *nrt = NULL;
1157 struct fib6_node *fn;
1160 * Get the "current" route for this destination and
1161 * check if the redirect has come from an appropriate router.
1163 * RFC 2461 specifies that redirects should only be
1164 * accepted if they come from the nexthop to the target.
1165 * Due to the way the routes are chosen, this notion
1166 * is a bit fuzzy and one might need to check all possible
1169 strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL);
1171 read_lock_bh(&rt6_lock);
1172 fn = fib6_lookup(&ip6_routing_table, dest, NULL);
1174 for (rt = fn->leaf; rt; rt = rt->u.next) {
1176 * Current route is on-link; redirect is always invalid.
1178 * It seems the previous statement is not quite true. It could
1179 * be a node that regards us as on-link (e.g. proxy ndisc).
1180 * But then the router serving it might decide that we should
1181 * know the truth 8)8) --ANK (980726).
1183 if (rt6_check_expired(rt))
1185 if (!(rt->rt6i_flags & RTF_GATEWAY))
1187 if (neigh->dev != rt->rt6i_dev)
1189 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway))
1194 dst_hold(&rt->u.dst);
1196 while ((fn = fn->parent) != NULL) {
1197 if (fn->fn_flags & RTN_ROOT)
1199 if (fn->fn_flags & RTN_RTINFO)
1203 read_unlock_bh(&rt6_lock);
1206 if (net_ratelimit())
1207 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1208 "for redirect target\n");
1213 * We have finally decided to accept it.
1216 neigh_update(neigh, lladdr, NUD_STALE,
1217 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1218 NEIGH_UPDATE_F_OVERRIDE|
1219 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1220 NEIGH_UPDATE_F_ISROUTER))
1224 * Redirect received -> path was valid.
1225 * Look, redirects are sent only in response to data packets,
1226 * so this nexthop apparently is reachable. --ANK
1228 dst_confirm(&rt->u.dst);
1230 /* Duplicate redirect: silently ignore. */
1231 if (neigh == rt->u.dst.neighbour)
1234 nrt = ip6_rt_copy(rt);
1238 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1240 nrt->rt6i_flags &= ~RTF_GATEWAY;
1242 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1243 nrt->rt6i_dst.plen = 128;
1244 nrt->u.dst.flags |= DST_HOST;
1246 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1247 nrt->rt6i_nexthop = neigh_clone(neigh);
1248 /* Reset pmtu, it may be better */
1249 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1250 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1252 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1255 if (rt->rt6i_flags&RTF_CACHE) {
1256 ip6_del_rt(rt, NULL, NULL, NULL);
1261 dst_release(&rt->u.dst);
1266 * Handle ICMP "packet too big" messages
1267 * i.e. Path MTU discovery
1270 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1271 struct net_device *dev, u32 pmtu)
1273 struct rt6_info *rt, *nrt;
1276 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1280 if (pmtu >= dst_mtu(&rt->u.dst))
1283 if (pmtu < IPV6_MIN_MTU) {
1285 * According to RFC 2460, when a node receives a Packet Too Big
1286 * message reporting a PMTU below the IPv6 Minimum Link MTU (1280),
1287 * it sets the PMTU to that minimum and must include a Fragment
1288 * header in every subsequent packet on that path.
1290 pmtu = IPV6_MIN_MTU;
1294 /* New mtu received -> path was valid.
1295 Too Big messages are sent only in response to data packets,
1296 so this nexthop apparently is reachable. --ANK
1298 dst_confirm(&rt->u.dst);
1300 /* Host route. If it is static, it would be better
1301 not to override it but to add a new one, so that
1302 when the cache entry expires the old pmtu
1303 is restored automatically.
1305 if (rt->rt6i_flags & RTF_CACHE) {
1306 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1308 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1309 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1310 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1315 Two cases are possible:
1316 1. It is a connected route. Action: COW.
1317 2. It is a gatewayed or NONEXTHOP route. Action: clone it.
1319 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1320 nrt = rt6_alloc_cow(rt, daddr, saddr);
1322 nrt = rt6_alloc_clone(rt, daddr);
1325 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1327 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1329 /* According to RFC 1981, probing for a PMTU increase shouldn't
1330 * happen within 5 minutes; the recommended timer is 10 minutes.
1331 * Here the route expiration time is set to ip6_rt_mtu_expires,
1332 * which defaults to 10 minutes. After that the decreased pmtu
1333 * expires and a PMTU increase can be detected automatically.
1335 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1336 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1338 ip6_ins_rt(nrt, NULL, NULL, NULL);
1341 dst_release(&rt->u.dst);
1345 * Misc support functions
1348 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1350 struct rt6_info *rt = ip6_dst_alloc();
1353 rt->u.dst.input = ort->u.dst.input;
1354 rt->u.dst.output = ort->u.dst.output;
1356 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1357 rt->u.dst.dev = ort->u.dst.dev;
1359 dev_hold(rt->u.dst.dev);
1360 rt->rt6i_idev = ort->rt6i_idev;
1362 in6_dev_hold(rt->rt6i_idev);
1363 rt->u.dst.lastuse = jiffies;
1364 rt->rt6i_expires = 0;
1366 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1367 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1368 rt->rt6i_metric = 0;
1370 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1371 #ifdef CONFIG_IPV6_SUBTREES
1372 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1378 #ifdef CONFIG_IPV6_ROUTE_INFO
1379 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1380 struct in6_addr *gwaddr, int ifindex)
1382 struct fib6_node *fn;
1383 struct rt6_info *rt = NULL;
1385 write_lock_bh(&rt6_lock);
1386 fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0);
1390 for (rt = fn->leaf; rt; rt = rt->u.next) {
1391 if (rt->rt6i_dev->ifindex != ifindex)
1393 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1395 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1397 dst_hold(&rt->u.dst);
1401 write_unlock_bh(&rt6_lock);
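/*
 * Install a prefix learned from a Router Advertisement Route Information
 * option: build an in6_rtmsg describing a gateway route via the advertising
 * router, add it with ip6_route_add(), and return the freshly inserted
 * entry by looking it up again with rt6_get_route_info().
 */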
1405 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1406 struct in6_addr *gwaddr, int ifindex,
1409 struct in6_rtmsg rtmsg;
1411 memset(&rtmsg, 0, sizeof(rtmsg));
1412 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1413 ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix);
1414 rtmsg.rtmsg_dst_len = prefixlen;
1415 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1416 rtmsg.rtmsg_metric = 1024;
1417 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref);
1418 /* We should treat it as a default route if prefix length is 0. */
1420 rtmsg.rtmsg_flags |= RTF_DEFAULT;
1421 rtmsg.rtmsg_ifindex = ifindex;
1423 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1425 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
1429 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1431 struct rt6_info *rt;
1432 struct fib6_node *fn;
1434 fn = &ip6_routing_table;
1436 write_lock_bh(&rt6_lock);
1437 for (rt = fn->leaf; rt; rt=rt->u.next) {
1438 if (dev == rt->rt6i_dev &&
1439 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1440 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1444 dst_hold(&rt->u.dst);
1445 write_unlock_bh(&rt6_lock);
1449 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1450 struct net_device *dev,
1453 struct in6_rtmsg rtmsg;
1455 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1456 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1457 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1458 rtmsg.rtmsg_metric = 1024;
1459 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES |
1462 rtmsg.rtmsg_ifindex = dev->ifindex;
1464 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1465 return rt6_get_dflt_router(gwaddr, dev);
1468 void rt6_purge_dflt_routers(void)
1470 struct rt6_info *rt;
1473 read_lock_bh(&rt6_lock);
1474 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1475 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1476 dst_hold(&rt->u.dst);
1478 read_unlock_bh(&rt6_lock);
1480 ip6_del_rt(rt, NULL, NULL, NULL);
1485 read_unlock_bh(&rt6_lock);
1488 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1490 struct in6_rtmsg rtmsg;
1494 case SIOCADDRT: /* Add a route */
1495 case SIOCDELRT: /* Delete a route */
1496 if (!capable(CAP_NET_ADMIN))
1498 err = copy_from_user(&rtmsg, arg,
1499 sizeof(struct in6_rtmsg));
1506 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1509 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1523 * Drop the packet on the floor
1526 static int ip6_pkt_discard(struct sk_buff *skb)
1528 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1529 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
1534 static int ip6_pkt_discard_out(struct sk_buff *skb)
1536 skb->dev = skb->dst->dev;
1537 return ip6_pkt_discard(skb);
1541 * Allocate a dst for local (unicast / anycast) address.
1544 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1545 const struct in6_addr *addr,
1548 struct rt6_info *rt = ip6_dst_alloc();
1551 return ERR_PTR(-ENOMEM);
1553 dev_hold(&loopback_dev);
1556 rt->u.dst.flags = DST_HOST;
1557 rt->u.dst.input = ip6_input;
1558 rt->u.dst.output = ip6_output;
1559 rt->rt6i_dev = &loopback_dev;
1560 rt->rt6i_idev = idev;
1561 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1562 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1563 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1564 rt->u.dst.obsolete = -1;
1566 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1568 rt->rt6i_flags |= RTF_ANYCAST;
1570 rt->rt6i_flags |= RTF_LOCAL;
1571 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1572 if (rt->rt6i_nexthop == NULL) {
1573 dst_free((struct dst_entry *) rt);
1574 return ERR_PTR(-ENOMEM);
1577 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1578 rt->rt6i_dst.plen = 128;
1580 atomic_set(&rt->u.dst.__refcnt, 1);
1585 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1587 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1588 rt != &ip6_null_entry) {
1589 RT6_TRACE("deleted by ifdown %p\n", rt);
1595 void rt6_ifdown(struct net_device *dev)
1597 write_lock_bh(&rt6_lock);
1598 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1599 write_unlock_bh(&rt6_lock);
1602 struct rt6_mtu_change_arg
1604 struct net_device *dev;
1608 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1610 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1611 struct inet6_dev *idev;
1613 /* In IPv6, pmtu discovery is not optional,
1614 so the RTAX_MTU lock cannot disable it.
1615 We still use this lock to block changes
1616 caused by addrconf/ndisc.
1619 idev = __in6_dev_get(arg->dev);
1623 /* There is no way to discover an IPv6 PMTU increase caused by an
1624 administrative MTU increase, so the PMTU has to be updated here.
1625 Since RFC 1981 doesn't cover administrative MTU increases, updating
1626 the PMTU on such an increase is a MUST (e.g. for jumbo frames).
1629 If the new MTU is less than the route PMTU, the new MTU will be the
1630 lowest MTU in the path; update the route PMTU to reflect the
1631 decrease. If the new MTU is greater than the route PMTU, and the
1632 old MTU was the lowest MTU in the path, update the route PMTU
1633 to reflect the increase. In that case, if another node now has the
1634 lowest MTU in the path, a Packet Too Big message will lead to further PMTU discovery.
1637 if (rt->rt6i_dev == arg->dev &&
1638 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1639 (dst_mtu(&rt->u.dst) > arg->mtu ||
1640 (dst_mtu(&rt->u.dst) < arg->mtu &&
1641 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1642 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1643 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
1647 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1649 struct rt6_mtu_change_arg arg;
1653 read_lock_bh(&rt6_lock);
1654 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1655 read_unlock_bh(&rt6_lock);
1658 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1659 struct in6_rtmsg *rtmsg)
1661 memset(rtmsg, 0, sizeof(*rtmsg));
1663 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1664 rtmsg->rtmsg_src_len = r->rtm_src_len;
1665 rtmsg->rtmsg_flags = RTF_UP;
1666 if (r->rtm_type == RTN_UNREACHABLE)
1667 rtmsg->rtmsg_flags |= RTF_REJECT;
1669 if (rta[RTA_GATEWAY-1]) {
1670 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1672 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1673 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1675 if (rta[RTA_DST-1]) {
1676 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1678 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1680 if (rta[RTA_SRC-1]) {
1681 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1683 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1685 if (rta[RTA_OIF-1]) {
1686 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1688 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1690 if (rta[RTA_PRIORITY-1]) {
1691 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1693 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1698 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1700 struct rtmsg *r = NLMSG_DATA(nlh);
1701 struct in6_rtmsg rtmsg;
1703 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1705 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1708 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1710 struct rtmsg *r = NLMSG_DATA(nlh);
1711 struct in6_rtmsg rtmsg;
1713 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1715 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1718 struct rt6_rtnl_dump_arg
1720 struct sk_buff *skb;
1721 struct netlink_callback *cb;
1724 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1725 struct in6_addr *dst, struct in6_addr *src,
1726 int iif, int type, u32 pid, u32 seq,
1727 int prefix, unsigned int flags)
1730 struct nlmsghdr *nlh;
1731 unsigned char *b = skb->tail;
1732 struct rta_cacheinfo ci;
1734 if (prefix) { /* user wants prefix routes only */
1735 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1736 /* success since this is not a prefix route */
1741 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1742 rtm = NLMSG_DATA(nlh);
1743 rtm->rtm_family = AF_INET6;
1744 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1745 rtm->rtm_src_len = rt->rt6i_src.plen;
1747 rtm->rtm_table = RT_TABLE_MAIN;
1748 if (rt->rt6i_flags&RTF_REJECT)
1749 rtm->rtm_type = RTN_UNREACHABLE;
1750 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1751 rtm->rtm_type = RTN_LOCAL;
1753 rtm->rtm_type = RTN_UNICAST;
1755 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1756 rtm->rtm_protocol = rt->rt6i_protocol;
1757 if (rt->rt6i_flags&RTF_DYNAMIC)
1758 rtm->rtm_protocol = RTPROT_REDIRECT;
1759 else if (rt->rt6i_flags & RTF_ADDRCONF)
1760 rtm->rtm_protocol = RTPROT_KERNEL;
1761 else if (rt->rt6i_flags&RTF_DEFAULT)
1762 rtm->rtm_protocol = RTPROT_RA;
1764 if (rt->rt6i_flags&RTF_CACHE)
1765 rtm->rtm_flags |= RTM_F_CLONED;
1768 RTA_PUT(skb, RTA_DST, 16, dst);
1769 rtm->rtm_dst_len = 128;
1770 } else if (rtm->rtm_dst_len)
1771 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1772 #ifdef CONFIG_IPV6_SUBTREES
1774 RTA_PUT(skb, RTA_SRC, 16, src);
1775 rtm->rtm_src_len = 128;
1776 } else if (rtm->rtm_src_len)
1777 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1780 RTA_PUT(skb, RTA_IIF, 4, &iif);
1782 struct in6_addr saddr_buf;
1783 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1784 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1786 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1787 goto rtattr_failure;
1788 if (rt->u.dst.neighbour)
1789 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1791 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1792 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1793 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1794 if (rt->rt6i_expires)
1795 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1798 ci.rta_used = rt->u.dst.__use;
1799 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1800 ci.rta_error = rt->u.dst.error;
1804 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1805 nlh->nlmsg_len = skb->tail - b;
1810 skb_trim(skb, b - skb->data);
1814 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1816 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1819 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1820 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1821 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1825 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1826 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1827 prefix, NLM_F_MULTI);
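/*
 * Called by the fib6 walker for every leaf node.  A non-zero result from
 * rt6_dump_route() means the dump skb is full, so the walk is suspended;
 * the walker state stays linked from cb->args, and the next recvmsg() on
 * the netlink socket resumes the dump where it stopped.
 */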
1830 static int fib6_dump_node(struct fib6_walker_t *w)
1833 struct rt6_info *rt;
1835 for (rt = w->leaf; rt; rt = rt->u.next) {
1836 res = rt6_dump_route(rt, w->args);
1838 /* Frame is full, suspend walking */
1848 static void fib6_dump_end(struct netlink_callback *cb)
1850 struct fib6_walker_t *w = (void*)cb->args[0];
1854 fib6_walker_unlink(w);
1857 cb->done = (void*)cb->args[1];
1861 static int fib6_dump_done(struct netlink_callback *cb)
1864 return cb->done ? cb->done(cb) : 0;
1867 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1869 struct rt6_rtnl_dump_arg arg;
1870 struct fib6_walker_t *w;
1876 w = (void*)cb->args[0];
1880 * 1. hook callback destructor.
1882 cb->args[1] = (long)cb->done;
1883 cb->done = fib6_dump_done;
1886 * 2. allocate and initialize walker.
1888 w = kzalloc(sizeof(*w), GFP_ATOMIC);
1891 RT6_TRACE("dump<%p", w);
1892 w->root = &ip6_routing_table;
1893 w->func = fib6_dump_node;
1895 cb->args[0] = (long)w;
1896 read_lock_bh(&rt6_lock);
1898 read_unlock_bh(&rt6_lock);
1901 read_lock_bh(&rt6_lock);
1902 res = fib6_walk_continue(w);
1903 read_unlock_bh(&rt6_lock);
1906 if (res <= 0 && skb->len == 0)
1907 RT6_TRACE("%p>dump end\n", w);
1909 res = res < 0 ? res : skb->len;
1910 /* res < 0 is an error (really, impossible).
1911 res == 0 means the dump is complete, but the skb may still contain data.
1912 res > 0 means the dump is not complete, but the frame is full.
1914 /* Destroy walker, if dump of this table is complete. */
1920 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1922 struct rtattr **rta = arg;
1925 struct sk_buff *skb;
1927 struct rt6_info *rt;
1929 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1933 /* Reserve room for dummy headers; this skb can pass
1934 through a good chunk of the routing engine.
1936 skb->mac.raw = skb->data;
1937 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1939 memset(&fl, 0, sizeof(fl));
1941 ipv6_addr_copy(&fl.fl6_src,
1942 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1944 ipv6_addr_copy(&fl.fl6_dst,
1945 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1948 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1951 struct net_device *dev;
1952 dev = __dev_get_by_index(iif);
1961 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1963 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1965 skb->dst = &rt->u.dst;
1967 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1968 err = rt6_fill_node(skb, rt,
1969 &fl.fl6_dst, &fl.fl6_src,
1971 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1972 nlh->nlmsg_seq, 0, 0);
1978 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1988 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1989 struct netlink_skb_parms *req)
1991 struct sk_buff *skb;
1992 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1993 u32 pid = current->pid;
1999 seq = nlh->nlmsg_seq;
2001 skb = alloc_skb(size, gfp_any());
2003 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
2006 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
2008 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
2011 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
2012 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
2019 #ifdef CONFIG_PROC_FS
2021 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
2032 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2034 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
2037 if (arg->skip < arg->offset / RT6_INFO_LEN) {
2042 if (arg->len >= arg->length)
2045 for (i=0; i<16; i++) {
2046 sprintf(arg->buffer + arg->len, "%02x",
2047 rt->rt6i_dst.addr.s6_addr[i]);
2050 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2053 #ifdef CONFIG_IPV6_SUBTREES
2054 for (i=0; i<16; i++) {
2055 sprintf(arg->buffer + arg->len, "%02x",
2056 rt->rt6i_src.addr.s6_addr[i]);
2059 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
2062 sprintf(arg->buffer + arg->len,
2063 "00000000000000000000000000000000 00 ");
2067 if (rt->rt6i_nexthop) {
2068 for (i=0; i<16; i++) {
2069 sprintf(arg->buffer + arg->len, "%02x",
2070 rt->rt6i_nexthop->primary_key[i]);
2074 sprintf(arg->buffer + arg->len,
2075 "00000000000000000000000000000000");
2078 arg->len += sprintf(arg->buffer + arg->len,
2079 " %08x %08x %08x %08x %8s\n",
2080 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2081 rt->u.dst.__use, rt->rt6i_flags,
2082 rt->rt6i_dev ? rt->rt6i_dev->name : "");
2086 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
2088 struct rt6_proc_arg arg;
2089 arg.buffer = buffer;
2090 arg.offset = offset;
2091 arg.length = length;
2095 read_lock_bh(&rt6_lock);
2096 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
2097 read_unlock_bh(&rt6_lock);
2101 *start += offset % RT6_INFO_LEN;
2103 arg.len -= offset % RT6_INFO_LEN;
2105 if (arg.len > length)
2113 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2115 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2116 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
2117 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
2118 rt6_stats.fib_rt_cache,
2119 atomic_read(&ip6_dst_ops.entries),
2120 rt6_stats.fib_discarded_routes);
2125 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2127 return single_open(file, rt6_stats_seq_show, NULL);
2130 static struct file_operations rt6_stats_seq_fops = {
2131 .owner = THIS_MODULE,
2132 .open = rt6_stats_seq_open,
2134 .llseek = seq_lseek,
2135 .release = single_release,
2137 #endif /* CONFIG_PROC_FS */
2139 #ifdef CONFIG_SYSCTL
2141 static int flush_delay;
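/*
 * Writing to the "flush" sysctl triggers an immediate garbage-collection
 * pass over the IPv6 routing table via fib6_run_gc().  Typical usage
 * (assuming the table is registered under net.ipv6.route, as usual):
 *
 *	sysctl -w net.ipv6.route.flush=1
 */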
2144 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2145 void __user *buffer, size_t *lenp, loff_t *ppos)
2148 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2149 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2155 ctl_table ipv6_route_table[] = {
2157 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2158 .procname = "flush",
2159 .data = &flush_delay,
2160 .maxlen = sizeof(int),
2162 .proc_handler = &ipv6_sysctl_rtcache_flush
2165 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2166 .procname = "gc_thresh",
2167 .data = &ip6_dst_ops.gc_thresh,
2168 .maxlen = sizeof(int),
2170 .proc_handler = &proc_dointvec,
2173 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2174 .procname = "max_size",
2175 .data = &ip6_rt_max_size,
2176 .maxlen = sizeof(int),
2178 .proc_handler = &proc_dointvec,
2181 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2182 .procname = "gc_min_interval",
2183 .data = &ip6_rt_gc_min_interval,
2184 .maxlen = sizeof(int),
2186 .proc_handler = &proc_dointvec_jiffies,
2187 .strategy = &sysctl_jiffies,
2190 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2191 .procname = "gc_timeout",
2192 .data = &ip6_rt_gc_timeout,
2193 .maxlen = sizeof(int),
2195 .proc_handler = &proc_dointvec_jiffies,
2196 .strategy = &sysctl_jiffies,
2199 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2200 .procname = "gc_interval",
2201 .data = &ip6_rt_gc_interval,
2202 .maxlen = sizeof(int),
2204 .proc_handler = &proc_dointvec_jiffies,
2205 .strategy = &sysctl_jiffies,
2208 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2209 .procname = "gc_elasticity",
2210 .data = &ip6_rt_gc_elasticity,
2211 .maxlen = sizeof(int),
2213 .proc_handler = &proc_dointvec_jiffies,
2214 .strategy = &sysctl_jiffies,
2217 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2218 .procname = "mtu_expires",
2219 .data = &ip6_rt_mtu_expires,
2220 .maxlen = sizeof(int),
2222 .proc_handler = &proc_dointvec_jiffies,
2223 .strategy = &sysctl_jiffies,
2226 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2227 .procname = "min_adv_mss",
2228 .data = &ip6_rt_min_advmss,
2229 .maxlen = sizeof(int),
2231 .proc_handler = &proc_dointvec_jiffies,
2232 .strategy = &sysctl_jiffies,
2235 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2236 .procname = "gc_min_interval_ms",
2237 .data = &ip6_rt_gc_min_interval,
2238 .maxlen = sizeof(int),
2240 .proc_handler = &proc_dointvec_ms_jiffies,
2241 .strategy = &sysctl_ms_jiffies,
2248 void __init ip6_route_init(void)
2250 struct proc_dir_entry *p;
2252 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2253 sizeof(struct rt6_info),
2254 0, SLAB_HWCACHE_ALIGN,
2256 if (!ip6_dst_ops.kmem_cachep)
2257 panic("cannot create ip6_dst_cache");
2260 #ifdef CONFIG_PROC_FS
2261 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2263 p->owner = THIS_MODULE;
2265 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2272 void ip6_route_cleanup(void)
2274 #ifdef CONFIG_PROC_FS
2275 proc_net_remove("ipv6_route");
2276 proc_net_remove("rt6_stats");
2283 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);