2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/capability.h>
28 #include <linux/config.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/times.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/route.h>
36 #include <linux/netdevice.h>
37 #include <linux/in6.h>
38 #include <linux/init.h>
39 #include <linux/netlink.h>
40 #include <linux/if_arp.h>
43 #include <linux/proc_fs.h>
44 #include <linux/seq_file.h>
49 #include <net/ip6_fib.h>
50 #include <net/ip6_route.h>
51 #include <net/ndisc.h>
52 #include <net/addrconf.h>
54 #include <linux/rtnetlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
76 static int ip6_rt_max_size = 4096;
77 static int ip6_rt_gc_min_interval = HZ / 2;
78 static int ip6_rt_gc_timeout = 60*HZ;
79 int ip6_rt_gc_interval = 30*HZ;
80 static int ip6_rt_gc_elasticity = 9;
81 static int ip6_rt_mtu_expires = 10*60*HZ;
82 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
84 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
85 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
86 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
87 static void ip6_dst_destroy(struct dst_entry *);
88 static void ip6_dst_ifdown(struct dst_entry *,
89 struct net_device *dev, int how);
90 static int ip6_dst_gc(void);
92 static int ip6_pkt_discard(struct sk_buff *skb);
93 static int ip6_pkt_discard_out(struct sk_buff *skb);
94 static void ip6_link_failure(struct sk_buff *skb);
95 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
97 static struct dst_ops ip6_dst_ops = {
99 .protocol = __constant_htons(ETH_P_IPV6),
102 .check = ip6_dst_check,
103 .destroy = ip6_dst_destroy,
104 .ifdown = ip6_dst_ifdown,
105 .negative_advice = ip6_negative_advice,
106 .link_failure = ip6_link_failure,
107 .update_pmtu = ip6_rt_update_pmtu,
108 .entry_size = sizeof(struct rt6_info),
111 struct rt6_info ip6_null_entry = {
114 .__refcnt = ATOMIC_INIT(1),
116 .dev = &loopback_dev,
118 .error = -ENETUNREACH,
119 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
120 .input = ip6_pkt_discard,
121 .output = ip6_pkt_discard_out,
123 .path = (struct dst_entry*)&ip6_null_entry,
126 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
127 .rt6i_metric = ~(u32) 0,
128 .rt6i_ref = ATOMIC_INIT(1),
131 struct fib6_node ip6_routing_table = {
132 .leaf = &ip6_null_entry,
133 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
136 /* Protects all the ip6 fib */
138 DEFINE_RWLOCK(rt6_lock);
141 /* allocate dst with ip6_dst_ops */
/*
 * ip6_dst_alloc - allocate a dst entry from ip6_dst_ops.
 *
 * Returns whatever dst_alloc() returns, cast to struct rt6_info *.  The
 * result is not checked here; callers test it for NULL themselves.
 */
142 static __inline__ struct rt6_info *ip6_dst_alloc(void)
144 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * ip6_dst_destroy - .destroy hook installed in ip6_dst_ops.
 *
 * Views @dst as a struct rt6_info, remembers the inet6_dev attached to it
 * and clears rt->rt6i_idev.
 *
 * NOTE(review): the statements surrounding the assignment are not visible
 * here; presumably they test idev for NULL and drop the idev reference --
 * confirm against the full function.
 */
147 static void ip6_dst_destroy(struct dst_entry *dst)
149 struct rt6_info *rt = (struct rt6_info *)dst;
150 struct inet6_dev *idev = rt->rt6i_idev;
153 rt->rt6i_idev = NULL;
/*
 * ip6_dst_ifdown - .ifdown hook installed in ip6_dst_ops.
 *
 * When @dev is not the loopback device and the route's inet6_dev belongs
 * to @dev, a reference to the loopback device's inet6_dev is obtained with
 * in6_dev_get() and, if that succeeds, the route is re-pointed at it.
 *
 * NOTE(review): what happens to the previous idev reference is on lines
 * not visible here -- presumably it is released; confirm.
 */
158 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
161 struct rt6_info *rt = (struct rt6_info *)dst;
162 struct inet6_dev *idev = rt->rt6i_idev;
/* Only routes whose idev is bound to the device going down are touched. */
164 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
165 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
166 if (loopback_idev != NULL) {
167 rt->rt6i_idev = loopback_idev;
/*
 * rt6_check_expired - has @rt passed its expiry time?
 *
 * Returns non-zero only when the route carries RTF_EXPIRES and jiffies is
 * already after rt6i_expires; a route without RTF_EXPIRES never counts as
 * expired, whatever rt6i_expires holds.
 */
173 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
175 return (rt->rt6i_flags & RTF_EXPIRES &&
176 time_after(jiffies, rt->rt6i_expires));
180 * Route lookup. Any rt6_lock is implied.
183 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
187 struct rt6_info *local = NULL;
188 struct rt6_info *sprt;
191 for (sprt = rt; sprt; sprt = sprt->u.next) {
192 struct net_device *dev = sprt->rt6i_dev;
193 if (dev->ifindex == oif)
195 if (dev->flags & IFF_LOOPBACK) {
196 if (sprt->rt6i_idev == NULL ||
197 sprt->rt6i_idev->dev->ifindex != oif) {
200 if (local && (!oif ||
201 local->rt6i_idev->dev->ifindex == oif))
212 return &ip6_null_entry;
218 * pointer to the last default router chosen. BH is disabled locally.
220 static struct rt6_info *rt6_dflt_pointer;
221 static DEFINE_SPINLOCK(rt6_dflt_lock);
/*
 * rt6_reset_dflt_pointer - forget the remembered default router.
 * @rt: route being invalidated, or NULL to reset unconditionally.
 *
 * rt6_dflt_pointer is cleared when @rt is NULL or is the currently
 * remembered route; any other @rt leaves it untouched.  The check and the
 * store are done under rt6_dflt_lock, taken with the _bh variant.
 */
223 void rt6_reset_dflt_pointer(struct rt6_info *rt)
225 spin_lock_bh(&rt6_dflt_lock);
226 if (rt == NULL || rt == rt6_dflt_pointer) {
227 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
228 rt6_dflt_pointer = NULL;
230 spin_unlock_bh(&rt6_dflt_lock);
233 /* Default Router Selection (RFC 2461 6.3.6) */
234 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
236 struct rt6_info *match = NULL;
237 struct rt6_info *sprt;
240 for (sprt = rt; sprt; sprt = sprt->u.next) {
241 struct neighbour *neigh;
246 sprt->rt6i_dev->ifindex == oif))
249 if (rt6_check_expired(sprt))
252 if (sprt == rt6_dflt_pointer)
255 if ((neigh = sprt->rt6i_nexthop) != NULL) {
256 read_lock_bh(&neigh->lock);
257 switch (neigh->nud_state) {
275 read_unlock_bh(&neigh->lock);
278 read_unlock_bh(&neigh->lock);
283 if (m > mpri || m >= 12) {
287 /* we choose the last default router if it
288 * is in (probably) reachable state.
289 * If route changed, we should do pmtu
290 * discovery. --yoshfuji
297 spin_lock(&rt6_dflt_lock);
300 * No default routers are known to be reachable.
303 if (rt6_dflt_pointer) {
304 for (sprt = rt6_dflt_pointer->u.next;
305 sprt; sprt = sprt->u.next) {
306 if (sprt->u.dst.obsolete <= 0 &&
307 sprt->u.dst.error == 0 &&
308 !rt6_check_expired(sprt)) {
315 sprt = sprt->u.next) {
316 if (sprt->u.dst.obsolete <= 0 &&
317 sprt->u.dst.error == 0 &&
318 !rt6_check_expired(sprt)) {
322 if (sprt == rt6_dflt_pointer)
329 if (rt6_dflt_pointer != match)
330 RT6_TRACE("changed default router: %p->%p\n",
331 rt6_dflt_pointer, match);
332 rt6_dflt_pointer = match;
334 spin_unlock(&rt6_dflt_lock);
338 * Last Resort: if no default routers found,
339 * use addrconf default route.
340 * We don't record this route.
342 for (sprt = ip6_routing_table.leaf;
343 sprt; sprt = sprt->u.next) {
344 if (!rt6_check_expired(sprt) &&
345 (sprt->rt6i_flags & RTF_DEFAULT) &&
348 sprt->rt6i_dev->ifindex == oif))) {
354 /* no default route. give up. */
355 match = &ip6_null_entry;
/*
 * rt6_lookup - look up a route for @daddr/@saddr in ip6_routing_table.
 *
 * With rt6_lock held for reading, the fib node is located with
 * fib6_lookup(), a route is picked from its leaf list by
 * rt6_device_match() and a reference is taken on it before the lock is
 * dropped.  The route's lastuse stamp is then refreshed.
 *
 * NOTE(review): the remaining parameters (the body uses oif and strict),
 * the statement governed by the error == 0 test and the final return are
 * not visible here.  Presumably the route is returned, still referenced,
 * when its dst.error is 0, and otherwise the reference is released and
 * NULL is returned -- confirm.
 */
362 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
365 struct fib6_node *fn;
368 read_lock_bh(&rt6_lock);
369 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
370 rt = rt6_device_match(fn->leaf, oif, strict);
/* Pin the route before the table lock is released. */
371 dst_hold(&rt->u.dst);
373 read_unlock_bh(&rt6_lock);
375 rt->u.dst.lastuse = jiffies;
376 if (rt->u.dst.error == 0)
378 dst_release(&rt->u.dst);
382 /* ip6_ins_rt is called with rt6_lock NOT held.
383 It takes a new route entry; if the addition fails for any reason, the
384 route is freed. In any case, if the caller does not hold it, it may
/*
 * ip6_ins_rt - insert @rt into ip6_routing_table.
 *
 * Takes the write side of rt6_lock (BH-disabling variant) around
 * fib6_add(), so the caller must not already hold rt6_lock.  @nlh,
 * @_rtattr and @req are passed through to fib6_add() unchanged.
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * fib6_add() result in err is returned -- confirm.
 */
388 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
389 void *_rtattr, struct netlink_skb_parms *req)
393 write_lock_bh(&rt6_lock);
394 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
395 write_unlock_bh(&rt6_lock);
400 /* No rt6_lock! If COW fails, the function returns a dead route entry
401 with dst->error set to the errno value.
/*
 * rt6_cow - clone @ort into a /128 cache route for @daddr and insert it.
 *
 * Visible steps, in order:
 *  - copy @ort with ip6_rt_copy();
 *  - if the copy lacks RTF_GATEWAY, use @daddr itself as the gateway, and
 *    set RTF_ANYCAST when the prefix length is not 128 yet the prefix
 *    address equals @daddr;
 *  - rewrite the destination key to @daddr/128 and set RTF_CACHE and
 *    DST_HOST;
 *  - under CONFIG_IPV6_SUBTREES, narrow a non-empty source key to
 *    @saddr/128 when @saddr is given;
 *  - look up the nexthop neighbour with ndisc_get_neigh();
 *  - take a reference on the copy and insert it with ip6_ins_rt(), which
 *    takes rt6_lock itself;
 *  - store err in the copy's dst.error.
 * The last two visible statements take a reference on ip6_null_entry and
 * return it.
 *
 * NOTE(review): the conditions and returns that connect these steps are
 * not visible here.  Presumably the copy is returned directly on
 * successful insertion, dst.error is written only on failure, and the
 * ip6_null_entry path is taken when ip6_rt_copy() fails -- confirm.
 */
404 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
405 struct in6_addr *saddr, struct netlink_skb_parms *req)
414 rt = ip6_rt_copy(ort);
417 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
418 if (rt->rt6i_dst.plen != 128 &&
419 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
420 rt->rt6i_flags |= RTF_ANYCAST;
421 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
424 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
425 rt->rt6i_dst.plen = 128;
426 rt->rt6i_flags |= RTF_CACHE;
427 rt->u.dst.flags |= DST_HOST;
429 #ifdef CONFIG_IPV6_SUBTREES
430 if (rt->rt6i_src.plen && saddr) {
431 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
432 rt->rt6i_src.plen = 128;
436 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
438 dst_hold(&rt->u.dst);
440 err = ip6_ins_rt(rt, NULL, NULL, req);
444 rt->u.dst.error = err;
448 dst_hold(&ip6_null_entry.u.dst);
449 return &ip6_null_entry;
452 #define BACKTRACK() \
453 if (rt == &ip6_null_entry && strict) { \
454 while ((fn = fn->parent) != NULL) { \
455 if (fn->fn_flags & RTN_ROOT) { \
456 dst_hold(&rt->u.dst); \
459 if (fn->fn_flags & RTN_RTINFO) \
465 void ip6_route_input(struct sk_buff *skb)
467 struct fib6_node *fn;
472 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
475 read_lock_bh(&rt6_lock);
477 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
478 &skb->nh.ipv6h->saddr);
483 if ((rt->rt6i_flags & RTF_CACHE)) {
484 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
486 dst_hold(&rt->u.dst);
490 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
493 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
494 struct rt6_info *nrt;
495 dst_hold(&rt->u.dst);
496 read_unlock_bh(&rt6_lock);
498 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
499 &skb->nh.ipv6h->saddr,
502 dst_release(&rt->u.dst);
505 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
508 /* Race condition! In the gap, when rt6_lock was
509 released someone could insert this route. Relookup.
511 dst_release(&rt->u.dst);
514 dst_hold(&rt->u.dst);
517 read_unlock_bh(&rt6_lock);
519 rt->u.dst.lastuse = jiffies;
521 skb->dst = (struct dst_entry *) rt;
524 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
526 struct fib6_node *fn;
531 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
534 read_lock_bh(&rt6_lock);
536 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
541 if ((rt->rt6i_flags & RTF_CACHE)) {
542 rt = rt6_device_match(rt, fl->oif, strict);
544 dst_hold(&rt->u.dst);
547 if (rt->rt6i_flags & RTF_DEFAULT) {
548 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
549 rt = rt6_best_dflt(rt, fl->oif);
551 rt = rt6_device_match(rt, fl->oif, strict);
555 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
556 struct rt6_info *nrt;
557 dst_hold(&rt->u.dst);
558 read_unlock_bh(&rt6_lock);
560 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
562 dst_release(&rt->u.dst);
565 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
568 /* Race condition! In the gap, when rt6_lock was
569 released someone could insert this route. Relookup.
571 dst_release(&rt->u.dst);
574 dst_hold(&rt->u.dst);
577 read_unlock_bh(&rt6_lock);
579 rt->u.dst.lastuse = jiffies;
586 * Destination cache support functions
/*
 * ip6_dst_check - .check hook installed in ip6_dst_ops.
 *
 * The visible test passes when @dst is non-NULL, the route is still
 * attached to a fib node and that node's fn_sernum equals @cookie.
 *
 * NOTE(review): both return statements are not visible here; presumably
 * @dst is returned when the test passes and NULL otherwise -- confirm.
 */
589 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
593 rt = (struct rt6_info *) dst;
595 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * ip6_negative_advice - .negative_advice hook installed in ip6_dst_ops.
 *
 * A route carrying RTF_CACHE is removed from the table with ip6_del_rt();
 * note that ip6_del_rt() also releases one reference on the route.
 *
 * NOTE(review): the enclosing condition, the handling of non-cache routes
 * and the return value are not visible here -- confirm against the full
 * function.
 */
601 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
603 struct rt6_info *rt = (struct rt6_info *) dst;
606 if (rt->rt6i_flags & RTF_CACHE)
607 ip6_del_rt(rt, NULL, NULL, NULL);
/*
 * ip6_link_failure - .link_failure hook installed in ip6_dst_ops.
 *
 * Sends an ICMPv6 destination-unreachable / address-unreachable error for
 * @skb, then invalidates the route attached to the skb:
 *  - a cache route (RTF_CACHE) is expired immediately: its expiry is set
 *    with dst_set_expires(..., 0) and RTF_EXPIRES is raised;
 *  - otherwise, an RTF_DEFAULT route still attached to a fib node gets that
 *    node's fn_sernum set to -1.
 *
 * NOTE(review): a guard on skb->dst being non-NULL is presumably on a line
 * not visible here -- confirm.
 */
614 static void ip6_link_failure(struct sk_buff *skb)
618 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
620 rt = (struct rt6_info *) skb->dst;
622 if (rt->rt6i_flags&RTF_CACHE) {
623 dst_set_expires(&rt->u.dst, 0);
624 rt->rt6i_flags |= RTF_EXPIRES;
625 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
626 rt->rt6i_node->fn_sernum = -1;
/*
 * ip6_rt_update_pmtu - .update_pmtu hook installed in ip6_dst_ops.
 *
 * Acts only when @mtu is below the route's current MTU and the route is a
 * host route (destination prefix length 128).  The route is marked
 * RTF_MODIFIED and its RTAX_MTU metric is overwritten with @mtu; an @mtu
 * below IPV6_MIN_MTU additionally raises RTAX_FEATURE_ALLFRAG.
 *
 * NOTE(review): one line inside the "mtu < IPV6_MIN_MTU" branch is not
 * visible here; presumably it raises mtu to IPV6_MIN_MTU before the
 * metric is stored -- confirm.
 */
630 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
632 struct rt6_info *rt6 = (struct rt6_info*)dst;
634 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
635 rt6->rt6i_flags |= RTF_MODIFIED;
636 if (mtu < IPV6_MIN_MTU) {
638 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
640 dst->metrics[RTAX_MTU-1] = mtu;
644 /* Protected by rt6_lock. */
645 static struct dst_entry *ndisc_dst_gc_list;
646 static int ipv6_get_mtu(struct net_device *dev);
/*
 * ipv6_advmss - derive an advertised MSS from a link/path MTU.
 *
 * Subtracts the sizes of struct ipv6hdr and struct tcphdr from @mtu and
 * raises the result to at least ip6_rt_min_advmss.  A final test checks
 * whether the value exceeds IPV6_MAXPLEN - sizeof(struct tcphdr).
 *
 * NOTE(review): the statement governed by that final test and the return
 * are not visible here; going by the comment below, presumably the value
 * is replaced with IPV6_MAXPLEN and then returned -- confirm.
 */
648 static inline unsigned int ipv6_advmss(unsigned int mtu)
650 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
652 if (mtu < ip6_rt_min_advmss)
653 mtu = ip6_rt_min_advmss;
656 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
657 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
658 * IPV6_MAXPLEN is also valid and means: "any MSS,
659 * rely only on pmtu discovery"
661 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * ndisc_dst_alloc - build a standalone dst entry for @dev / @addr.
 * @output: stored as the entry's dst.output handler.
 *
 * Obtains the device's inet6_dev with in6_dev_get() and allocates a route
 * with ip6_dst_alloc(); both results are checked.  The route is given
 * idev, the neighbour, a reference count of 1, a hop limit of 255 and
 * MTU/ADVMSS metrics derived from the device.  It is then pushed onto the
 * head of ndisc_dst_gc_list under the write side of rt6_lock, and
 * fib6_force_start_gc() is called.  Returns the route as a dst_entry
 * pointer.
 *
 * NOTE(review): the failure paths, the assignment of the route's device
 * and the condition under which ndisc_get_neigh() is called are on lines
 * not visible here.  Presumably the lookup happens only when @neigh is
 * passed as NULL -- confirm.  The extent of the "#if 0" region (its
 * #endif) is not visible either.
 */
666 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
667 struct neighbour *neigh,
668 struct in6_addr *addr,
669 int (*output)(struct sk_buff *))
672 struct inet6_dev *idev = in6_dev_get(dev);
674 if (unlikely(idev == NULL))
677 rt = ip6_dst_alloc();
678 if (unlikely(rt == NULL)) {
687 neigh = ndisc_get_neigh(dev, addr);
690 rt->rt6i_idev = idev;
691 rt->rt6i_nexthop = neigh;
692 atomic_set(&rt->u.dst.__refcnt, 1);
693 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
694 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
695 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
696 rt->u.dst.output = output;
698 #if 0 /* there's no chance to use these for ndisc */
699 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
702 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
703 rt->rt6i_dst.plen = 128;
/* Link the entry at the head of the list that ndisc_dst_gc() walks. */
706 write_lock_bh(&rt6_lock);
707 rt->u.dst.next = ndisc_dst_gc_list;
708 ndisc_dst_gc_list = &rt->u.dst;
709 write_unlock_bh(&rt6_lock);
711 fib6_force_start_gc();
714 return (struct dst_entry *)rt;
717 int ndisc_dst_gc(int *more)
719 struct dst_entry *dst, *next, **pprev;
723 pprev = &ndisc_dst_gc_list;
725 while ((dst = *pprev) != NULL) {
726 if (!atomic_read(&dst->__refcnt)) {
739 static int ip6_dst_gc(void)
741 static unsigned expire = 30*HZ;
742 static unsigned long last_gc;
743 unsigned long now = jiffies;
745 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
746 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
752 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
753 expire = ip6_rt_gc_timeout>>1;
756 expire -= expire>>ip6_rt_gc_elasticity;
757 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
760 /* Clean host part of a prefix. Not necessary in radix tree,
761 but results in cleaner routing tables.
763 Remove it only when all the things will work!
/*
 * ipv6_get_mtu - IPv6 MTU configured for @dev.
 *
 * Starts from IPV6_MIN_MTU as the default and reads cnf.mtu6 from the
 * device's inet6_dev obtained with in6_dev_get().
 *
 * NOTE(review): the NULL check on idev, the release of the reference taken
 * by in6_dev_get() and the return are not visible here; presumably the
 * default is kept when the device has no inet6_dev -- confirm.
 */
766 static int ipv6_get_mtu(struct net_device *dev)
768 int mtu = IPV6_MIN_MTU;
769 struct inet6_dev *idev;
771 idev = in6_dev_get(dev);
773 mtu = idev->cnf.mtu6;
/*
 * ipv6_get_hoplimit - hop limit configured for @dev.
 *
 * Starts from the global ipv6_devconf.hop_limit as the default and reads
 * cnf.hop_limit from the device's inet6_dev obtained with in6_dev_get().
 *
 * NOTE(review): the NULL check on idev, the release of the reference taken
 * by in6_dev_get() and the return are not visible here; presumably the
 * global default is kept when the device has no inet6_dev -- confirm.
 */
779 int ipv6_get_hoplimit(struct net_device *dev)
781 int hoplimit = ipv6_devconf.hop_limit;
782 struct inet6_dev *idev;
784 idev = in6_dev_get(dev);
786 hoplimit = idev->cnf.hop_limit;
796 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
797 void *_rtattr, struct netlink_skb_parms *req)
802 struct rt6_info *rt = NULL;
803 struct net_device *dev = NULL;
804 struct inet6_dev *idev = NULL;
807 rta = (struct rtattr **) _rtattr;
809 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
811 #ifndef CONFIG_IPV6_SUBTREES
812 if (rtmsg->rtmsg_src_len)
815 if (rtmsg->rtmsg_ifindex) {
817 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
820 idev = in6_dev_get(dev);
825 if (rtmsg->rtmsg_metric == 0)
826 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
828 rt = ip6_dst_alloc();
835 rt->u.dst.obsolete = -1;
836 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
837 if (nlh && (r = NLMSG_DATA(nlh))) {
838 rt->rt6i_protocol = r->rtm_protocol;
840 rt->rt6i_protocol = RTPROT_BOOT;
843 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
845 if (addr_type & IPV6_ADDR_MULTICAST)
846 rt->u.dst.input = ip6_mc_input;
848 rt->u.dst.input = ip6_forward;
850 rt->u.dst.output = ip6_output;
852 ipv6_addr_prefix(&rt->rt6i_dst.addr,
853 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
854 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
855 if (rt->rt6i_dst.plen == 128)
856 rt->u.dst.flags = DST_HOST;
858 #ifdef CONFIG_IPV6_SUBTREES
859 ipv6_addr_prefix(&rt->rt6i_src.addr,
860 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
861 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
864 rt->rt6i_metric = rtmsg->rtmsg_metric;
866 /* We cannot add true routes via loopback here,
867 they would result in kernel looping; promote them to reject routes
869 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
870 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
871 /* hold loopback dev/idev if we haven't done so. */
872 if (dev != &loopback_dev) {
879 idev = in6_dev_get(dev);
885 rt->u.dst.output = ip6_pkt_discard_out;
886 rt->u.dst.input = ip6_pkt_discard;
887 rt->u.dst.error = -ENETUNREACH;
888 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
892 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
893 struct in6_addr *gw_addr;
896 gw_addr = &rtmsg->rtmsg_gateway;
897 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
898 gwa_type = ipv6_addr_type(gw_addr);
900 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
901 struct rt6_info *grt;
903 /* IPv6 strictly forbids using non-link-local
904 addresses as the nexthop address.
905 Otherwise, the router will not be able to send redirects.
906 That is a good rule, but in some (rare!) circumstances
907 (SIT, PtP, NBMA NOARP links) it is handy to allow
908 some exceptions. --ANK
911 if (!(gwa_type&IPV6_ADDR_UNICAST))
914 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
920 if (dev != grt->rt6i_dev) {
921 dst_release(&grt->u.dst);
926 idev = grt->rt6i_idev;
928 in6_dev_hold(grt->rt6i_idev);
930 if (!(grt->rt6i_flags&RTF_GATEWAY))
932 dst_release(&grt->u.dst);
938 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
946 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
947 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
948 if (IS_ERR(rt->rt6i_nexthop)) {
949 err = PTR_ERR(rt->rt6i_nexthop);
950 rt->rt6i_nexthop = NULL;
955 rt->rt6i_flags = rtmsg->rtmsg_flags;
958 if (rta && rta[RTA_METRICS-1]) {
959 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
960 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
962 while (RTA_OK(attr, attrlen)) {
963 unsigned flavor = attr->rta_type;
965 if (flavor > RTAX_MAX) {
969 rt->u.dst.metrics[flavor-1] =
970 *(u32 *)RTA_DATA(attr);
972 attr = RTA_NEXT(attr, attrlen);
976 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
977 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
978 if (!rt->u.dst.metrics[RTAX_MTU-1])
979 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
980 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
981 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
983 rt->rt6i_idev = idev;
984 return ip6_ins_rt(rt, nlh, _rtattr, req);
992 dst_free((struct dst_entry *) rt);
/*
 * ip6_del_rt - remove @rt from the routing table.
 *
 * Under the write side of rt6_lock (BH-disabling variant): the remembered
 * default router is reset unconditionally, @rt is removed with fib6_del()
 * and one reference on @rt is released.  Callers in this file take that
 * reference with dst_hold() before calling (see ip6_route_del and
 * rt6_purge_dflt_routers).  @nlh, @_rtattr and @req are passed through to
 * fib6_del().
 *
 * NOTE(review): the return statement is not visible here; presumably the
 * fib6_del() result in err is returned -- confirm.
 */
996 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1000 write_lock_bh(&rt6_lock);
1002 rt6_reset_dflt_pointer(NULL);
1004 err = fib6_del(rt, nlh, _rtattr, req);
1005 dst_release(&rt->u.dst);
1007 write_unlock_bh(&rt6_lock);
/*
 * ip6_route_del - delete the first route matching @rtmsg.
 *
 * With rt6_lock held for reading, the exact fib node for the destination
 * and source prefixes is located with fib6_locate() and its leaf list is
 * scanned.  Three optional filters are tested for each route:
 *  - rtmsg_ifindex, when non-zero, against the route's device index;
 *  - rtmsg_gateway, when RTF_GATEWAY is set, against the route's gateway;
 *  - rtmsg_metric, when non-zero, against the route's metric.
 * For the selected route a reference is taken, the read lock is dropped
 * and the deletion is delegated to ip6_del_rt(), which takes the write
 * lock itself and consumes the reference.
 *
 * NOTE(review): the statements governed by the three filter tests
 * (presumably "continue"), the check on fn and the not-found return value
 * are on lines not visible here -- confirm.
 */
1012 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1014 struct fib6_node *fn;
1015 struct rt6_info *rt;
1018 read_lock_bh(&rt6_lock);
1020 fn = fib6_locate(&ip6_routing_table,
1021 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1022 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1025 for (rt = fn->leaf; rt; rt = rt->u.next) {
1026 if (rtmsg->rtmsg_ifindex &&
1027 (rt->rt6i_dev == NULL ||
1028 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1030 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1031 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1033 if (rtmsg->rtmsg_metric &&
1034 rtmsg->rtmsg_metric != rt->rt6i_metric)
/* Pin the route: the read lock is dropped before ip6_del_rt() runs. */
1036 dst_hold(&rt->u.dst);
1037 read_unlock_bh(&rt6_lock);
1039 return ip6_del_rt(rt, nlh, _rtattr, req);
1042 read_unlock_bh(&rt6_lock);
1050 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1051 struct neighbour *neigh, u8 *lladdr, int on_link)
1053 struct rt6_info *rt, *nrt;
1055 /* Locate old route to this destination. */
1056 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1061 if (neigh->dev != rt->rt6i_dev)
1065 * Current route is on-link; redirect is always invalid.
1067 * Seems, previous statement is not true. It could
1068 * be node, which looks for us as on-link (f.e. proxy ndisc)
1069 * But then router serving it might decide, that we should
1070 * know truth 8)8) --ANK (980726).
1072 if (!(rt->rt6i_flags&RTF_GATEWAY))
1076 * RFC 2461 specifies that redirects should only be
1077 * accepted if they come from the nexthop to the target.
1078 * Due to the way default routers are chosen, this notion
1079 * is a bit fuzzy and one might need to check all default
1082 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1083 if (rt->rt6i_flags & RTF_DEFAULT) {
1084 struct rt6_info *rt1;
1086 read_lock(&rt6_lock);
1087 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1088 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1089 dst_hold(&rt1->u.dst);
1090 dst_release(&rt->u.dst);
1091 read_unlock(&rt6_lock);
1096 read_unlock(&rt6_lock);
1098 if (net_ratelimit())
1099 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1100 "for redirect target\n");
1107 * We have finally decided to accept it.
1110 neigh_update(neigh, lladdr, NUD_STALE,
1111 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1112 NEIGH_UPDATE_F_OVERRIDE|
1113 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1114 NEIGH_UPDATE_F_ISROUTER))
1118 * Redirect received -> path was valid.
1119 * Look, redirects are sent only in response to data packets,
1120 * so that this nexthop apparently is reachable. --ANK
1122 dst_confirm(&rt->u.dst);
1124 /* Duplicate redirect: silently ignore. */
1125 if (neigh == rt->u.dst.neighbour)
1128 nrt = ip6_rt_copy(rt);
1132 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1134 nrt->rt6i_flags &= ~RTF_GATEWAY;
1136 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1137 nrt->rt6i_dst.plen = 128;
1138 nrt->u.dst.flags |= DST_HOST;
1140 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1141 nrt->rt6i_nexthop = neigh_clone(neigh);
1142 /* Reset pmtu, it may be better */
1143 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1144 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1146 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1149 if (rt->rt6i_flags&RTF_CACHE) {
1150 ip6_del_rt(rt, NULL, NULL, NULL);
1155 dst_release(&rt->u.dst);
1160 * Handle ICMP "packet too big" messages
1161 * i.e. Path MTU discovery
1164 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1165 struct net_device *dev, u32 pmtu)
1167 struct rt6_info *rt, *nrt;
1170 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1174 if (pmtu >= dst_mtu(&rt->u.dst))
1177 if (pmtu < IPV6_MIN_MTU) {
1179 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1180 * MTU (1280) and a fragment header should always be included
1181 * after a node receiving Too Big message reporting PMTU is
1182 * less than the IPv6 Minimum Link MTU.
1184 pmtu = IPV6_MIN_MTU;
1188 /* New mtu received -> path was valid.
1189 They are sent only in response to data packets,
1190 so that this nexthop apparently is reachable. --ANK
1192 dst_confirm(&rt->u.dst);
1194 /* Host route. If it is static, it would be better
1195 not to override it, but add new one, so that
1196 when cache entry will expire old pmtu
1197 would return automatically.
1199 if (rt->rt6i_flags & RTF_CACHE) {
1200 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1202 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1203 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1204 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1209 Two cases are possible:
1210 1. It is connected route. Action: COW
1211 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1213 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1214 nrt = rt6_cow(rt, daddr, saddr, NULL);
1215 if (!nrt->u.dst.error) {
1216 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1218 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1219 /* According to RFC 1981, detection of a PMTU increase must not be
1220 attempted within 5 minutes; the recommended timer is 10 minutes.
1221 Here the route expiration time is set to ip6_rt_mtu_expires,
1222 which is 10 minutes. After that the decreased PMTU expires
1223 and detection of a PMTU increase happens automatically.
1225 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1226 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1228 dst_release(&nrt->u.dst);
1230 nrt = ip6_rt_copy(rt);
1233 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1234 nrt->rt6i_dst.plen = 128;
1235 nrt->u.dst.flags |= DST_HOST;
1236 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1237 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1238 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1239 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1241 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1242 ip6_ins_rt(nrt, NULL, NULL, NULL);
1246 dst_release(&rt->u.dst);
1250 * Misc support functions
/*
 * ip6_rt_copy - allocate a new route initialised from @ort.
 *
 * Copied from @ort: the input/output handlers, the first RTAX_MAX metrics,
 * the device (with dev_hold), the inet6_dev (with in6_dev_hold), the
 * gateway, the destination key and, under CONFIG_IPV6_SUBTREES, the source
 * key.  Deliberately not inherited:
 *  - RTF_EXPIRES is stripped from the flags and rt6i_expires is 0;
 *  - rt6i_metric is reset to 0;
 *  - lastuse is set to the current jiffies.
 * The nexthop neighbour is not set by any visible line; callers in this
 * file assign rt6i_nexthop themselves after copying.
 *
 * NOTE(review): the NULL checks on the allocation, on the device and on
 * the inet6_dev, and the return, are on lines not visible here; presumably
 * NULL is returned when ip6_dst_alloc() fails -- confirm.
 */
1253 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1255 struct rt6_info *rt = ip6_dst_alloc();
1258 rt->u.dst.input = ort->u.dst.input;
1259 rt->u.dst.output = ort->u.dst.output;
1261 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1262 rt->u.dst.dev = ort->u.dst.dev;
1264 dev_hold(rt->u.dst.dev);
1265 rt->rt6i_idev = ort->rt6i_idev;
1267 in6_dev_hold(rt->rt6i_idev);
1268 rt->u.dst.lastuse = jiffies;
1269 rt->rt6i_expires = 0;
1271 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1272 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1273 rt->rt6i_metric = 0;
1275 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1276 #ifdef CONFIG_IPV6_SUBTREES
1277 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/*
 * rt6_get_dflt_router - find the root-node route via gateway @addr on @dev.
 *
 * Scans the leaf list of the root node of ip6_routing_table for a route
 * whose device is @dev and whose gateway equals @addr, and takes a
 * reference on the result before unlocking.
 *
 * NOTE(review): the visible body only reads the list, yet it takes the
 * write side of rt6_lock -- confirm whether the read side would do.  The
 * loop exit on a match, the NULL guard before dst_hold() and the return
 * are on lines not visible here; presumably NULL is returned when nothing
 * matches -- confirm.
 */
1283 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1285 struct rt6_info *rt;
1286 struct fib6_node *fn;
1288 fn = &ip6_routing_table;
1290 write_lock_bh(&rt6_lock);
1291 for (rt = fn->leaf; rt; rt=rt->u.next) {
1292 if (dev == rt->rt6i_dev &&
1293 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1297 dst_hold(&rt->u.dst);
1298 write_unlock_bh(&rt6_lock);
/*
 * rt6_add_dflt_router - install a default route via @gwaddr on @dev.
 *
 * Builds a zeroed in6_rtmsg (so destination and prefix length stay 0)
 * with the gateway, metric 1024, the device's ifindex and the flags
 * RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES, and
 * hands it to ip6_route_add().
 *
 * The ip6_route_add() result is ignored; the outcome is determined by
 * looking the route up again with rt6_get_dflt_router(), whose result is
 * returned.
 */
1302 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1303 struct net_device *dev)
1305 struct in6_rtmsg rtmsg;
1307 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1308 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1309 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1310 rtmsg.rtmsg_metric = 1024;
1311 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1313 rtmsg.rtmsg_ifindex = dev->ifindex;
1315 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1316 return rt6_get_dflt_router(gwaddr, dev);
/*
 * rt6_purge_dflt_routers - delete default/addrconf routes at the root node.
 *
 * Walks the root node's leaf list under the read side of rt6_lock.  A
 * route with RTF_DEFAULT or RTF_ADDRCONF set (either flag is enough, the
 * test is a bitwise AND against the OR of both) is pinned with dst_hold(),
 * the remembered default router is reset, the read lock is dropped and the
 * route is removed with ip6_del_rt(), which takes the write lock itself
 * and consumes the reference.
 *
 * NOTE(review): how the scan continues after a deletion is on lines not
 * visible here; since the lock was dropped, presumably the walk restarts
 * from the head of the list -- confirm.
 */
1319 void rt6_purge_dflt_routers(void)
1321 struct rt6_info *rt;
1324 read_lock_bh(&rt6_lock);
1325 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1326 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1327 dst_hold(&rt->u.dst);
1329 rt6_reset_dflt_pointer(NULL);
1331 read_unlock_bh(&rt6_lock);
1333 ip6_del_rt(rt, NULL, NULL, NULL);
1338 read_unlock_bh(&rt6_lock);
1341 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1343 struct in6_rtmsg rtmsg;
1347 case SIOCADDRT: /* Add a route */
1348 case SIOCDELRT: /* Delete a route */
1349 if (!capable(CAP_NET_ADMIN))
1351 err = copy_from_user(&rtmsg, arg,
1352 sizeof(struct in6_rtmsg));
1359 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1362 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1376 * Drop the packet on the floor
/*
 * ip6_pkt_discard - handler for packets that hit a reject/null route.
 *
 * Counts the packet under IPSTATS_MIB_OUTNOROUTES and answers with an
 * ICMPv6 destination-unreachable / no-route error.
 *
 * NOTE(review): this handler is also installed as the .input handler of
 * ip6_null_entry and of reject routes, so received packets are counted in
 * the "out" no-route counter as well -- confirm this is intended.  The
 * freeing of @skb and the return value are on lines not visible here.
 */
1379 static int ip6_pkt_discard(struct sk_buff *skb)
1381 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1382 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/*
 * ip6_pkt_discard_out - output-path variant of ip6_pkt_discard().
 *
 * Points skb->dev at the device of the attached dst entry first, because
 * ip6_pkt_discard() passes skb->dev to icmpv6_send(), then delegates and
 * returns its result.
 */
1387 static int ip6_pkt_discard_out(struct sk_buff *skb)
1389 skb->dev = skb->dst->dev;
1390 return ip6_pkt_discard(skb);
1394 * Allocate a dst for local (unicast / anycast) address.
/*
 * addrconf_dst_alloc - build a host route for a local address.
 *
 * Allocates a route bound to the loopback device (with dev_hold) and to
 * @idev, using ip6_input / ip6_output as handlers.  Metrics: MTU and
 * ADVMSS come from the loopback device, the hop limit is -1.
 * dst.obsolete is set to -1.  Flags start as RTF_UP | RTF_NONEXTHOP;
 * RTF_ANYCAST and RTF_LOCAL are added by the two statements that follow.
 * The destination key is @addr/128 and the reference count is set to 1.
 *
 * Returns ERR_PTR(-ENOMEM) when the allocation fails or when
 * ndisc_get_neigh() yields no neighbour (the route is released with
 * dst_free() in that case).
 *
 * NOTE(review): the last parameter and the if/else choosing between
 * RTF_ANYCAST and RTF_LOCAL are on lines not visible here; presumably the
 * two flags are alternatives -- confirm.  No visible line writes
 * rt6i_gateway before it is passed to ndisc_get_neigh() -- confirm what
 * address that lookup is meant to use.
 */
1397 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1398 const struct in6_addr *addr,
1401 struct rt6_info *rt = ip6_dst_alloc();
1404 return ERR_PTR(-ENOMEM);
1406 dev_hold(&loopback_dev);
1409 rt->u.dst.flags = DST_HOST;
1410 rt->u.dst.input = ip6_input;
1411 rt->u.dst.output = ip6_output;
1412 rt->rt6i_dev = &loopback_dev;
1413 rt->rt6i_idev = idev;
1414 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1415 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1416 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1417 rt->u.dst.obsolete = -1;
1419 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1421 rt->rt6i_flags |= RTF_ANYCAST;
1423 rt->rt6i_flags |= RTF_LOCAL;
1424 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1425 if (rt->rt6i_nexthop == NULL) {
1426 dst_free((struct dst_entry *) rt);
1427 return ERR_PTR(-ENOMEM);
1430 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1431 rt->rt6i_dst.plen = 128;
1433 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * fib6_ifdown - fib6_clean_tree() callback used by rt6_ifdown().
 * @arg: the net_device going down, or NULL to match every device.
 *
 * Selects every route, other than ip6_null_entry, whose device is @arg
 * (or any route when @arg is NULL) and traces the decision.
 *
 * NOTE(review): both return statements are on lines not visible here;
 * going by the trace text, presumably the value that tells
 * fib6_clean_tree() to delete the route is returned for selected routes
 * and 0 otherwise -- confirm.
 */
1438 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1440 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1441 rt != &ip6_null_entry) {
1442 RT6_TRACE("deleted by ifdown %p\n", rt);
/*
 * rt6_ifdown - sweep the routing table when @dev goes down.
 *
 * Runs fib6_ifdown() over the whole of ip6_routing_table through
 * fib6_clean_tree(), holding the write side of rt6_lock (BH-disabling
 * variant).  @dev is passed through as the callback argument; fib6_ifdown()
 * treats a NULL argument as "every device".
 */
1448 void rt6_ifdown(struct net_device *dev)
1450 write_lock_bh(&rt6_lock);
1451 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1452 write_unlock_bh(&rt6_lock);
1455 struct rt6_mtu_change_arg
1457 struct net_device *dev;
/*
 * rt6_mtu_change_route - fib6_clean_tree() callback used by rt6_mtu_change().
 * @p_arg: struct rt6_mtu_change_arg carrying the device and its new MTU.
 *
 * A route's RTAX_MTU metric is replaced with arg->mtu when all of these
 * hold:
 *  - the route uses arg->dev;
 *  - the RTAX_MTU metric is not locked;
 *  - the route MTU is larger than the new MTU, or it is smaller than the
 *    new MTU and equal to the device's cnf.mtu6 value.
 *
 * NOTE(review): likely defect.  The "if" has no braces, and the embedded
 * original line numbers of the condition and the two assignments are
 * consecutive, so no brace line was lost in extraction.  As written, only
 * the RTAX_MTU assignment is conditional; the RTAX_ADVMSS assignment that
 * follows it runs for every route the walk visits, including routes on
 * other devices and routes with a locked MTU.  The ADVMSS update most
 * likely belongs inside the same condition -- confirm and add braces.
 *
 * NOTE(review): idev comes from __in6_dev_get() and is dereferenced in the
 * condition; the NULL check is presumably on lines not visible here, as is
 * the return value -- confirm.
 */
1461 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1463 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1464 struct inet6_dev *idev;
1466 /* In IPv6 pmtu discovery is not optional,
1467 so that RTAX_MTU lock cannot disable it.
1468 We still use this lock to block changes
1469 caused by addrconf/ndisc.
1472 idev = __in6_dev_get(arg->dev);
1476 /* For administrative MTU increase, there is no way to discover
1477 IPv6 PMTU increase, so PMTU increase should be updated here.
1478 Since RFC 1981 doesn't include administrative MTU increase
1479 update PMTU increase is a MUST. (i.e. jumbo frame)
1482 If new MTU is less than route PMTU, this new MTU will be the
1483 lowest MTU in the path, update the route PMTU to reflect PMTU
1484 decreases; if new MTU is greater than route PMTU, and the
1485 old MTU is the lowest MTU in the path, update the route PMTU
1486 to reflect the increase. In this case if the other nodes' MTU
1487 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1490 if (rt->rt6i_dev == arg->dev &&
1491 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1492 (dst_mtu(&rt->u.dst) > arg->mtu ||
1493 (dst_mtu(&rt->u.dst) < arg->mtu &&
1494 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1495 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1496 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/*
 * rt6_mtu_change - propagate a device MTU change to the routing table.
 *
 * Runs rt6_mtu_change_route() over the whole of ip6_routing_table through
 * fib6_clean_tree(), passing a struct rt6_mtu_change_arg.  Only the read
 * side of rt6_lock is held, although the callback writes route metrics.
 *
 * NOTE(review): the statements that fill in arg are on lines not visible
 * here; presumably its device and MTU members are set from @dev and @mtu
 * -- confirm.
 */
1500 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1502 struct rt6_mtu_change_arg arg;
1506 read_lock_bh(&rt6_lock);
1507 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1508 read_unlock_bh(&rt6_lock);
/*
 * inet6_rtm_to_rtmsg - translate an rtnetlink request into an in6_rtmsg.
 * @r:     rtmsg header of the request.
 * @rta:   attribute table, indexed by attribute type minus one.
 * @rtmsg: output, zeroed first.
 *
 * Prefix lengths are copied from @r, RTF_UP is always set and
 * RTN_UNREACHABLE maps to RTF_REJECT.  Attribute handling:
 *  - RTA_GATEWAY: must be exactly 16 bytes of payload; sets RTF_GATEWAY;
 *  - RTA_DST / RTA_SRC: payload must hold at least ceil(prefix_len / 8)
 *    bytes, and exactly that many bytes are copied;
 *  - RTA_OIF: must be exactly sizeof(int);
 *  - RTA_PRIORITY: must be exactly 4 bytes; becomes the metric.
 *
 * NOTE(review): the RTA_DST / RTA_SRC copy length comes from rtm_dst_len /
 * rtm_src_len, which this function does not range-check; the "> 128"
 * rejection in ip6_route_add() runs only after the copy.  Confirm that a
 * prefix length above 128 cannot make the memcpy run past the destination
 * address field.
 *
 * NOTE(review): the statements governed by the length checks and the
 * final return are on lines not visible here; callers treat a non-zero
 * result as failure.
 */
1511 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1512 struct in6_rtmsg *rtmsg)
1514 memset(rtmsg, 0, sizeof(*rtmsg));
1516 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1517 rtmsg->rtmsg_src_len = r->rtm_src_len;
1518 rtmsg->rtmsg_flags = RTF_UP;
1519 if (r->rtm_type == RTN_UNREACHABLE)
1520 rtmsg->rtmsg_flags |= RTF_REJECT;
1522 if (rta[RTA_GATEWAY-1]) {
1523 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1525 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1526 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1528 if (rta[RTA_DST-1]) {
1529 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1531 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1533 if (rta[RTA_SRC-1]) {
1534 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1536 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1538 if (rta[RTA_OIF-1]) {
1539 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1541 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1543 if (rta[RTA_PRIORITY-1]) {
1544 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1546 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
1551 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1553 struct rtmsg *r = NLMSG_DATA(nlh);
1554 struct in6_rtmsg rtmsg;
1556 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1558 return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
1561 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1563 struct rtmsg *r = NLMSG_DATA(nlh);
1564 struct in6_rtmsg rtmsg;
1566 if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1568 return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/* Per-call context handed to the route dump walker callbacks. */
struct rt6_rtnl_dump_arg {
	struct sk_buff *skb;		/* buffer the dump records are written to */
	struct netlink_callback *cb;	/* netlink dump callback state */
};
1577 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1578 struct in6_addr *dst, struct in6_addr *src,
1579 int iif, int type, u32 pid, u32 seq,
1580 int prefix, unsigned int flags)
1583 struct nlmsghdr *nlh;
1584 unsigned char *b = skb->tail;
1585 struct rta_cacheinfo ci;
1587 if (prefix) { /* user wants prefix routes only */
1588 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1589 /* success since this is not a prefix route */
1594 nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1595 rtm = NLMSG_DATA(nlh);
1596 rtm->rtm_family = AF_INET6;
1597 rtm->rtm_dst_len = rt->rt6i_dst.plen;
1598 rtm->rtm_src_len = rt->rt6i_src.plen;
1600 rtm->rtm_table = RT_TABLE_MAIN;
1601 if (rt->rt6i_flags&RTF_REJECT)
1602 rtm->rtm_type = RTN_UNREACHABLE;
1603 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1604 rtm->rtm_type = RTN_LOCAL;
1606 rtm->rtm_type = RTN_UNICAST;
1608 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1609 rtm->rtm_protocol = rt->rt6i_protocol;
1610 if (rt->rt6i_flags&RTF_DYNAMIC)
1611 rtm->rtm_protocol = RTPROT_REDIRECT;
1612 else if (rt->rt6i_flags & RTF_ADDRCONF)
1613 rtm->rtm_protocol = RTPROT_KERNEL;
1614 else if (rt->rt6i_flags&RTF_DEFAULT)
1615 rtm->rtm_protocol = RTPROT_RA;
1617 if (rt->rt6i_flags&RTF_CACHE)
1618 rtm->rtm_flags |= RTM_F_CLONED;
1621 RTA_PUT(skb, RTA_DST, 16, dst);
1622 rtm->rtm_dst_len = 128;
1623 } else if (rtm->rtm_dst_len)
1624 RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1625 #ifdef CONFIG_IPV6_SUBTREES
1627 RTA_PUT(skb, RTA_SRC, 16, src);
1628 rtm->rtm_src_len = 128;
1629 } else if (rtm->rtm_src_len)
1630 RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1633 RTA_PUT(skb, RTA_IIF, 4, &iif);
1635 struct in6_addr saddr_buf;
1636 if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1637 RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1639 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1640 goto rtattr_failure;
1641 if (rt->u.dst.neighbour)
1642 RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1644 RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1645 RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
1646 ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1647 if (rt->rt6i_expires)
1648 ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1651 ci.rta_used = rt->u.dst.__use;
1652 ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1653 ci.rta_error = rt->u.dst.error;
1657 RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
1658 nlh->nlmsg_len = skb->tail - b;
1663 skb_trim(skb, b - skb->data);
1667 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1669 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1672 if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1673 struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1674 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
1678 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1679 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1680 prefix, NLM_F_MULTI);
1683 static int fib6_dump_node(struct fib6_walker_t *w)
1686 struct rt6_info *rt;
1688 for (rt = w->leaf; rt; rt = rt->u.next) {
1689 res = rt6_dump_route(rt, w->args);
1691 /* Frame is full, suspend walking */
1701 static void fib6_dump_end(struct netlink_callback *cb)
1703 struct fib6_walker_t *w = (void*)cb->args[0];
1707 fib6_walker_unlink(w);
1710 cb->done = (void*)cb->args[1];
1714 static int fib6_dump_done(struct netlink_callback *cb)
1717 return cb->done ? cb->done(cb) : 0;
1720 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1722 struct rt6_rtnl_dump_arg arg;
1723 struct fib6_walker_t *w;
1729 w = (void*)cb->args[0];
1733 * 1. hook callback destructor.
1735 cb->args[1] = (long)cb->done;
1736 cb->done = fib6_dump_done;
1739 * 2. allocate and initialize walker.
1741 w = kmalloc(sizeof(*w), GFP_ATOMIC);
1744 RT6_TRACE("dump<%p", w);
1745 memset(w, 0, sizeof(*w));
1746 w->root = &ip6_routing_table;
1747 w->func = fib6_dump_node;
1749 cb->args[0] = (long)w;
1750 read_lock_bh(&rt6_lock);
1752 read_unlock_bh(&rt6_lock);
1755 read_lock_bh(&rt6_lock);
1756 res = fib6_walk_continue(w);
1757 read_unlock_bh(&rt6_lock);
1760 if (res <= 0 && skb->len == 0)
1761 RT6_TRACE("%p>dump end\n", w);
1763 res = res < 0 ? res : skb->len;
1764 /* res < 0 is an error. (really, impossible)
1765 res == 0 means that dump is complete, but skb still can contain data.
1766 res > 0 dump is not complete, but frame is full.
1768 /* Destroy walker, if dump of this table is complete. */
1774 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1776 struct rtattr **rta = arg;
1779 struct sk_buff *skb;
1781 struct rt6_info *rt;
1783 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1787 /* Reserve room for dummy headers, this skb can pass
1788 through good chunk of routing engine.
1790 skb->mac.raw = skb->data;
1791 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1793 memset(&fl, 0, sizeof(fl));
1795 ipv6_addr_copy(&fl.fl6_src,
1796 (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1798 ipv6_addr_copy(&fl.fl6_dst,
1799 (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1802 memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1805 struct net_device *dev;
1806 dev = __dev_get_by_index(iif);
1815 memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1817 rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
1819 skb->dst = &rt->u.dst;
1821 NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1822 err = rt6_fill_node(skb, rt,
1823 &fl.fl6_dst, &fl.fl6_src,
1825 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1826 nlh->nlmsg_seq, 0, 0);
1832 err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
1842 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1843 struct netlink_skb_parms *req)
1845 struct sk_buff *skb;
1846 int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1847 u32 pid = current->pid;
1853 seq = nlh->nlmsg_seq;
1855 skb = alloc_skb(size, gfp_any());
1857 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1860 if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1862 netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1865 NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1866 netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1873 #ifdef CONFIG_PROC_FS
/*
 * Nominal size in bytes of one /proc/net/ipv6_route line; used to turn
 * a read offset into a number of whole records to skip.
 */
#define RT6_INFO_LEN ((32 + 4) + (32 + 4) + 32 + (40 + 5 + 1))
1886 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1888 struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
1891 if (arg->skip < arg->offset / RT6_INFO_LEN) {
1896 if (arg->len >= arg->length)
1899 for (i=0; i<16; i++) {
1900 sprintf(arg->buffer + arg->len, "%02x",
1901 rt->rt6i_dst.addr.s6_addr[i]);
1904 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1907 #ifdef CONFIG_IPV6_SUBTREES
1908 for (i=0; i<16; i++) {
1909 sprintf(arg->buffer + arg->len, "%02x",
1910 rt->rt6i_src.addr.s6_addr[i]);
1913 arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1916 sprintf(arg->buffer + arg->len,
1917 "00000000000000000000000000000000 00 ");
1921 if (rt->rt6i_nexthop) {
1922 for (i=0; i<16; i++) {
1923 sprintf(arg->buffer + arg->len, "%02x",
1924 rt->rt6i_nexthop->primary_key[i]);
1928 sprintf(arg->buffer + arg->len,
1929 "00000000000000000000000000000000");
1932 arg->len += sprintf(arg->buffer + arg->len,
1933 " %08x %08x %08x %08x %8s\n",
1934 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1935 rt->u.dst.__use, rt->rt6i_flags,
1936 rt->rt6i_dev ? rt->rt6i_dev->name : "");
1940 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1942 struct rt6_proc_arg arg;
1943 arg.buffer = buffer;
1944 arg.offset = offset;
1945 arg.length = length;
1949 read_lock_bh(&rt6_lock);
1950 fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1951 read_unlock_bh(&rt6_lock);
1955 *start += offset % RT6_INFO_LEN;
1957 arg.len -= offset % RT6_INFO_LEN;
1959 if (arg.len > length)
1967 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1969 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1970 rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1971 rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1972 rt6_stats.fib_rt_cache,
1973 atomic_read(&ip6_dst_ops.entries),
1974 rt6_stats.fib_discarded_routes);
1979 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1981 return single_open(file, rt6_stats_seq_show, NULL);
1984 static struct file_operations rt6_stats_seq_fops = {
1985 .owner = THIS_MODULE,
1986 .open = rt6_stats_seq_open,
1988 .llseek = seq_lseek,
1989 .release = single_release,
1991 #endif /* CONFIG_PROC_FS */
1993 #ifdef CONFIG_SYSCTL
1995 static int flush_delay;
1998 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1999 void __user *buffer, size_t *lenp, loff_t *ppos)
2002 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2003 fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
2009 ctl_table ipv6_route_table[] = {
2011 .ctl_name = NET_IPV6_ROUTE_FLUSH,
2012 .procname = "flush",
2013 .data = &flush_delay,
2014 .maxlen = sizeof(int),
2016 .proc_handler = &ipv6_sysctl_rtcache_flush
2019 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2020 .procname = "gc_thresh",
2021 .data = &ip6_dst_ops.gc_thresh,
2022 .maxlen = sizeof(int),
2024 .proc_handler = &proc_dointvec,
2027 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2028 .procname = "max_size",
2029 .data = &ip6_rt_max_size,
2030 .maxlen = sizeof(int),
2032 .proc_handler = &proc_dointvec,
2035 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2036 .procname = "gc_min_interval",
2037 .data = &ip6_rt_gc_min_interval,
2038 .maxlen = sizeof(int),
2040 .proc_handler = &proc_dointvec_jiffies,
2041 .strategy = &sysctl_jiffies,
2044 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2045 .procname = "gc_timeout",
2046 .data = &ip6_rt_gc_timeout,
2047 .maxlen = sizeof(int),
2049 .proc_handler = &proc_dointvec_jiffies,
2050 .strategy = &sysctl_jiffies,
2053 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2054 .procname = "gc_interval",
2055 .data = &ip6_rt_gc_interval,
2056 .maxlen = sizeof(int),
2058 .proc_handler = &proc_dointvec_jiffies,
2059 .strategy = &sysctl_jiffies,
2062 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2063 .procname = "gc_elasticity",
2064 .data = &ip6_rt_gc_elasticity,
2065 .maxlen = sizeof(int),
2067 .proc_handler = &proc_dointvec_jiffies,
2068 .strategy = &sysctl_jiffies,
2071 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2072 .procname = "mtu_expires",
2073 .data = &ip6_rt_mtu_expires,
2074 .maxlen = sizeof(int),
2076 .proc_handler = &proc_dointvec_jiffies,
2077 .strategy = &sysctl_jiffies,
2080 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2081 .procname = "min_adv_mss",
2082 .data = &ip6_rt_min_advmss,
2083 .maxlen = sizeof(int),
2085 .proc_handler = &proc_dointvec_jiffies,
2086 .strategy = &sysctl_jiffies,
2089 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2090 .procname = "gc_min_interval_ms",
2091 .data = &ip6_rt_gc_min_interval,
2092 .maxlen = sizeof(int),
2094 .proc_handler = &proc_dointvec_ms_jiffies,
2095 .strategy = &sysctl_ms_jiffies,
2102 void __init ip6_route_init(void)
2104 struct proc_dir_entry *p;
2106 ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2107 sizeof(struct rt6_info),
2108 0, SLAB_HWCACHE_ALIGN,
2110 if (!ip6_dst_ops.kmem_cachep)
2111 panic("cannot create ip6_dst_cache");
2114 #ifdef CONFIG_PROC_FS
2115 p = proc_net_create("ipv6_route", 0, rt6_proc_info);
2117 p->owner = THIS_MODULE;
2119 proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
2126 void ip6_route_cleanup(void)
2128 #ifdef CONFIG_PROC_FS
2129 proc_net_remove("ipv6_route");
2130 proc_net_remove("rt6_stats");
2137 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);