2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
27 #include <linux/config.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/times.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/route.h>
35 #include <linux/netdevice.h>
36 #include <linux/in6.h>
37 #include <linux/init.h>
38 #include <linux/netlink.h>
39 #include <linux/if_arp.h>
42 #include <linux/proc_fs.h>
43 #include <linux/seq_file.h>
48 #include <net/ip6_fib.h>
49 #include <net/ip6_route.h>
50 #include <net/ndisc.h>
51 #include <net/addrconf.h>
53 #include <linux/rtnetlink.h>
57 #include <asm/uaccess.h>
60 #include <linux/sysctl.h>
/*
 * Debug macros, garbage-collection tunables and forward declarations.
 * NOTE(review): two RT6_TRACE definitions appear back to back — the
 * #if/#else preprocessor lines selecting between them are missing
 * from this view; confirm against the full source.
 */
63 /* Set to 3 to get tracing. */
67 #define RDBG(x) printk x
68 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
71 #define RT6_TRACE(x...) do { ; } while (0)
/* Routing-cache GC parameters (jiffies-based intervals/timeouts). */
75 static int ip6_rt_max_size = 4096;
76 static int ip6_rt_gc_min_interval = HZ / 2;
77 static int ip6_rt_gc_timeout = 60*HZ;
78 int ip6_rt_gc_interval = 30*HZ;
79 static int ip6_rt_gc_elasticity = 9;
80 static int ip6_rt_mtu_expires = 10*60*HZ;
/* Minimum advertised MSS: minimum link MTU less TCP (20) and IPv6 (40) headers. */
81 static int ip6_rt_min_advmss = IPV6_MIN_MTU - 20 - 40;
/* Forward declarations for the dst_ops callbacks and packet-drop handlers. */
83 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
84 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
85 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
86 static void ip6_dst_destroy(struct dst_entry *);
87 static void ip6_dst_ifdown(struct dst_entry *,
88 struct net_device *dev, int how);
89 static int ip6_dst_gc(void);
91 static int ip6_pkt_discard(struct sk_buff *skb);
92 static int ip6_pkt_discard_out(struct sk_buff *skb);
93 static void ip6_link_failure(struct sk_buff *skb);
94 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
/*
 * dst_ops vtable wiring the IPv6 routing-cache entries to this file's
 * callbacks. NOTE(review): some members (e.g. .family, .gc) are missing
 * from this view of the initializer.
 */
96 static struct dst_ops ip6_dst_ops = {
98 .protocol = __constant_htons(ETH_P_IPV6),
101 .check = ip6_dst_check,
102 .destroy = ip6_dst_destroy,
103 .ifdown = ip6_dst_ifdown,
104 .negative_advice = ip6_negative_advice,
105 .link_failure = ip6_link_failure,
106 .update_pmtu = ip6_rt_update_pmtu,
107 .entry_size = sizeof(struct rt6_info),
/*
 * Sentinel route returned when no real route matches: a reject route
 * bound to the loopback device that discards packets (input/output are
 * the discard handlers) and reports -ENETUNREACH.  Permanently held via
 * the initial refcounts so it is never freed.
 */
110 struct rt6_info ip6_null_entry = {
113 .__refcnt = ATOMIC_INIT(1),
115 .dev = &loopback_dev,
117 .error = -ENETUNREACH,
118 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
119 .input = ip6_pkt_discard,
120 .output = ip6_pkt_discard_out,
122 .path = (struct dst_entry*)&ip6_null_entry,
125 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
/* Worst possible metric so any real route is preferred. */
126 .rt6i_metric = ~(u32) 0,
127 .rt6i_ref = ATOMIC_INIT(1),
/*
 * Root node of the IPv6 FIB radix tree; its leaf starts out as the
 * null (reject) route.  rt6_lock serializes all access to the tree.
 */
130 struct fib6_node ip6_routing_table = {
131 .leaf = &ip6_null_entry,
132 .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO,
135 /* Protects all the ip6 fib */
137 DEFINE_RWLOCK(rt6_lock);
140 /* allocate dst with ip6_dst_ops */
/* Thin wrapper: allocate a dst_entry through ip6_dst_ops and cast to rt6_info. */
141 static __inline__ struct rt6_info *ip6_dst_alloc(void)
143 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops.destroy callback: detach the route from its inet6_dev.
 * NOTE(review): the line dropping the idev reference (presumably
 * in6_dev_put) is missing from this view — confirm in full source.
 */
146 static void ip6_dst_destroy(struct dst_entry *dst)
148 struct rt6_info *rt = (struct rt6_info *)dst;
149 struct inet6_dev *idev = rt->rt6i_idev;
152 rt->rt6i_idev = NULL;
/*
 * dst_ops.ifdown callback: when the route's device goes away, re-home
 * the route onto the loopback inet6_dev so the rt6i_idev pointer stays
 * valid.  NOTE(review): the reference-drop on the old idev is not
 * visible here.
 */
157 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
160 struct rt6_info *rt = (struct rt6_info *)dst;
161 struct inet6_dev *idev = rt->rt6i_idev;
163 if (dev != &loopback_dev && idev != NULL && idev->dev == dev) {
164 struct inet6_dev *loopback_idev = in6_dev_get(&loopback_dev);
165 if (loopback_idev != NULL) {
166 rt->rt6i_idev = loopback_idev;
/* True when the route carries RTF_EXPIRES and its expiry time has passed. */
172 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
174 return (rt->rt6i_flags & RTF_EXPIRES &&
175 time_after(jiffies, rt->rt6i_expires));
179 * Route lookup. Any rt6_lock is implied.
/*
 * Walk the sibling list of routes looking for one whose device matches
 * the requested output interface (oif); loopback routes are tracked
 * separately as a fallback.  Falls back to ip6_null_entry when nothing
 * matches.  NOTE(review): several branches (strict handling, returns)
 * are missing from this view.
 */
182 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
186 struct rt6_info *local = NULL;
187 struct rt6_info *sprt;
190 for (sprt = rt; sprt; sprt = sprt->u.next) {
191 struct net_device *dev = sprt->rt6i_dev;
192 if (dev->ifindex == oif)
194 if (dev->flags & IFF_LOOPBACK) {
195 if (sprt->rt6i_idev == NULL ||
196 sprt->rt6i_idev->dev->ifindex != oif) {
199 if (local && (!oif ||
200 local->rt6i_idev->dev->ifindex == oif))
211 return &ip6_null_entry;
217 * pointer to the last default router chosen. BH is disabled locally.
/* Cached "last chosen" default router and the spinlock guarding it. */
219 static struct rt6_info *rt6_dflt_pointer;
220 static DEFINE_SPINLOCK(rt6_dflt_lock);
/*
 * Forget the cached default router when it is being removed.
 * Passing NULL clears the cache unconditionally.
 */
222 void rt6_reset_dflt_pointer(struct rt6_info *rt)
224 spin_lock_bh(&rt6_dflt_lock);
225 if (rt == NULL || rt == rt6_dflt_pointer) {
226 RT6_TRACE("reset default router: %p->NULL\n", rt6_dflt_pointer);
227 rt6_dflt_pointer = NULL;
229 spin_unlock_bh(&rt6_dflt_lock);
232 /* Default Router Selection (RFC 2461 6.3.6) */
/*
 * Pick the best default router from the sibling list: score candidates
 * by neighbour reachability state (see file-header comment: prefer
 * REACHABLE/STALE/DELAY/PROBE, round-robin otherwise), sticking with
 * the cached rt6_dflt_pointer when it is still usable, and finally
 * falling back to any addrconf default route or ip6_null_entry.
 * NOTE(review): the scoring switch body and several control-flow lines
 * are missing from this view; the 'm'/'mpri' scoring variables are
 * declared in lines not shown.
 */
233 static struct rt6_info *rt6_best_dflt(struct rt6_info *rt, int oif)
235 struct rt6_info *match = NULL;
236 struct rt6_info *sprt;
239 for (sprt = rt; sprt; sprt = sprt->u.next) {
240 struct neighbour *neigh;
245 sprt->rt6i_dev->ifindex == oif))
248 if (rt6_check_expired(sprt))
251 if (sprt == rt6_dflt_pointer)
/* Score by neighbour cache state (switch body not visible here). */
254 if ((neigh = sprt->rt6i_nexthop) != NULL) {
255 read_lock_bh(&neigh->lock);
256 switch (neigh->nud_state) {
274 read_unlock_bh(&neigh->lock);
277 read_unlock_bh(&neigh->lock);
282 if (m > mpri || m >= 12) {
286 /* we choose the last default router if it
287 * is in (probably) reachable state.
288 * If route changed, we should do pmtu
289 * discovery. --yoshfuji
296 spin_lock(&rt6_dflt_lock);
299 * No default routers are known to be reachable.
/* Round-robin: continue from the router after the cached pointer. */
302 if (rt6_dflt_pointer) {
303 for (sprt = rt6_dflt_pointer->u.next;
304 sprt; sprt = sprt->u.next) {
305 if (sprt->u.dst.obsolete <= 0 &&
306 sprt->u.dst.error == 0 &&
307 !rt6_check_expired(sprt)) {
314 sprt = sprt->u.next) {
315 if (sprt->u.dst.obsolete <= 0 &&
316 sprt->u.dst.error == 0 &&
317 !rt6_check_expired(sprt)) {
321 if (sprt == rt6_dflt_pointer)
328 if (rt6_dflt_pointer != match)
329 RT6_TRACE("changed default router: %p->%p\n",
330 rt6_dflt_pointer, match);
331 rt6_dflt_pointer = match;
333 spin_unlock(&rt6_dflt_lock);
337 * Last Resort: if no default routers found,
338 * use addrconf default route.
339 * We don't record this route.
341 for (sprt = ip6_routing_table.leaf;
342 sprt; sprt = sprt->u.next) {
343 if (!rt6_check_expired(sprt) &&
344 (sprt->rt6i_flags & RTF_DEFAULT) &&
347 sprt->rt6i_dev->ifindex == oif))) {
353 /* no default route. give up. */
354 match = &ip6_null_entry;
/*
 * Public route lookup: find the FIB node for (daddr, saddr), match on
 * the output interface, and return the route with a hold taken.
 * NOTE(review): the loop/retry structure and the error-path return are
 * not fully visible here.
 */
361 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
364 struct fib6_node *fn;
367 read_lock_bh(&rt6_lock);
368 fn = fib6_lookup(&ip6_routing_table, daddr, saddr);
369 rt = rt6_device_match(fn->leaf, oif, strict);
370 dst_hold(&rt->u.dst);
372 read_unlock_bh(&rt6_lock);
374 rt->u.dst.lastuse = jiffies;
375 if (rt->u.dst.error == 0)
/* Error route: drop the hold taken above. */
377 dst_release(&rt->u.dst);
381 /* ip6_ins_rt is called with FREE rt6_lock.
382 It takes new route entry, the addition fails by any reason the
383 route is freed. In any case, if caller does not hold it, it may
/* Insert a route into the FIB under the write lock; returns fib6_add's result. */
387 int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
388 void *_rtattr, struct netlink_skb_parms *req)
392 write_lock_bh(&rt6_lock);
393 err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
394 write_unlock_bh(&rt6_lock);
399 /* No rt6_lock! If COW failed, the function returns dead route entry
400 with dst->error set to errno value.
/*
 * Clone (copy-on-write) a route into a /128 host cache entry for
 * daddr/saddr, resolve its nexthop neighbour, and insert it into the
 * FIB.  On allocation failure the held ip6_null_entry is returned.
 * NOTE(review): the NULL-check on ip6_rt_copy's result and the success
 * return are not visible in this view.
 */
403 static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
404 struct in6_addr *saddr, struct netlink_skb_parms *req)
413 rt = ip6_rt_copy(ort);
416 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
/* Non-gateway clone: destination itself is the nexthop; flag anycast
 * when daddr equals a non-host prefix's address. */
417 if (rt->rt6i_dst.plen != 128 &&
418 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
419 rt->rt6i_flags |= RTF_ANYCAST;
420 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
423 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
424 rt->rt6i_dst.plen = 128;
425 rt->rt6i_flags |= RTF_CACHE;
426 rt->u.dst.flags |= DST_HOST;
428 #ifdef CONFIG_IPV6_SUBTREES
429 if (rt->rt6i_src.plen && saddr) {
430 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
431 rt->rt6i_src.plen = 128;
435 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
437 dst_hold(&rt->u.dst);
439 err = ip6_ins_rt(rt, NULL, NULL, req);
443 rt->u.dst.error = err;
/* Allocation-failure path: hand back the permanent null route. */
447 dst_hold(&ip6_null_entry.u.dst);
448 return &ip6_null_entry;
/*
 * Lookup helper macro: on a strict-match miss, walk back up the tree
 * (fn->parent) to the nearest ancestor carrying route info, giving up
 * at the root.  NOTE(review): the goto/continue statements and closing
 * braces of the macro are missing from this view.
 */
451 #define BACKTRACK() \
452 if (rt == &ip6_null_entry && strict) { \
453 while ((fn = fn->parent) != NULL) { \
454 if (fn->fn_flags & RTN_ROOT) { \
455 dst_hold(&rt->u.dst); \
458 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Route an incoming packet: look up by the packet's destination/source
 * addresses, match the receiving device, and COW a host cache entry
 * when the matched route has no nexthop yet.  The result is attached
 * to skb->dst.  NOTE(review): the relookup labels, BACKTRACK() uses and
 * the 'attempts' initialization are missing from this view.
 */
464 void ip6_route_input(struct sk_buff *skb)
466 struct fib6_node *fn;
/* Strict interface matching for multicast/link-local destinations. */
471 strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
474 read_lock_bh(&rt6_lock);
476 fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr,
477 &skb->nh.ipv6h->saddr);
482 if ((rt->rt6i_flags & RTF_CACHE)) {
483 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
485 dst_hold(&rt->u.dst);
489 rt = rt6_device_match(rt, skb->dev->ifindex, strict);
492 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
493 struct rt6_info *nrt;
494 dst_hold(&rt->u.dst);
495 read_unlock_bh(&rt6_lock);
497 nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
498 &skb->nh.ipv6h->saddr,
501 dst_release(&rt->u.dst);
504 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
507 /* Race condition! In the gap, when rt6_lock was
508 released someone could insert this route. Relookup.
510 dst_release(&rt->u.dst);
513 dst_hold(&rt->u.dst);
516 read_unlock_bh(&rt6_lock);
518 rt->u.dst.lastuse = jiffies;
520 skb->dst = (struct dst_entry *) rt;
/*
 * Route an outgoing flow: same structure as ip6_route_input but keyed
 * on the flowi, with default-router selection (rt6_best_dflt) applied
 * to RTF_DEFAULT matches.  Returns the held dst_entry.
 * NOTE(review): relookup labels, BACKTRACK() uses, 'attempts' init and
 * the final return are missing from this view.
 */
523 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
525 struct fib6_node *fn;
530 strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
533 read_lock_bh(&rt6_lock);
535 fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
540 if ((rt->rt6i_flags & RTF_CACHE)) {
541 rt = rt6_device_match(rt, fl->oif, strict);
543 dst_hold(&rt->u.dst);
/* Default routes with addrconf-or-worse priority go through RFC 2461
 * default-router selection; others just match on the interface. */
546 if (rt->rt6i_flags & RTF_DEFAULT) {
547 if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
548 rt = rt6_best_dflt(rt, fl->oif);
550 rt = rt6_device_match(rt, fl->oif, strict);
554 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
555 struct rt6_info *nrt;
556 dst_hold(&rt->u.dst);
557 read_unlock_bh(&rt6_lock);
559 nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
561 dst_release(&rt->u.dst);
564 if (rt->u.dst.error != -EEXIST || --attempts <= 0)
567 /* Race condition! In the gap, when rt6_lock was
568 released someone could insert this route. Relookup.
570 dst_release(&rt->u.dst);
573 dst_hold(&rt->u.dst);
576 read_unlock_bh(&rt6_lock);
578 rt->u.dst.lastuse = jiffies;
585 * Destination cache support functions
/*
 * dst_ops.check: a cached route is still valid while its FIB node's
 * serial number matches the cookie recorded at lookup time.
 */
588 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
592 rt = (struct rt6_info *) dst;
594 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops.negative_advice: drop a cache-only route that upper layers
 * report as bad.  NOTE(review): the NULL-check and return of this
 * function are missing from this view.
 */
600 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
602 struct rt6_info *rt = (struct rt6_info *) dst;
605 if (rt->rt6i_flags & RTF_CACHE)
606 ip6_del_rt(rt, NULL, NULL, NULL);
/*
 * dst_ops.link_failure: report address unreachable to the sender, then
 * expire the cached route (or invalidate the default route's FIB node
 * serial number so lookups re-validate).
 */
613 static void ip6_link_failure(struct sk_buff *skb)
617 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
619 rt = (struct rt6_info *) skb->dst;
621 if (rt->rt6i_flags&RTF_CACHE) {
622 dst_set_expires(&rt->u.dst, 0);
623 rt->rt6i_flags |= RTF_EXPIRES;
624 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
625 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops.update_pmtu: shrink the MTU metric on a host (/128) route
 * when a smaller path MTU is learned; below the IPv6 minimum MTU,
 * keep reported MTU semantics by setting the ALLFRAG feature instead.
 * NOTE(review): the mtu clamp inside the < IPV6_MIN_MTU branch is not
 * visible here.
 */
629 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
631 struct rt6_info *rt6 = (struct rt6_info*)dst;
633 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
634 rt6->rt6i_flags |= RTF_MODIFIED;
635 if (mtu < IPV6_MIN_MTU) {
637 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
639 dst->metrics[RTAX_MTU-1] = mtu;
643 /* Protected by rt6_lock. */
/* Singly linked list of dst entries created for ndisc, reaped by ndisc_dst_gc(). */
644 static struct dst_entry *ndisc_dst_gc_list;
645 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derive the advertised MSS from a link MTU: subtract IPv6+TCP headers,
 * clamp to the configured minimum, and cap per the IPV6_MAXPLEN rule
 * below.  NOTE(review): the final return/assignment after the cap test
 * is missing from this view.
 */
647 static inline unsigned int ipv6_advmss(unsigned int mtu)
649 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
651 if (mtu < ip6_rt_min_advmss)
652 mtu = ip6_rt_min_advmss;
655 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
656 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
657 * IPV6_MAXPLEN is also valid and means: "any MSS,
658 * rely only on pmtu discovery"
660 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/*
 * Allocate a throwaway host dst for neighbour discovery traffic to
 * 'addr' on 'dev': resolves/uses a neighbour, fills metrics from the
 * device, chains the entry on ndisc_dst_gc_list for later reaping, and
 * kicks the FIB GC timer.  NOTE(review): device/idev reference taking
 * and the error-path cleanup are missing from this view.
 */
665 struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
666 struct neighbour *neigh,
667 struct in6_addr *addr,
668 int (*output)(struct sk_buff *))
671 struct inet6_dev *idev = in6_dev_get(dev);
673 if (unlikely(idev == NULL))
676 rt = ip6_dst_alloc();
677 if (unlikely(rt == NULL)) {
/* Caller may pass an existing neighbour; otherwise resolve one here. */
686 neigh = ndisc_get_neigh(dev, addr);
689 rt->rt6i_idev = idev;
690 rt->rt6i_nexthop = neigh;
691 atomic_set(&rt->u.dst.__refcnt, 1);
692 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
693 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
694 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
695 rt->u.dst.output = output;
697 #if 0 /* there's no chance to use these for ndisc */
698 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
701 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
702 rt->rt6i_dst.plen = 128;
/* Chain on the GC list so ndisc_dst_gc() can reclaim the entry. */
705 write_lock_bh(&rt6_lock);
706 rt->u.dst.next = ndisc_dst_gc_list;
707 ndisc_dst_gc_list = &rt->u.dst;
708 write_unlock_bh(&rt6_lock);
710 fib6_force_start_gc();
713 return (struct dst_entry *)rt;
/*
 * Reap unreferenced entries from ndisc_dst_gc_list.  'more' presumably
 * reports how many entries remain — TODO confirm; the loop body,
 * unlinking and return are missing from this view.
 */
716 int ndisc_dst_gc(int *more)
718 struct dst_entry *dst, *next, **pprev;
722 pprev = &ndisc_dst_gc_list;
724 while ((dst = *pprev) != NULL) {
725 if (!atomic_read(&dst->__refcnt)) {
/*
 * dst_ops.gc callback: rate-limited routing-cache garbage collection
 * with an adaptive 'expire' horizon — it shrinks geometrically (by
 * 1/2^elasticity) while the cache stays over threshold.  Returns
 * nonzero while the cache still exceeds ip6_rt_max_size.
 * NOTE(review): the fib6_run_gc() call and last_gc update are missing
 * from this view.
 */
740 static unsigned expire = 30*HZ;
741 static unsigned long last_gc;
742 unsigned long now = jiffies;
/* Skip GC entirely when run too recently and the cache is not over-full. */
744 if (time_after(last_gc + ip6_rt_gc_min_interval, now) &&
745 atomic_read(&ip6_dst_ops.entries) <= ip6_rt_max_size)
751 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
752 expire = ip6_rt_gc_timeout>>1;
755 expire -= expire>>ip6_rt_gc_elasticity;
756 return (atomic_read(&ip6_dst_ops.entries) > ip6_rt_max_size);
759 /* Clean host part of a prefix. Not necessary in radix tree,
760 but results in cleaner routing tables.
762 Remove it only when all the things will work!
/*
 * Device MTU for IPv6: the per-device cnf.mtu6 when an inet6_dev
 * exists, else the IPv6 minimum MTU.  NOTE(review): the in6_dev_put
 * and return are missing from this view.
 */
765 static int ipv6_get_mtu(struct net_device *dev)
767 int mtu = IPV6_MIN_MTU;
768 struct inet6_dev *idev;
770 idev = in6_dev_get(dev);
772 mtu = idev->cnf.mtu6;
/*
 * Hop limit for a device: per-device cnf.hop_limit when an inet6_dev
 * exists, else the global ipv6_devconf default.  NOTE(review): the
 * in6_dev_put and return are missing from this view.
 */
778 int ipv6_get_hoplimit(struct net_device *dev)
780 int hoplimit = ipv6_devconf.hop_limit;
781 struct inet6_dev *idev;
783 idev = in6_dev_get(dev);
785 hoplimit = idev->cnf.hop_limit;
/*
 * Build an rt6_info from an in6_rtmsg (ioctl or netlink originated) and
 * insert it into the FIB: validates prefix lengths, resolves the output
 * device/idev, classifies the destination, promotes loopback routes to
 * reject routes, validates gateway nexthops, applies RTA_METRICS and
 * fills default metrics.  Returns ip6_ins_rt's result.
 * NOTE(review): heavily truncated in this view — error labels, several
 * goto statements, variable declarations (err, r, rta, addr_type,
 * gwa_type) and many closing braces are missing.
 */
795 int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
796 void *_rtattr, struct netlink_skb_parms *req)
801 struct rt6_info *rt = NULL;
802 struct net_device *dev = NULL;
803 struct inet6_dev *idev = NULL;
806 rta = (struct rtattr **) _rtattr;
808 if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
810 #ifndef CONFIG_IPV6_SUBTREES
/* Source routing only supported with subtrees compiled in. */
811 if (rtmsg->rtmsg_src_len)
814 if (rtmsg->rtmsg_ifindex) {
816 dev = dev_get_by_index(rtmsg->rtmsg_ifindex);
819 idev = in6_dev_get(dev);
824 if (rtmsg->rtmsg_metric == 0)
825 rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
827 rt = ip6_dst_alloc();
834 rt->u.dst.obsolete = -1;
835 rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info);
836 if (nlh && (r = NLMSG_DATA(nlh))) {
837 rt->rt6i_protocol = r->rtm_protocol;
839 rt->rt6i_protocol = RTPROT_BOOT;
842 addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst);
/* Multicast destinations take the multicast input path. */
844 if (addr_type & IPV6_ADDR_MULTICAST)
845 rt->u.dst.input = ip6_mc_input;
847 rt->u.dst.input = ip6_forward;
849 rt->u.dst.output = ip6_output;
851 ipv6_addr_prefix(&rt->rt6i_dst.addr,
852 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len);
853 rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len;
854 if (rt->rt6i_dst.plen == 128)
855 rt->u.dst.flags = DST_HOST;
857 #ifdef CONFIG_IPV6_SUBTREES
858 ipv6_addr_prefix(&rt->rt6i_src.addr,
859 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
860 rt->rt6i_src.plen = rtmsg->rtmsg_src_len;
863 rt->rt6i_metric = rtmsg->rtmsg_metric;
865 /* We cannot add true routes via loopback here,
866 they would result in kernel looping; promote them to reject routes
868 if ((rtmsg->rtmsg_flags&RTF_REJECT) ||
869 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
870 /* hold loopback dev/idev if we haven't done so. */
871 if (dev != &loopback_dev) {
878 idev = in6_dev_get(dev);
884 rt->u.dst.output = ip6_pkt_discard_out;
885 rt->u.dst.input = ip6_pkt_discard;
886 rt->u.dst.error = -ENETUNREACH;
887 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
891 if (rtmsg->rtmsg_flags & RTF_GATEWAY) {
892 struct in6_addr *gw_addr;
895 gw_addr = &rtmsg->rtmsg_gateway;
896 ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway);
897 gwa_type = ipv6_addr_type(gw_addr);
899 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
900 struct rt6_info *grt;
902 /* IPv6 strictly inhibits using not link-local
903 addresses as nexthop address.
904 Otherwise, router will not able to send redirects.
905 It is very good, but in some (rare!) circumstances
906 (SIT, PtP, NBMA NOARP links) it is handy to allow
907 some exceptions. --ANK
910 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* Non-link-local gateway: it must itself be reachable via a
 * non-gateway route on the requested interface. */
913 grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1);
919 if (dev != grt->rt6i_dev) {
920 dst_release(&grt->u.dst);
925 idev = grt->rt6i_idev;
927 in6_dev_hold(grt->rt6i_idev);
929 if (!(grt->rt6i_flags&RTF_GATEWAY))
931 dst_release(&grt->u.dst);
937 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
945 if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) {
946 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
947 if (IS_ERR(rt->rt6i_nexthop)) {
948 err = PTR_ERR(rt->rt6i_nexthop);
949 rt->rt6i_nexthop = NULL;
954 rt->rt6i_flags = rtmsg->rtmsg_flags;
/* Copy caller-supplied metrics from the RTA_METRICS nest. */
957 if (rta && rta[RTA_METRICS-1]) {
958 int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
959 struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
961 while (RTA_OK(attr, attrlen)) {
962 unsigned flavor = attr->rta_type;
964 if (flavor > RTAX_MAX) {
968 rt->u.dst.metrics[flavor-1] =
969 *(u32 *)RTA_DATA(attr);
971 attr = RTA_NEXT(attr, attrlen);
/* Fill defaults for any metric the caller left unset. */
975 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
976 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
977 if (!rt->u.dst.metrics[RTAX_MTU-1])
978 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
979 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
980 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
982 rt->rt6i_idev = idev;
983 return ip6_ins_rt(rt, nlh, _rtattr, req);
/* Error path: free the partially built route (labels not visible here). */
991 dst_free((struct dst_entry *) rt);
/*
 * Remove a route from the FIB under the write lock; clears the cached
 * default-router pointer first and drops the caller's dst reference.
 */
995 int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
999 write_lock_bh(&rt6_lock);
1001 rt6_reset_dflt_pointer(NULL);
1003 err = fib6_del(rt, nlh, _rtattr, req);
1004 dst_release(&rt->u.dst);
1006 write_unlock_bh(&rt6_lock);
/*
 * Delete the route matching the in6_rtmsg: locate the FIB node for the
 * exact dst/src prefixes, then scan its leaf chain for a route that
 * also matches the requested interface, gateway and metric, and hand
 * it to ip6_del_rt.  NOTE(review): the no-match return path is missing
 * from this view.
 */
1011 static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
1013 struct fib6_node *fn;
1014 struct rt6_info *rt;
1017 read_lock_bh(&rt6_lock);
1019 fn = fib6_locate(&ip6_routing_table,
1020 &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len,
1021 &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len);
1024 for (rt = fn->leaf; rt; rt = rt->u.next) {
1025 if (rtmsg->rtmsg_ifindex &&
1026 (rt->rt6i_dev == NULL ||
1027 rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex))
1029 if (rtmsg->rtmsg_flags&RTF_GATEWAY &&
1030 !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway))
1032 if (rtmsg->rtmsg_metric &&
1033 rtmsg->rtmsg_metric != rt->rt6i_metric)
/* Hold the route across the lock drop; ip6_del_rt releases it. */
1035 dst_hold(&rt->u.dst);
1036 read_unlock_bh(&rt6_lock);
1038 return ip6_del_rt(rt, nlh, _rtattr, req);
1041 read_unlock_bh(&rt6_lock);
/*
 * Handle an ICMPv6 redirect for 'dest' from router 'saddr': validate
 * that the redirect came from our current nexthop (checking all
 * default routers for default routes), update the neighbour cache,
 * then install a /128 RTF_DYNAMIC cache route via the new nexthop and
 * delete the old cache entry.  NOTE(review): several control-flow
 * lines (gotos, 'out' label, on-link branch) are missing from this
 * view.
 */
1049 void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
1050 struct neighbour *neigh, u8 *lladdr, int on_link)
1052 struct rt6_info *rt, *nrt;
1054 /* Locate old route to this destination. */
1055 rt = rt6_lookup(dest, NULL, neigh->dev->ifindex, 1);
1060 if (neigh->dev != rt->rt6i_dev)
1064 * Current route is on-link; redirect is always invalid.
1066 * Seems, previous statement is not true. It could
1067 * be node, which looks for us as on-link (f.e. proxy ndisc)
1068 * But then router serving it might decide, that we should
1069 * know truth 8)8) --ANK (980726).
1071 if (!(rt->rt6i_flags&RTF_GATEWAY))
1075 * RFC 2461 specifies that redirects should only be
1076 * accepted if they come from the nexthop to the target.
1077 * Due to the way default routers are chosen, this notion
1078 * is a bit fuzzy and one might need to check all default
1081 if (!ipv6_addr_equal(saddr, &rt->rt6i_gateway)) {
1082 if (rt->rt6i_flags & RTF_DEFAULT) {
1083 struct rt6_info *rt1;
1085 read_lock(&rt6_lock);
1086 for (rt1 = ip6_routing_table.leaf; rt1; rt1 = rt1->u.next) {
1087 if (ipv6_addr_equal(saddr, &rt1->rt6i_gateway)) {
1088 dst_hold(&rt1->u.dst);
1089 dst_release(&rt->u.dst);
1090 read_unlock(&rt6_lock);
1095 read_unlock(&rt6_lock);
1097 if (net_ratelimit())
1098 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1099 "for redirect target\n");
1106 * We have finally decided to accept it.
/* Record the new link-layer address; redirects from a router may also
 * transfer the IsRouter bit unless the target is on-link. */
1109 neigh_update(neigh, lladdr, NUD_STALE,
1110 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1111 NEIGH_UPDATE_F_OVERRIDE|
1112 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1113 NEIGH_UPDATE_F_ISROUTER))
1117 * Redirect received -> path was valid.
1118 * Look, redirects are sent only in response to data packets,
1119 * so that this nexthop apparently is reachable. --ANK
1121 dst_confirm(&rt->u.dst);
1123 /* Duplicate redirect: silently ignore. */
1124 if (neigh == rt->u.dst.neighbour)
1127 nrt = ip6_rt_copy(rt);
1131 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1133 nrt->rt6i_flags &= ~RTF_GATEWAY;
1135 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1136 nrt->rt6i_dst.plen = 128;
1137 nrt->u.dst.flags |= DST_HOST;
1139 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1140 nrt->rt6i_nexthop = neigh_clone(neigh);
1141 /* Reset pmtu, it may be better */
1142 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1143 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1145 if (ip6_ins_rt(nrt, NULL, NULL, NULL))
1148 if (rt->rt6i_flags&RTF_CACHE) {
1149 ip6_del_rt(rt, NULL, NULL, NULL);
1154 dst_release(&rt->u.dst);
1159 * Handle ICMP "packet too big" messages
1160 * i.e. Path MTU discovery
/*
 * Apply a learned path MTU for daddr: ignore increases, clamp below
 * the IPv6 minimum (setting ALLFRAG per RFC 2460), then either update
 * the existing cache route in place or create an expiring RTF_DYNAMIC
 * clone (COW for connected routes, plain copy for gatewayed ones).
 * NOTE(review): gotos, the allfrag conditionals' guards and the 'out'
 * label are missing from this view.
 */
1163 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1164 struct net_device *dev, u32 pmtu)
1166 struct rt6_info *rt, *nrt;
1169 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1173 if (pmtu >= dst_mtu(&rt->u.dst))
1176 if (pmtu < IPV6_MIN_MTU) {
1178 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1179 * MTU (1280) and a fragment header should always be included
1180 * after a node receiving Too Big message reporting PMTU is
1181 * less than the IPv6 Minimum Link MTU.
1183 pmtu = IPV6_MIN_MTU;
1187 /* New mtu received -> path was valid.
1188 They are sent only in response to data packets,
1189 so that this nexthop apparently is reachable. --ANK
1191 dst_confirm(&rt->u.dst);
1193 /* Host route. If it is static, it would be better
1194 not to override it, but add new one, so that
1195 when cache entry will expire old pmtu
1196 would return automatically.
1198 if (rt->rt6i_flags & RTF_CACHE) {
1199 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1201 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1202 dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
1203 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1208 Two cases are possible:
1209 1. It is connected route. Action: COW
1210 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1212 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
1213 nrt = rt6_cow(rt, daddr, saddr, NULL);
1214 if (!nrt->u.dst.error) {
1215 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1217 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1218 /* According to RFC 1981, detecting PMTU increase shouldn't be
1219 happened within 5 mins, the recommended timer is 10 mins.
1220 Here this route expiration time is set to ip6_rt_mtu_expires
1221 which is 10 mins. After 10 mins the decreased pmtu is expired
1222 and detecting PMTU increase will be automatically happened.
1224 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1225 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1227 dst_release(&nrt->u.dst);
1229 nrt = ip6_rt_copy(rt);
1232 ipv6_addr_copy(&nrt->rt6i_dst.addr, daddr);
1233 nrt->rt6i_dst.plen = 128;
1234 nrt->u.dst.flags |= DST_HOST;
1235 nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
1236 dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
1237 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
1238 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1240 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1241 ip6_ins_rt(nrt, NULL, NULL, NULL);
1245 dst_release(&rt->u.dst);
1249 * Misc support functions
/*
 * Shallow-copy a route into a fresh rt6_info for cloning: copies the
 * handlers, metrics, device, idev and keys while taking references,
 * clears the expiry/metric, and strips RTF_EXPIRES.
 * NOTE(review): the NULL-check on allocation and the final return are
 * missing from this view.
 */
1252 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1254 struct rt6_info *rt = ip6_dst_alloc();
1257 rt->u.dst.input = ort->u.dst.input;
1258 rt->u.dst.output = ort->u.dst.output;
1260 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1261 rt->u.dst.dev = ort->u.dst.dev;
1263 dev_hold(rt->u.dst.dev);
1264 rt->rt6i_idev = ort->rt6i_idev;
1266 in6_dev_hold(rt->rt6i_idev);
1267 rt->u.dst.lastuse = jiffies;
1268 rt->rt6i_expires = 0;
1270 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1271 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1272 rt->rt6i_metric = 0;
1274 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1275 #ifdef CONFIG_IPV6_SUBTREES
1276 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
/*
 * Find the default route via gateway 'addr' on 'dev' in the root FIB
 * node's leaf chain, returning it held (or NULL if absent — the
 * NULL-handling line is not visible here).
 */
1282 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1284 struct rt6_info *rt;
1285 struct fib6_node *fn;
1287 fn = &ip6_routing_table;
1289 write_lock_bh(&rt6_lock);
1290 for (rt = fn->leaf; rt; rt=rt->u.next) {
1291 if (dev == rt->rt6i_dev &&
1292 ipv6_addr_equal(&rt->rt6i_gateway, addr))
1296 dst_hold(&rt->u.dst);
1297 write_unlock_bh(&rt6_lock);
/*
 * Install a router-advertisement default route via 'gwaddr' on 'dev'
 * by building an in6_rtmsg and adding it, then look it up again to
 * return the held entry.
 */
1301 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1302 struct net_device *dev)
1304 struct in6_rtmsg rtmsg;
1306 memset(&rtmsg, 0, sizeof(struct in6_rtmsg));
1307 rtmsg.rtmsg_type = RTMSG_NEWROUTE;
1308 ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr);
1309 rtmsg.rtmsg_metric = 1024;
1310 rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES;
1312 rtmsg.rtmsg_ifindex = dev->ifindex;
1314 ip6_route_add(&rtmsg, NULL, NULL, NULL);
1315 return rt6_get_dflt_router(gwaddr, dev);
/*
 * Delete every autoconfigured default route.  Each deletion drops the
 * read lock (ip6_del_rt takes the write lock), so the scan restarts
 * from the head afterwards.  NOTE(review): the restart goto is missing
 * from this view.
 */
1318 void rt6_purge_dflt_routers(void)
1320 struct rt6_info *rt;
1323 read_lock_bh(&rt6_lock);
1324 for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) {
1325 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
1326 dst_hold(&rt->u.dst);
1328 rt6_reset_dflt_pointer(NULL);
1330 read_unlock_bh(&rt6_lock);
1332 ip6_del_rt(rt, NULL, NULL, NULL);
1337 read_unlock_bh(&rt6_lock);
/*
 * SIOCADDRT/SIOCDELRT ioctl entry point: requires CAP_NET_ADMIN,
 * copies the in6_rtmsg from userspace and dispatches to
 * ip6_route_add/ip6_route_del.  NOTE(review): the switch on cmd, the
 * rtnl locking and the error returns are partially missing from this
 * view.
 */
1340 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1342 struct in6_rtmsg rtmsg;
1346 case SIOCADDRT: /* Add a route */
1347 case SIOCDELRT: /* Delete a route */
1348 if (!capable(CAP_NET_ADMIN))
1350 err = copy_from_user(&rtmsg, arg,
1351 sizeof(struct in6_rtmsg));
1358 err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
1361 err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
1375 * Drop the packet on the floor
/*
 * Input handler for reject routes: bump the no-route counter, send
 * destination-unreachable back, and drop.  NOTE(review): the kfree_skb
 * and return are missing from this view.
 */
1378 static int ip6_pkt_discard(struct sk_buff *skb)
1380 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
1381 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_NOROUTE, 0, skb->dev);
/* Output-side variant: fix up skb->dev then reuse the input handler. */
1386 static int ip6_pkt_discard_out(struct sk_buff *skb)
1388 skb->dev = skb->dst->dev;
1389 return ip6_pkt_discard(skb);
1393 * Allocate a dst for local (unicast / anycast) address.
/*
 * Build the host route for a local (or anycast) address: bound to the
 * loopback device, with local input/standard output handlers and a
 * resolved nexthop neighbour.  Returns ERR_PTR(-ENOMEM) on failure.
 * NOTE(review): the anycast-selection conditional and final return are
 * missing from this view.
 */
1396 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1397 const struct in6_addr *addr,
1400 struct rt6_info *rt = ip6_dst_alloc();
1403 return ERR_PTR(-ENOMEM);
1405 dev_hold(&loopback_dev);
1408 rt->u.dst.flags = DST_HOST;
1409 rt->u.dst.input = ip6_input;
1410 rt->u.dst.output = ip6_output;
1411 rt->rt6i_dev = &loopback_dev;
1412 rt->rt6i_idev = idev;
1413 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1414 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1415 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1416 rt->u.dst.obsolete = -1;
1418 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1420 rt->rt6i_flags |= RTF_ANYCAST;
1422 rt->rt6i_flags |= RTF_LOCAL;
1423 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1424 if (rt->rt6i_nexthop == NULL) {
1425 dst_free((struct dst_entry *) rt);
1426 return ERR_PTR(-ENOMEM);
1429 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1430 rt->rt6i_dst.plen = 128;
1432 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * fib6_clean_tree callback: select routes on the dying device (or all
 * routes when arg is NULL), sparing the null entry.  NOTE(review): the
 * return values are missing from this view.
 */
1437 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1439 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1440 rt != &ip6_null_entry) {
1441 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes through 'dev' when the device is going down. */
1447 void rt6_ifdown(struct net_device *dev)
1449 write_lock_bh(&rt6_lock);
1450 fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev);
1451 write_unlock_bh(&rt6_lock);
/*
 * Argument bundle for rt6_mtu_change_route (device plus its new MTU;
 * the mtu member is not visible in this view).
 */
1454 struct rt6_mtu_change_arg
1456 struct net_device *dev;
/*
 * fib6_clean_tree callback for device MTU changes: update the route's
 * MTU metric (and ADVMSS) per the policy in the long comment below,
 * respecting a locked RTAX_MTU.  NOTE(review): the idev NULL-check and
 * the return are missing from this view.
 */
1460 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1462 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1463 struct inet6_dev *idev;
1465 /* In IPv6 pmtu discovery is not optional,
1466 so that RTAX_MTU lock cannot disable it.
1467 We still use this lock to block changes
1468 caused by addrconf/ndisc.
1471 idev = __in6_dev_get(arg->dev);
1475 /* For administrative MTU increase, there is no way to discover
1476 IPv6 PMTU increase, so PMTU increase should be updated here.
1477 Since RFC 1981 doesn't include administrative MTU increase
1478 update PMTU increase is a MUST. (i.e. jumbo frame)
1481 If new MTU is less than route PMTU, this new MTU will be the
1482 lowest MTU in the path, update the route PMTU to reflect PMTU
1483 decreases; if new MTU is greater than route PMTU, and the
1484 old MTU is the lowest MTU in the path, update the route PMTU
1485 to reflect the increase. In this case if the other nodes' MTU
1486 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1489 if (rt->rt6i_dev == arg->dev &&
1490 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1491 (dst_mtu(&rt->u.dst) > arg->mtu ||
1492 (dst_mtu(&rt->u.dst) < arg->mtu &&
1493 dst_mtu(&rt->u.dst) == idev->cnf.mtu6)))
1494 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1495 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/*
 * Propagate a device MTU change across the whole FIB.
 * NOTE(review): the lines filling 'arg' are missing from this view.
 */
1499 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1501 struct rt6_mtu_change_arg arg;
1505 read_lock_bh(&rt6_lock);
1506 fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg);
1507 read_unlock_bh(&rt6_lock);
/*
 * Translate a netlink rtmsg + attribute vector into the legacy
 * in6_rtmsg used by ip6_route_add/del, validating attribute lengths.
 * NOTE(review): the -EINVAL returns inside the length checks and the
 * final return are missing from this view.
 */
1510 static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta,
1511 struct in6_rtmsg *rtmsg)
1513 memset(rtmsg, 0, sizeof(*rtmsg));
1515 rtmsg->rtmsg_dst_len = r->rtm_dst_len;
1516 rtmsg->rtmsg_src_len = r->rtm_src_len;
1517 rtmsg->rtmsg_flags = RTF_UP;
1518 if (r->rtm_type == RTN_UNREACHABLE)
1519 rtmsg->rtmsg_flags |= RTF_REJECT;
1521 if (rta[RTA_GATEWAY-1]) {
1522 if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16))
1524 memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16);
1525 rtmsg->rtmsg_flags |= RTF_GATEWAY;
1527 if (rta[RTA_DST-1]) {
/* Prefixes are copied as whole bytes: (plen+7)/8. */
1528 if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3))
1530 memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3));
1532 if (rta[RTA_SRC-1]) {
1533 if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3))
1535 memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3));
1537 if (rta[RTA_OIF-1]) {
1538 if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int)))
1540 memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1542 if (rta[RTA_PRIORITY-1]) {
1543 if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4))
1545 memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4);
/*
 * inet6_rtm_delroute - RTM_DELROUTE netlink handler: parse the request
 * into an in6_rtmsg and delete the matching route.
 * NOTE(review): the listing elides the error return taken when
 * inet6_rtm_to_rtmsg fails (presumably -EINVAL).
 */
1550 int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1552 	struct rtmsg *r = NLMSG_DATA(nlh);
1553 	struct in6_rtmsg rtmsg;
1555 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1557 	return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/*
 * inet6_rtm_newroute - RTM_NEWROUTE netlink handler: parse the request
 * into an in6_rtmsg and add the route.  Mirrors inet6_rtm_delroute.
 * NOTE(review): the listing elides the error return taken when
 * inet6_rtm_to_rtmsg fails.
 */
1560 int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
1562 	struct rtmsg *r = NLMSG_DATA(nlh);
1563 	struct in6_rtmsg rtmsg;
1565 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
1567 	return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
/*
 * Carries the output skb and the netlink dump callback through the
 * fib6 tree walk performed by inet6_dump_fib / rt6_dump_route.
 */
1570 struct rt6_rtnl_dump_arg
1572 	struct sk_buff *skb;	/* skb being filled with RTM_NEWROUTE msgs */
1573 	struct netlink_callback *cb;	/* dump state (pid, seq, request) */
/*
 * rt6_fill_node - serialize one rt6_info into a netlink route message.
 * @dst/@src: when non-NULL (getroute replies), the specific addresses
 *            looked up; full /128 lengths are then reported.
 * @prefix:   when set, only RTF_PREFIX_RT routes are emitted; other
 *            routes return success without adding anything.
 * Uses NLMSG_NEW/RTA_PUT, which jump to (elided) failure labels that
 * trim the skb back to its starting tail pointer.
 */
1576 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
1577 			 struct in6_addr *dst, struct in6_addr *src,
1578 			 int iif, int type, u32 pid, u32 seq,
1579 			 int prefix, unsigned int flags)
1582 	struct nlmsghdr *nlh;
1583 	unsigned char *b = skb->tail;	/* rollback point for failures */
1584 	struct rta_cacheinfo ci;
1586 	if (prefix) {	/* user wants prefix routes only */
1587 		if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
1588 			/* success since this is not a prefix route */
1593 	nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
1594 	rtm = NLMSG_DATA(nlh);
1595 	rtm->rtm_family = AF_INET6;
1596 	rtm->rtm_dst_len = rt->rt6i_dst.plen;
1597 	rtm->rtm_src_len = rt->rt6i_src.plen;
1599 	rtm->rtm_table = RT_TABLE_MAIN;
	/* Classify the route type from its flags / device. */
1600 	if (rt->rt6i_flags&RTF_REJECT)
1601 		rtm->rtm_type = RTN_UNREACHABLE;
1602 	else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
1603 		rtm->rtm_type = RTN_LOCAL;
1605 		rtm->rtm_type = RTN_UNICAST;
1607 	rtm->rtm_scope = RT_SCOPE_UNIVERSE;
1608 	rtm->rtm_protocol = rt->rt6i_protocol;
1609 	if (rt->rt6i_flags&RTF_DYNAMIC)
1610 		rtm->rtm_protocol = RTPROT_REDIRECT;
1611 	else if (rt->rt6i_flags & RTF_ADDRCONF)
1612 		rtm->rtm_protocol = RTPROT_KERNEL;
1613 	else if (rt->rt6i_flags&RTF_DEFAULT)
1614 		rtm->rtm_protocol = RTPROT_RA;
1616 	if (rt->rt6i_flags&RTF_CACHE)
1617 		rtm->rtm_flags |= RTM_F_CLONED;
	/* Explicit dst (getroute) is reported as a host route (/128). */
1620 		RTA_PUT(skb, RTA_DST, 16, dst);
1621 		rtm->rtm_dst_len = 128;
1622 	} else if (rtm->rtm_dst_len)
1623 		RTA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
1624 #ifdef CONFIG_IPV6_SUBTREES
1626 		RTA_PUT(skb, RTA_SRC, 16, src);
1627 		rtm->rtm_src_len = 128;
1628 	} else if (rtm->rtm_src_len)
1629 		RTA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
1632 		RTA_PUT(skb, RTA_IIF, 4, &iif);
1634 		struct in6_addr saddr_buf;
		/* Preferred source address, if one can be selected. */
1635 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
1636 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
1638 	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
1639 		goto rtattr_failure;
1640 	if (rt->u.dst.neighbour)
1641 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
1643 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->rt6i_dev->ifindex);
1644 	RTA_PUT(skb, RTA_PRIORITY, 4, &rt->rt6i_metric);
	/* Cache info: ages/expiry reported in clock_t units. */
1645 	ci.rta_lastuse = jiffies_to_clock_t(jiffies - rt->u.dst.lastuse);
1646 	if (rt->rt6i_expires)
1647 		ci.rta_expires = jiffies_to_clock_t(rt->rt6i_expires - jiffies);
1650 	ci.rta_used = rt->u.dst.__use;
1651 	ci.rta_clntref = atomic_read(&rt->u.dst.__refcnt);
1652 	ci.rta_error = rt->u.dst.error;
1656 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
	/* Finalize the message length now that all attributes are in. */
1657 	nlh->nlmsg_len = skb->tail - b;
	/* (elided failure path) roll back everything written above */
1662 	skb_trim(skb, b - skb->data);
/*
 * rt6_dump_route - per-route callback for the fib6 dump walk.
 * Honors the RTM_F_PREFIX request flag (dump only prefix routes) when
 * the request header is large enough to carry a struct rtmsg.
 */
1666 static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
1668 	struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
1671 	if (arg->cb->nlh->nlmsg_len >= NLMSG_LENGTH(sizeof(struct rtmsg))) {
1672 		struct rtmsg *rtm = NLMSG_DATA(arg->cb->nlh);
1673 		prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
	/* NLM_F_MULTI: part of a multi-message dump. */
1677 	return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
1678 		     NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
1679 		     prefix, NLM_F_MULTI);
/*
 * fib6_dump_node - walker callback: dump every route hanging off a
 * fib6 leaf.  Suspends the walk (elided return path) when the dump
 * skb fills up, so it can be resumed on the next recvmsg.
 */
1682 static int fib6_dump_node(struct fib6_walker_t *w)
1685 	struct rt6_info *rt;
1687 	for (rt = w->leaf; rt; rt = rt->u.next) {
1688 		res = rt6_dump_route(rt, w->args);
1690 			/* Frame is full, suspend walking */
/*
 * fib6_dump_end - tear down the dump walker stored in cb->args[0] and
 * restore the original done callback saved in cb->args[1].
 * NOTE(review): the walker kfree appears elided from this listing.
 */
1700 static void fib6_dump_end(struct netlink_callback *cb)
1702 	struct fib6_walker_t *w = (void*)cb->args[0];
1706 		fib6_walker_unlink(w);
1709 	cb->done = (void*)cb->args[1];
/*
 * fib6_dump_done - netlink 'done' hook installed by inet6_dump_fib;
 * cleans up (via the elided fib6_dump_end call) and chains to the
 * original done callback if one was saved.
 */
1713 static int fib6_dump_done(struct netlink_callback *cb)
1716 	return cb->done ? cb->done(cb) : 0;
/*
 * inet6_dump_fib - RTM_GETROUTE dump entry point.  On the first call it
 * hooks a destructor (fib6_dump_done), allocates and starts a tree
 * walker; on subsequent calls it resumes the suspended walker.  State
 * lives in cb->args[0] (walker) and cb->args[1] (saved done callback).
 * NOTE(review): the first-call/resume branching and the initial walk
 * start are partially elided from this listing.
 */
1719 int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb)
1721 	struct rt6_rtnl_dump_arg arg;
1722 	struct fib6_walker_t *w;
1728 	w = (void*)cb->args[0];
		/*
1732 		 * 1. hook callback destructor.
		 */
1734 		cb->args[1] = (long)cb->done;
1735 		cb->done = fib6_dump_done;
		/*
1738 		 * 2. allocate and initialize walker.
		 */
1740 		w = kmalloc(sizeof(*w), GFP_ATOMIC);
1743 		RT6_TRACE("dump<%p", w);
1744 		memset(w, 0, sizeof(*w));
1745 		w->root = &ip6_routing_table;
1746 		w->func = fib6_dump_node;
1748 		cb->args[0] = (long)w;
1749 		read_lock_bh(&rt6_lock);
1751 		read_unlock_bh(&rt6_lock);
1754 		read_lock_bh(&rt6_lock);
1755 		res = fib6_walk_continue(w);
1756 		read_unlock_bh(&rt6_lock);
1759 	if (res <= 0 && skb->len == 0)
1760 		RT6_TRACE("%p>dump end\n", w);
1762 	res = res < 0 ? res : skb->len;
1763 	/* res < 0 is an error. (really, impossible)
1764 	   res == 0 means that dump is complete, but skb still can contain data.
1765 	   res > 0 dump is not complete, but frame is full.
	 */
1767 	/* Destroy walker, if dump of this table is complete. */
/*
 * inet6_rtm_getroute - single-route RTM_GETROUTE query handler.
 * Builds a flow from the RTA_SRC/RTA_DST/RTA_IIF/RTA_OIF attributes,
 * performs a route lookup, serializes the result with rt6_fill_node,
 * and unicasts the reply to the requester.
 * NOTE(review): attribute-length validation, iif handling branches and
 * most error returns are elided from this listing.
 */
1773 int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
1775 	struct rtattr **rta = arg;
1778 	struct sk_buff *skb;
1780 	struct rt6_info *rt;
1782 	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	/* Reserve room for dummy headers, this skb can pass
1787 	   through good chunk of routing engine.
	 */
1789 	skb->mac.raw = skb->data;
1790 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
1792 	memset(&fl, 0, sizeof(fl));
1794 		ipv6_addr_copy(&fl.fl6_src,
1795 			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
1797 		ipv6_addr_copy(&fl.fl6_dst,
1798 			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
1801 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
1804 		struct net_device *dev;
	/* An input interface was named: it must exist. */
1805 		dev = __dev_get_by_index(iif);
1814 		memcpy(&fl.oif, RTA_DATA(rta[RTA_OIF-1]), sizeof(int));
1816 	rt = (struct rt6_info*)ip6_route_output(NULL, &fl);
	/* Reply skb owns the dst reference from here on. */
1818 	skb->dst = &rt->u.dst;
1820 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
1821 	err = rt6_fill_node(skb, rt,
1822 			    &fl.fl6_dst, &fl.fl6_src,
1824 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
1825 			    nlh->nlmsg_seq, 0, 0);
1831 	err = netlink_unicast(rtnl, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT);
/*
 * inet6_rt_notify - broadcast a route change (add/del) to the
 * RTNLGRP_IPV6_ROUTE netlink group.  On allocation or fill failure,
 * listeners are told via netlink_set_err instead.
 * NOTE(review): the guards around nlh (seq is taken from it only when
 * nlh is non-NULL) and the kfree_skb on fill failure are elided here.
 */
1841 void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
1842 		     struct netlink_skb_parms *req)
1844 	struct sk_buff *skb;
	/* 256 bytes of attribute headroom beyond the rtmsg itself. */
1845 	int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
1846 	u32 pid = current->pid;
1852 		seq = nlh->nlmsg_seq;
1854 	skb = alloc_skb(size, gfp_any());
1856 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, ENOBUFS);
1859 	if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
1861 		netlink_set_err(rtnl, 0, RTNLGRP_IPV6_ROUTE, EINVAL);
1864 	NETLINK_CB(skb).dst_group = RTNLGRP_IPV6_ROUTE;
1865 	netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV6_ROUTE, gfp_any());
1872 #ifdef CONFIG_PROC_FS
1874 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route - format one route as a fixed-width /proc/net/ipv6_route
 * line (RT6_INFO_LEN bytes): dst/plen, src/plen, nexthop, metric,
 * refcnt, use count, flags, device name.  Skips entries before the
 * requested file offset and stops once the caller's buffer is full.
 */
1885 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
1887 	struct rt6_proc_arg *arg = (struct rt6_proc_arg *) p_arg;
	/* Seek: skip whole lines that precede the requested offset. */
1890 	if (arg->skip < arg->offset / RT6_INFO_LEN) {
1895 	if (arg->len >= arg->length)
	/* Destination address as 32 hex digits. */
1898 	for (i=0; i<16; i++) {
1899 		sprintf(arg->buffer + arg->len, "%02x",
1900 			rt->rt6i_dst.addr.s6_addr[i]);
1903 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
1906 #ifdef CONFIG_IPV6_SUBTREES
1907 	for (i=0; i<16; i++) {
1908 		sprintf(arg->buffer + arg->len, "%02x",
1909 			rt->rt6i_src.addr.s6_addr[i]);
1912 	arg->len += sprintf(arg->buffer + arg->len, " %02x ",
	/* Without subtrees the source column is a fixed all-zero field. */
1915 	sprintf(arg->buffer + arg->len,
1916 		"00000000000000000000000000000000 00 ");
1920 	if (rt->rt6i_nexthop) {
1921 		for (i=0; i<16; i++) {
1922 			sprintf(arg->buffer + arg->len, "%02x",
1923 				rt->rt6i_nexthop->primary_key[i]);
	/* No neighbour entry: print an all-zero nexthop. */
1927 		sprintf(arg->buffer + arg->len,
1928 			"00000000000000000000000000000000");
1931 	arg->len += sprintf(arg->buffer + arg->len,
1932 			    " %08x %08x %08x %08x %8s\n",
1933 			    rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
1934 			    rt->u.dst.__use, rt->rt6i_flags,
1935 			    rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * rt6_proc_info - legacy procfs read handler for /proc/net/ipv6_route.
 * Walks the routing tree under rt6_lock, letting rt6_info_route fill
 * the caller's buffer, then adjusts *start/len for the sub-line part
 * of the requested offset (each record is exactly RT6_INFO_LEN bytes).
 */
1939 static int rt6_proc_info(char *buffer, char **start, off_t offset, int length)
1941 	struct rt6_proc_arg arg;
1942 	arg.buffer = buffer;
1943 	arg.offset = offset;
1944 	arg.length = length;
1948 	read_lock_bh(&rt6_lock);
1949 	fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg);
1950 	read_unlock_bh(&rt6_lock);
	/* Account for an offset that falls inside a record. */
1954 	*start += offset % RT6_INFO_LEN;
1956 	arg.len -= offset % RT6_INFO_LEN;
1958 	if (arg.len > length)
/*
 * rt6_stats_seq_show - emit the single line of /proc/net/rt6_stats:
 * fib node/route counters plus the live dst-entry count, as hex fields.
 */
1966 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
1968 	seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
1969 		      rt6_stats.fib_nodes, rt6_stats.fib_route_nodes,
1970 		      rt6_stats.fib_rt_alloc, rt6_stats.fib_rt_entries,
1971 		      rt6_stats.fib_rt_cache,
1972 		      atomic_read(&ip6_dst_ops.entries),
1973 		      rt6_stats.fib_discarded_routes);
/* Open handler: single-record seq_file wrapping rt6_stats_seq_show. */
1978 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
1980 	return single_open(file, rt6_stats_seq_show, NULL);
/*
 * File operations for /proc/net/rt6_stats.
 * NOTE(review): the .read = seq_read member appears elided from this
 * listing — confirm against the full source.
 */
1983 static struct file_operations rt6_stats_seq_fops = {
1984 	.owner	 = THIS_MODULE,
1985 	.open	 = rt6_stats_seq_open,
1987 	.llseek	 = seq_lseek,
1988 	.release = single_release,
1990 #endif /* CONFIG_PROC_FS */
1992 #ifdef CONFIG_SYSCTL
1994 static int flush_delay;
/*
 * ipv6_sysctl_rtcache_flush - handler for net.ipv6.route.flush.
 * Writing a value triggers a garbage-collection pass: a non-positive
 * flush_delay forces an immediate full flush (~0UL), a positive value
 * is used as the gc timeout.
 * NOTE(review): the write/read branch and return statements are elided
 * from this listing (reads presumably return -EINVAL).
 */
1997 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
1998 			      void __user *buffer, size_t *lenp, loff_t *ppos)
2001 		proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2002 		fib6_run_gc(flush_delay <= 0 ? ~0UL : (unsigned long)flush_delay);
/*
 * sysctl table for net.ipv6.route.*.  Interval/timeout knobs use the
 * jiffies conversion handlers so userspace reads/writes seconds.
 */
2008 ctl_table ipv6_route_table[] = {
2010 		.ctl_name	=	NET_IPV6_ROUTE_FLUSH,
2011 		.procname	=	"flush",
2012 		.data		=	&flush_delay,
2013 		.maxlen		=	sizeof(int),
2015 		.proc_handler	=	&ipv6_sysctl_rtcache_flush
2018 		.ctl_name	=	NET_IPV6_ROUTE_GC_THRESH,
2019 		.procname	=	"gc_thresh",
2020 		.data		=	&ip6_dst_ops.gc_thresh,
2021 		.maxlen		=	sizeof(int),
2023 		.proc_handler	=	&proc_dointvec,
2026 		.ctl_name	=	NET_IPV6_ROUTE_MAX_SIZE,
2027 		.procname	=	"max_size",
2028 		.data		=	&ip6_rt_max_size,
2029 		.maxlen		=	sizeof(int),
2031 		.proc_handler	=	&proc_dointvec,
2034 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2035 		.procname	=	"gc_min_interval",
2036 		.data		=	&ip6_rt_gc_min_interval,
2037 		.maxlen		=	sizeof(int),
2039 		.proc_handler	=	&proc_dointvec_jiffies,
2040 		.strategy	=	&sysctl_jiffies,
2043 		.ctl_name	=	NET_IPV6_ROUTE_GC_TIMEOUT,
2044 		.procname	=	"gc_timeout",
2045 		.data		=	&ip6_rt_gc_timeout,
2046 		.maxlen		=	sizeof(int),
2048 		.proc_handler	=	&proc_dointvec_jiffies,
2049 		.strategy	=	&sysctl_jiffies,
2052 		.ctl_name	=	NET_IPV6_ROUTE_GC_INTERVAL,
2053 		.procname	=	"gc_interval",
2054 		.data		=	&ip6_rt_gc_interval,
2055 		.maxlen		=	sizeof(int),
2057 		.proc_handler	=	&proc_dointvec_jiffies,
2058 		.strategy	=	&sysctl_jiffies,
2061 		.ctl_name	=	NET_IPV6_ROUTE_GC_ELASTICITY,
2062 		.procname	=	"gc_elasticity",
2063 		.data		=	&ip6_rt_gc_elasticity,
2064 		.maxlen		=	sizeof(int),
	/* NOTE(review): gc_elasticity is a unitless count (init 9), yet it
	 * uses the jiffies conversion handler — looks wrong; confirm. */
2066 		.proc_handler	=	&proc_dointvec_jiffies,
2067 		.strategy	=	&sysctl_jiffies,
2070 		.ctl_name	=	NET_IPV6_ROUTE_MTU_EXPIRES,
2071 		.procname	=	"mtu_expires",
2072 		.data		=	&ip6_rt_mtu_expires,
2073 		.maxlen		=	sizeof(int),
2075 		.proc_handler	=	&proc_dointvec_jiffies,
2076 		.strategy	=	&sysctl_jiffies,
2079 		.ctl_name	=	NET_IPV6_ROUTE_MIN_ADVMSS,
2080 		.procname	=	"min_adv_mss",
2081 		.data		=	&ip6_rt_min_advmss,
2082 		.maxlen		=	sizeof(int),
	/* NOTE(review): min_adv_mss is a byte count, not a time value, yet
	 * it also uses the jiffies handler — looks wrong; confirm. */
2084 		.proc_handler	=	&proc_dointvec_jiffies,
2085 		.strategy	=	&sysctl_jiffies,
2088 		.ctl_name	=	NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2089 		.procname	=	"gc_min_interval_ms",
	/* Millisecond-resolution alias of gc_min_interval (same variable). */
2090 		.data		=	&ip6_rt_gc_min_interval,
2091 		.maxlen		=	sizeof(int),
2093 		.proc_handler	=	&proc_dointvec_ms_jiffies,
2094 		.strategy	=	&sysctl_ms_jiffies,
/*
 * ip6_route_init - boot-time setup for the IPv6 routing subsystem:
 * creates the rt6_info slab cache (fatal if that fails) and registers
 * the /proc/net/ipv6_route and /proc/net/rt6_stats entries.
 */
2101 void __init ip6_route_init(void)
2103 	struct proc_dir_entry *p;
2105 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
2106 						     sizeof(struct rt6_info),
2107 						     0, SLAB_HWCACHE_ALIGN,
2109 	if (!ip6_dst_ops.kmem_cachep)
2110 		panic("cannot create ip6_dst_cache");
2113 #ifdef CONFIG_PROC_FS
2114 	p = proc_net_create("ipv6_route", 0, rt6_proc_info);
	/* proc_net_create may fail; only set the owner when it succeeded. */
2116 		p->owner = THIS_MODULE;
2118 	proc_net_fops_create("rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * ip6_route_cleanup - teardown counterpart of ip6_route_init: removes
 * the proc entries and destroys the rt6_info slab cache.
 * NOTE(review): intermediate cleanup calls (e.g. fib/gc teardown at the
 * elided original lines 2130-2135) are not visible in this listing.
 */
2125 void ip6_route_cleanup(void)
2127 #ifdef CONFIG_PROC_FS
2128 	proc_net_remove("ipv6_route");
2129 	proc_net_remove("rt6_stats");
2136 	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);