2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
26 * Fixed routing subtrees.
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
/*
 * Debug tracing macros.  Two RT6_TRACE definitions appear below (a
 * printk(KERN_DEBUG) variant and a no-op variant); the #if/#else guards
 * selecting between them are missing from this extraction — NOTE(review):
 * confirm against the original file.
 */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
/* 0 = do not clone off-link (non-gateway) routes in ip6_pol_route(). */
75 #define CLONE_OFFLINK_ROUTE 0
/* Forward declarations for the dst_ops callbacks and helpers below. */
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
/* RFC 4191 Route Information option support (add/lookup learned routes). */
91 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
92 struct in6_addr *gwaddr, int ifindex,
94 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
95 struct in6_addr *gwaddr, int ifindex);
/*
 * dst_ops for normal IPv6 routes: wires the cache-management callbacks
 * declared above into the generic dst layer.
 * NOTE(review): this extraction is missing interior lines (gaps in the
 * embedded numbering), e.g. the gc/family fields and closing braces.
 */
98 static struct dst_ops ip6_dst_ops = {
100 .protocol = __constant_htons(ETH_P_IPV6),
103 .check = ip6_dst_check,
104 .destroy = ip6_dst_destroy,
105 .ifdown = ip6_dst_ifdown,
106 .negative_advice = ip6_negative_advice,
107 .link_failure = ip6_link_failure,
108 .update_pmtu = ip6_rt_update_pmtu,
109 .local_out = ip6_local_out,
110 .entry_size = sizeof(struct rt6_info),
111 .entries = ATOMIC_INIT(0),
/* PMTU updates are ignored on blackhole dsts (body not visible here). */
114 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
/* dst_ops for blackhole routes: no ifdown/negative_advice/link_failure. */
118 static struct dst_ops ip6_dst_blackhole_ops = {
120 .protocol = __constant_htons(ETH_P_IPV6),
121 .destroy = ip6_dst_destroy,
122 .check = ip6_dst_check,
123 .update_pmtu = ip6_rt_blackhole_update_pmtu,
124 .entry_size = sizeof(struct rt6_info),
125 .entries = ATOMIC_INIT(0),
/*
 * Sentinel "no route" entry: discards packets with -ENETUNREACH.
 * Returned by the lookup paths instead of NULL; never freed
 * (static refcounts initialized to 1, metric set to worst possible).
 */
128 struct rt6_info ip6_null_entry = {
131 .__refcnt = ATOMIC_INIT(1),
134 .error = -ENETUNREACH,
135 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
136 .input = ip6_pkt_discard,
137 .output = ip6_pkt_discard_out,
139 .path = (struct dst_entry*)&ip6_null_entry,
142 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
143 .rt6i_metric = ~(u32) 0,
144 .rt6i_ref = ATOMIC_INIT(1),
147 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* Policy-routing sentinels, only built with multiple routing tables. */
149 static int ip6_pkt_prohibit(struct sk_buff *skb);
150 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
/*
 * "Prohibit" sentinel: like ip6_null_entry but replies with an
 * administratively-prohibited style discard (see ip6_pkt_prohibit*).
 * NOTE(review): .error and dst header fields are missing from this
 * extraction (numbering gaps).
 */
152 struct rt6_info ip6_prohibit_entry = {
155 .__refcnt = ATOMIC_INIT(1),
159 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
160 .input = ip6_pkt_prohibit,
161 .output = ip6_pkt_prohibit_out,
163 .path = (struct dst_entry*)&ip6_prohibit_entry,
166 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
167 .rt6i_metric = ~(u32) 0,
168 .rt6i_ref = ATOMIC_INIT(1),
/* "Blackhole" sentinel: silently discards in both directions. */
171 struct rt6_info ip6_blk_hole_entry = {
174 .__refcnt = ATOMIC_INIT(1),
178 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
179 .input = dst_discard,
180 .output = dst_discard,
182 .path = (struct dst_entry*)&ip6_blk_hole_entry,
185 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
186 .rt6i_metric = ~(u32) 0,
187 .rt6i_ref = ATOMIC_INIT(1),
192 /* allocate dst with ip6_dst_ops */
193 static __inline__ struct rt6_info *ip6_dst_alloc(void)
195 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops->destroy: drop this route's reference on its inet6_dev.
 * NOTE(review): the lines releasing idev (and closing braces) are
 * missing from this extraction.
 */
198 static void ip6_dst_destroy(struct dst_entry *dst)
200 struct rt6_info *rt = (struct rt6_info *)dst;
201 struct inet6_dev *idev = rt->rt6i_idev;
204 rt->rt6i_idev = NULL;
/*
 * dst_ops->ifdown: when @dev goes away, re-point the cached route's
 * idev at the loopback device so the dst stays valid until it is
 * garbage collected.  Interior lines (ref drop/hold) are missing here.
 */
209 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
212 struct rt6_info *rt = (struct rt6_info *)dst;
213 struct inet6_dev *idev = rt->rt6i_idev;
214 struct net_device *loopback_dev =
215 dev->nd_net->loopback_dev;
217 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
218 struct inet6_dev *loopback_idev =
219 in6_dev_get(loopback_dev);
220 if (loopback_idev != NULL) {
221 rt->rt6i_idev = loopback_idev;
227 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
229 return (rt->rt6i_flags & RTF_EXPIRES &&
230 time_after(jiffies, rt->rt6i_expires));
233 static inline int rt6_need_strict(struct in6_addr *daddr)
235 return (ipv6_addr_type(daddr) &
236 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
240 * Route lookup. Any table->tb6_lock is implied.
/*
 * Walk the rt6_info chain starting at @rt and pick the entry matching
 * the requested output interface @oif; loopback routes are tracked as a
 * weaker "local" fallback.  Falls back to ip6_null_entry when oif is
 * required but nothing matches.  NOTE(review): several interior lines
 * (break/continue statements, the local fallback return, function
 * braces) are missing from this extraction.
 */
243 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
247 struct rt6_info *local = NULL;
248 struct rt6_info *sprt;
251 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
252 struct net_device *dev = sprt->rt6i_dev;
253 if (dev->ifindex == oif)
255 if (dev->flags & IFF_LOOPBACK) {
256 if (sprt->rt6i_idev == NULL ||
257 sprt->rt6i_idev->dev->ifindex != oif) {
260 if (local && (!oif ||
261 local->rt6i_idev->dev->ifindex == oif))
272 return &ip6_null_entry;
277 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing (RFC 4191 context): if the next hop's
 * neighbour entry is not NUD_VALID and the probe interval has elapsed,
 * send a unicast-solicit NS to the router.  Rate-limited to at most one
 * probe per rtr_probe_interval.
 */
278 static void rt6_probe(struct rt6_info *rt)
280 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
282 * Okay, this does not seem to be appropriate
283 * for now, however, we need to check if it
284 * is really so; aka Router Reachability Probing.
286 * Router Reachability Probe MUST be rate-limited
287 * to no more than one per minute.
289 if (!neigh || (neigh->nud_state & NUD_VALID))
291 read_lock_bh(&neigh->lock);
292 if (!(neigh->nud_state & NUD_VALID) &&
293 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
294 struct in6_addr mcaddr;
295 struct in6_addr *target;
297 neigh->updated = jiffies;
298 read_unlock_bh(&neigh->lock);
300 target = (struct in6_addr *)&neigh->primary_key;
301 addrconf_addr_solict_mult(target, &mcaddr);
302 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
304 read_unlock_bh(&neigh->lock);
/* No-op stub when CONFIG_IPV6_ROUTER_PREF is disabled. */
307 static inline void rt6_probe(struct rt6_info *rt)
314 * Default Router Selection (RFC 2461 6.3.6)
/*
 * Score how well @rt's device matches @oif.  The return statements are
 * missing from this extraction; per the visible structure there are
 * distinct outcomes for exact ifindex match, loopback-with-matching-idev,
 * and no match.
 */
316 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
318 struct net_device *dev = rt->rt6i_dev;
319 if (!oif || dev->ifindex == oif)
321 if ((dev->flags & IFF_LOOPBACK) &&
322 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * Score next-hop reachability from the neighbour cache state.
 * Routes without a gateway next hop are exempt.  Return values are
 * missing from this extraction (numbering gaps).
 */
327 static inline int rt6_check_neigh(struct rt6_info *rt)
329 struct neighbour *neigh = rt->rt6i_nexthop;
331 if (rt->rt6i_flags & RTF_NONEXTHOP ||
332 !(rt->rt6i_flags & RTF_GATEWAY))
335 read_lock_bh(&neigh->lock);
336 if (neigh->nud_state & NUD_VALID)
338 #ifdef CONFIG_IPV6_ROUTER_PREF
339 else if (neigh->nud_state & NUD_FAILED)
344 read_unlock_bh(&neigh->lock);
/*
 * Combine device match, RFC 4191 router preference bits, and neighbour
 * reachability into a single comparable score; strict-flag failures are
 * rejected (returns missing from this extraction).
 */
350 static int rt6_score_route(struct rt6_info *rt, int oif,
355 m = rt6_check_dev(rt, oif);
356 if (!m && (strict & RT6_LOOKUP_F_IFACE))
358 #ifdef CONFIG_IPV6_ROUTER_PREF
359 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
361 n = rt6_check_neigh(rt);
362 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/*
 * Compare @rt against the best-so-far @match using rt6_score_route();
 * expired routes are skipped.  Probing and the score-update branch are
 * partially missing from this extraction.
 */
367 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
368 int *mpri, struct rt6_info *match)
372 if (rt6_check_expired(rt))
375 m = rt6_score_route(rt, oif, strict);
380 if (strict & RT6_LOOKUP_F_REACHABLE)
384 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/*
 * Scan all routes of equal @metric in this fib6 node, starting at the
 * round-robin head @rr_head and wrapping around via fn->leaf, keeping
 * the best-scoring match.
 */
392 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
393 struct rt6_info *rr_head,
394 u32 metric, int oif, int strict)
396 struct rt6_info *rt, *match;
400 for (rt = rr_head; rt && rt->rt6i_metric == metric;
401 rt = rt->u.dst.rt6_next)
402 match = find_match(rt, oif, strict, &mpri, match);
403 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
404 rt = rt->u.dst.rt6_next)
405 match = find_match(rt, oif, strict, &mpri, match);
/*
 * Default router selection (RFC 2461 6.3.6): pick the best route from
 * the node, advancing fn->rr_ptr round-robin when no reachable router
 * matched.  NOTE(review): the rr_ptr advance and parts of the fallback
 * are missing from this extraction.
 */
410 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
412 struct rt6_info *match, *rt0;
414 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
415 __FUNCTION__, fn->leaf, oif);
419 fn->rr_ptr = rt0 = fn->leaf;
421 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
424 (strict & RT6_LOOKUP_F_REACHABLE)) {
425 struct rt6_info *next = rt0->u.dst.rt6_next;
427 /* no entries matched; do round-robin */
428 if (!next || next->rt6i_metric != rt0->rt6i_metric)
435 RT6_TRACE("%s() => %p\n",
436 __FUNCTION__, match);
438 return (match ? match : &ip6_null_entry);
441 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process an RFC 4191 Route Information option received in a Router
 * Advertisement from @gwaddr on @dev: validate the option length against
 * the encoded prefix length, then add/refresh or (lifetime 0) delete the
 * learned route.  NOTE(review): several error-return lines are missing
 * from this extraction.
 */
442 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
443 struct in6_addr *gwaddr)
445 struct route_info *rinfo = (struct route_info *) opt;
446 struct in6_addr prefix_buf, *prefix;
451 if (len < sizeof(struct route_info)) {
455 /* Sanity check for prefix_len and length */
456 if (rinfo->length > 3) {
458 } else if (rinfo->prefix_len > 128) {
460 } else if (rinfo->prefix_len > 64) {
461 if (rinfo->length < 2) {
464 } else if (rinfo->prefix_len > 0) {
465 if (rinfo->length < 1) {
470 pref = rinfo->route_pref;
471 if (pref == ICMPV6_ROUTER_PREF_INVALID)
472 pref = ICMPV6_ROUTER_PREF_MEDIUM;
/* Clamp lifetime so jiffies arithmetic below cannot overflow. */
474 lifetime = ntohl(rinfo->lifetime);
475 if (lifetime == 0xffffffff) {
477 } else if (lifetime > 0x7fffffff/HZ) {
478 /* Avoid arithmetic overflow */
479 lifetime = 0x7fffffff/HZ - 1;
482 if (rinfo->length == 3)
483 prefix = (struct in6_addr *)rinfo->prefix;
485 /* this function is safe */
486 ipv6_addr_prefix(&prefix_buf,
487 (struct in6_addr *)rinfo->prefix,
489 prefix = &prefix_buf;
492 rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex);
/* lifetime 0 means "withdraw this route". */
494 if (rt && !lifetime) {
500 rt = rt6_add_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
503 rt->rt6i_flags = RTF_ROUTEINFO |
504 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
/* 0xffffffff = infinite lifetime: never expire. */
507 if (lifetime == 0xffffffff) {
508 rt->rt6i_flags &= ~RTF_EXPIRES;
510 rt->rt6i_expires = jiffies + HZ * lifetime;
511 rt->rt6i_flags |= RTF_EXPIRES;
513 dst_release(&rt->u.dst);
/*
 * BACKTRACK: when a subtree lookup landed on ip6_null_entry, walk back
 * up the fib6 tree (honouring source-routed subtrees) and retry until a
 * node with real route info is found or the root is reached.  Expanded
 * inside the ip6_pol_route* lookup loops.  Goto targets are missing
 * from this extraction.
 */
519 #define BACKTRACK(saddr) \
521 if (rt == &ip6_null_entry) { \
522 struct fib6_node *pn; \
524 if (fn->fn_flags & RTN_TL_ROOT) \
527 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
528 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
531 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Table-lookup backend for rt6_lookup(): find the fib6 node for the
 * flow's destination/source, filter by output device, backtrack on
 * failure, and take a usage reference under tb6_lock.
 */
537 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
538 struct flowi *fl, int flags)
540 struct fib6_node *fn;
543 read_lock_bh(&table->tb6_lock);
544 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
547 rt = rt6_device_match(rt, fl->oif, flags);
548 BACKTRACK(&fl->fl6_src);
550 dst_use(&rt->u.dst, jiffies);
551 read_unlock_bh(&table->tb6_lock);
/*
 * Public route lookup: builds a flowi from the addresses and defers to
 * the policy-rule engine with ip6_pol_route_lookup as the backend.
 * Caller owns a reference on the returned dst.  NOTE(review): the flowi
 * initialization lines are partially missing from this extraction.
 */
556 struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
567 struct dst_entry *dst;
568 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
571 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
572 flags |= RT6_LOOKUP_F_HAS_SADDR;
575 dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_lookup);
577 return (struct rt6_info *) dst;
584 EXPORT_SYMBOL(rt6_lookup);
586 /* ip6_ins_rt is called with FREE table->tb6_lock.
587 It takes new route entry, the addition fails by any reason the
588 route is freed. In any case, if caller does not hold it, it may
/* Insert @rt into its table under tb6_lock; fib6_add owns failure cleanup. */
592 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
595 struct fib6_table *table;
597 table = rt->rt6i_table;
598 write_lock_bh(&table->tb6_lock);
599 err = fib6_add(&table->tb6_root, rt, info);
600 write_unlock_bh(&table->tb6_lock);
/* Convenience wrapper supplying default netlink notification info. */
605 int ip6_ins_rt(struct rt6_info *rt)
607 struct nl_info info = {
610 return __ip6_ins_rt(rt, &info);
/*
 * Clone @ort into a host (/128) RTF_CACHE route for @daddr, resolving a
 * fresh neighbour entry.  For non-gateway originals the destination
 * itself becomes the gateway; a same-prefix non-host route marks the
 * clone RTF_ANYCAST.  NOTE(review): allocation-failure handling and the
 * return are missing from this extraction.
 */
613 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
614 struct in6_addr *saddr)
622 rt = ip6_rt_copy(ort);
625 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
626 if (rt->rt6i_dst.plen != 128 &&
627 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
628 rt->rt6i_flags |= RTF_ANYCAST;
629 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
632 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
633 rt->rt6i_dst.plen = 128;
634 rt->rt6i_flags |= RTF_CACHE;
635 rt->u.dst.flags |= DST_HOST;
637 #ifdef CONFIG_IPV6_SUBTREES
/* With source-routed subtrees, also pin the source to a host prefix. */
638 if (rt->rt6i_src.plen && saddr) {
639 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
640 rt->rt6i_src.plen = 128;
644 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/*
 * Cheaper clone for gateway/NONEXTHOP routes: host route to @daddr that
 * shares the original's neighbour entry instead of resolving a new one.
 */
651 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
653 struct rt6_info *rt = ip6_rt_copy(ort);
655 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
656 rt->rt6i_dst.plen = 128;
657 rt->rt6i_flags |= RTF_CACHE;
658 rt->u.dst.flags |= DST_HOST;
659 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/*
 * Core lookup for input/output paths: select the best route (reachable
 * routers preferred when not forwarding), and for non-cached non-null
 * results create a per-destination RTF_CACHE clone (COW for connected
 * routes, plain clone only if CLONE_OFFLINK_ROUTE) and insert it,
 * re-looking-up on insert race.  NOTE(review): goto labels, the retry
 * loop structure, and several lines are missing from this extraction —
 * do not infer exact control flow from what is visible.
 */
664 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
665 struct flowi *fl, int flags)
667 struct fib6_node *fn;
668 struct rt6_info *rt, *nrt;
/* Hosts (not routers) must prefer reachable default routers (RFC 4861). */
672 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
674 strict |= flags & RT6_LOOKUP_F_IFACE;
677 read_lock_bh(&table->tb6_lock);
680 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
683 rt = rt6_select(fn, oif, strict | reachable);
684 BACKTRACK(&fl->fl6_src);
685 if (rt == &ip6_null_entry ||
686 rt->rt6i_flags & RTF_CACHE)
689 dst_hold(&rt->u.dst);
690 read_unlock_bh(&table->tb6_lock);
692 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
693 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
695 #if CLONE_OFFLINK_ROUTE
696 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
702 dst_release(&rt->u.dst);
703 rt = nrt ? : &ip6_null_entry;
705 dst_hold(&rt->u.dst);
707 err = ip6_ins_rt(nrt);
716 * Race condition! In the gap, when table->tb6_lock was
717 * released someone could insert this route. Relookup.
719 dst_release(&rt->u.dst);
727 dst_hold(&rt->u.dst);
728 read_unlock_bh(&table->tb6_lock);
730 rt->u.dst.lastuse = jiffies;
736 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
737 struct flowi *fl, int flags)
739 return ip6_pol_route(table, fl->iif, fl, flags);
/*
 * Attach a route to an incoming skb: build a flowi from the IPv6 header
 * and resolve through the policy-rule engine, strict-interface for
 * multicast/link-local destinations.  NOTE(review): parts of the flowi
 * initializer are missing from this extraction.
 */
742 void ip6_route_input(struct sk_buff *skb)
744 struct ipv6hdr *iph = ipv6_hdr(skb);
745 int flags = RT6_LOOKUP_F_HAS_SADDR;
747 .iif = skb->dev->ifindex,
752 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
756 .proto = iph->nexthdr,
759 if (rt6_need_strict(&iph->daddr))
760 flags |= RT6_LOOKUP_F_IFACE;
762 skb->dst = fib6_rule_lookup(&init_net, &fl, flags, ip6_pol_route_input);
765 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
766 struct flowi *fl, int flags)
768 return ip6_pol_route(table, fl->oif, fl, flags);
/*
 * Output route lookup entry point used by sockets: strict interface for
 * multicast/link-local, saddr honoured when set.
 */
771 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
775 if (rt6_need_strict(&fl->fl6_dst))
776 flags |= RT6_LOOKUP_F_IFACE;
778 if (!ipv6_addr_any(&fl->fl6_src))
779 flags |= RT6_LOOKUP_F_HAS_SADDR;
781 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
784 EXPORT_SYMBOL(ip6_route_output);
/*
 * Replace *dstp with a "blackhole" copy of itself allocated from
 * ip6_dst_blackhole_ops: same addressing/metrics, but input/output
 * discard packets.  Used to keep a socket's dst valid while dropping
 * traffic.  Returns 0 on success, -ENOMEM when dst_alloc failed.
 * NOTE(review): reference-count management lines are missing from this
 * extraction.
 */
786 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
788 struct rt6_info *ort = (struct rt6_info *) *dstp;
789 struct rt6_info *rt = (struct rt6_info *)
790 dst_alloc(&ip6_dst_blackhole_ops);
791 struct dst_entry *new = NULL;
796 atomic_set(&new->__refcnt, 1);
798 new->input = dst_discard;
799 new->output = dst_discard;
801 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
802 new->dev = ort->u.dst.dev;
805 rt->rt6i_idev = ort->rt6i_idev;
807 in6_dev_hold(rt->rt6i_idev);
808 rt->rt6i_expires = 0;
810 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
811 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
814 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
815 #ifdef CONFIG_IPV6_SUBTREES
816 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
824 return (new ? 0 : -ENOMEM);
826 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
829 * Destination cache support functions
/*
 * dst_ops->check: a cached route stays valid only while its fib6 node's
 * serial number matches the cookie recorded at lookup time.
 */
832 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
836 rt = (struct rt6_info *) dst;
838 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops->negative_advice: drop RTF_CACHE clones that are performing
 * badly.  NOTE(review): the deletion/return lines are missing from this
 * extraction.
 */
844 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
846 struct rt6_info *rt = (struct rt6_info *) dst;
849 if (rt->rt6i_flags & RTF_CACHE)
/*
 * dst_ops->link_failure: report unreachability via ICMPv6, then expire
 * cached clones immediately or invalidate the fib6 node's serial for
 * default routes so later lookups re-select.
 */
857 static void ip6_link_failure(struct sk_buff *skb)
861 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
863 rt = (struct rt6_info *) skb->dst;
865 if (rt->rt6i_flags&RTF_CACHE) {
866 dst_set_expires(&rt->u.dst, 0);
867 rt->rt6i_flags |= RTF_EXPIRES;
868 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
869 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops->update_pmtu: lower the cached MTU on host routes only;
 * below IPV6_MIN_MTU the route switches to all-fragments mode instead
 * of reducing the MTU further (RFC 2460 §5 behaviour).
 */
873 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
875 struct rt6_info *rt6 = (struct rt6_info*)dst;
877 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
878 rt6->rt6i_flags |= RTF_MODIFIED;
879 if (mtu < IPV6_MIN_MTU) {
881 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
883 dst->metrics[RTAX_MTU-1] = mtu;
884 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
888 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derive the advertised MSS from a path MTU: subtract IPv6+TCP header
 * overhead, clamp to the sysctl minimum, and cap at the non-jumbogram
 * maximum.  NOTE(review): the final clamp assignment and return are
 * missing from this extraction.
 */
890 static inline unsigned int ipv6_advmss(unsigned int mtu)
892 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
894 if (mtu < init_net.ipv6.sysctl.ip6_rt_min_advmss)
895 mtu = init_net.ipv6.sysctl.ip6_rt_min_advmss;
898 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
899 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
900 * IPV6_MAXPLEN is also valid and means: "any MSS,
901 * rely only on pmtu discovery"
903 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Orphan dsts created for ICMP/ndisc, reaped by icmp6_dst_gc(). */
908 static struct dst_entry *icmp6_dst_gc_list;
909 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * Build a throw-away host dst for sending ndisc/ICMPv6 to @addr on
 * @dev, outside any routing table; it is chained onto icmp6_dst_gc_list
 * and freed by garbage collection once its refcount drops.
 * NOTE(review): error-path and return lines are missing from this
 * extraction.
 */
911 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
912 struct neighbour *neigh,
913 struct in6_addr *addr)
916 struct inet6_dev *idev = in6_dev_get(dev);
918 if (unlikely(idev == NULL))
921 rt = ip6_dst_alloc();
922 if (unlikely(rt == NULL)) {
931 neigh = ndisc_get_neigh(dev, addr);
934 rt->rt6i_idev = idev;
935 rt->rt6i_nexthop = neigh;
936 atomic_set(&rt->u.dst.__refcnt, 1);
937 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
938 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
939 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
940 rt->u.dst.output = ip6_output;
942 #if 0 /* there's no chance to use these for ndisc */
943 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
946 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
947 rt->rt6i_dst.plen = 128;
950 spin_lock_bh(&icmp6_dst_lock);
951 rt->u.dst.next = icmp6_dst_gc_list;
952 icmp6_dst_gc_list = &rt->u.dst;
953 spin_unlock_bh(&icmp6_dst_lock);
955 fib6_force_start_gc(dev->nd_net);
/*
 * Reap unreferenced entries from icmp6_dst_gc_list; *more reports
 * whether referenced entries remain.  NOTE(review): the unlink/free
 * body and the else branch are missing from this extraction.
 */
961 int icmp6_dst_gc(int *more)
963 struct dst_entry *dst, *next, **pprev;
969 spin_lock_bh(&icmp6_dst_lock);
970 pprev = &icmp6_dst_gc_list;
972 while ((dst = *pprev) != NULL) {
973 if (!atomic_read(&dst->__refcnt)) {
983 spin_unlock_bh(&icmp6_dst_lock);
/*
 * dst_ops->gc: run fib6 garbage collection with an adaptive expiry —
 * skipped entirely when under the size limit and inside the minimum
 * interval; expiry shrinks while the cache stays above gc_thresh.
 */
988 static int ip6_dst_gc(struct dst_ops *ops)
990 static unsigned expire = 30*HZ;
991 static unsigned long last_gc;
992 unsigned long now = jiffies;
994 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
995 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
999 fib6_run_gc(expire, &init_net);
1001 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1002 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1005 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1006 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1009 /* Clean host part of a prefix. Not necessary in radix tree,
1010 but results in cleaner routing tables.
1012 Remove it only when all the things will work!
/*
 * Return the device's IPv6 MTU (cnf.mtu6), defaulting to IPV6_MIN_MTU
 * when the device has no inet6_dev.  NOTE(review): the idev-put and
 * return lines are missing from this extraction.
 */
1015 static int ipv6_get_mtu(struct net_device *dev)
1017 int mtu = IPV6_MIN_MTU;
1018 struct inet6_dev *idev;
1020 idev = in6_dev_get(dev);
1022 mtu = idev->cnf.mtu6;
/*
 * Per-device hop limit, falling back to the global ipv6_devconf value.
 * Same missing-tail caveat as ipv6_get_mtu above.
 */
1028 int ipv6_get_hoplimit(struct net_device *dev)
1030 int hoplimit = ipv6_devconf.hop_limit;
1031 struct inet6_dev *idev;
1033 idev = in6_dev_get(dev);
1035 hoplimit = idev->cnf.hop_limit;
/*
 * Create and insert a route described by @cfg (netlink/ioctl config):
 * validates prefix lengths, resolves device/idev and table, allocates
 * the rt6_info, fills in flags/metrics/next hop, and inserts via
 * __ip6_ins_rt.  True loopback routes are promoted to reject routes to
 * avoid kernel packet loops.  NOTE(review): many error-path lines, goto
 * labels, and braces are missing from this extraction; control flow
 * below is incomplete as shown.
 */
1045 int ip6_route_add(struct fib6_config *cfg)
1048 struct rt6_info *rt = NULL;
1049 struct net_device *dev = NULL;
1050 struct inet6_dev *idev = NULL;
1051 struct fib6_table *table;
1054 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1056 #ifndef CONFIG_IPV6_SUBTREES
/* Source-prefix routes need CONFIG_IPV6_SUBTREES. */
1057 if (cfg->fc_src_len)
1060 if (cfg->fc_ifindex) {
1062 dev = dev_get_by_index(&init_net, cfg->fc_ifindex);
1065 idev = in6_dev_get(dev);
1070 if (cfg->fc_metric == 0)
1071 cfg->fc_metric = IP6_RT_PRIO_USER;
1073 table = fib6_new_table(&init_net, cfg->fc_table);
1074 if (table == NULL) {
1079 rt = ip6_dst_alloc();
1086 rt->u.dst.obsolete = -1;
1087 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1089 if (cfg->fc_protocol == RTPROT_UNSPEC)
1090 cfg->fc_protocol = RTPROT_BOOT;
1091 rt->rt6i_protocol = cfg->fc_protocol;
1093 addr_type = ipv6_addr_type(&cfg->fc_dst);
/* Multicast destinations take the multicast input path. */
1095 if (addr_type & IPV6_ADDR_MULTICAST)
1096 rt->u.dst.input = ip6_mc_input;
1098 rt->u.dst.input = ip6_forward;
1100 rt->u.dst.output = ip6_output;
1102 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1103 rt->rt6i_dst.plen = cfg->fc_dst_len;
1104 if (rt->rt6i_dst.plen == 128)
1105 rt->u.dst.flags = DST_HOST;
1107 #ifdef CONFIG_IPV6_SUBTREES
1108 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1109 rt->rt6i_src.plen = cfg->fc_src_len;
1112 rt->rt6i_metric = cfg->fc_metric;
1114 /* We cannot add true routes via loopback here,
1115 they would result in kernel looping; promote them to reject routes
1117 if ((cfg->fc_flags & RTF_REJECT) ||
1118 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1119 /* hold loopback dev/idev if we haven't done so. */
1120 if (dev != init_net.loopback_dev) {
1125 dev = init_net.loopback_dev;
1127 idev = in6_dev_get(dev);
1133 rt->u.dst.output = ip6_pkt_discard_out;
1134 rt->u.dst.input = ip6_pkt_discard;
1135 rt->u.dst.error = -ENETUNREACH;
1136 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1140 if (cfg->fc_flags & RTF_GATEWAY) {
1141 struct in6_addr *gw_addr;
1144 gw_addr = &cfg->fc_gateway;
1145 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1146 gwa_type = ipv6_addr_type(gw_addr);
1148 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1149 struct rt6_info *grt;
1151 /* IPv6 strictly inhibits using not link-local
1152 addresses as nexthop address.
1153 Otherwise, router will not able to send redirects.
1154 It is very good, but in some (rare!) circumstances
1155 (SIT, PtP, NBMA NOARP links) it is handy to allow
1156 some exceptions. --ANK
1159 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* Global-address gateway: resolve which device reaches it. */
1162 grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1);
1164 err = -EHOSTUNREACH;
1168 if (dev != grt->rt6i_dev) {
1169 dst_release(&grt->u.dst);
1173 dev = grt->rt6i_dev;
1174 idev = grt->rt6i_idev;
1176 in6_dev_hold(grt->rt6i_idev);
/* The resolved route must itself be on-link, not via a gateway. */
1178 if (!(grt->rt6i_flags&RTF_GATEWAY))
1180 dst_release(&grt->u.dst);
1186 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1194 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1195 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1196 if (IS_ERR(rt->rt6i_nexthop)) {
1197 err = PTR_ERR(rt->rt6i_nexthop);
1198 rt->rt6i_nexthop = NULL;
1203 rt->rt6i_flags = cfg->fc_flags;
/* Apply caller-supplied RTAX_* metrics from the netlink attribute blob. */
1210 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1211 int type = nla_type(nla);
1214 if (type > RTAX_MAX) {
1219 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
/* Fill defaults for any metrics the caller left unset. */
1224 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1225 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1226 if (!rt->u.dst.metrics[RTAX_MTU-1])
1227 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1228 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1229 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
1230 rt->u.dst.dev = dev;
1231 rt->rt6i_idev = idev;
1232 rt->rt6i_table = table;
1234 cfg->fc_nlinfo.nl_net = dev->nd_net;
1236 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
1244 dst_free(&rt->u.dst);
/*
 * Remove @rt from its table under tb6_lock and drop the caller's
 * reference.  The sentinel ip6_null_entry is never deleted.
 * NOTE(review): the error-return line for the sentinel case is missing
 * from this extraction.
 */
1248 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1251 struct fib6_table *table;
1253 if (rt == &ip6_null_entry)
1256 table = rt->rt6i_table;
1257 write_lock_bh(&table->tb6_lock);
1259 err = fib6_del(rt, info);
1260 dst_release(&rt->u.dst);
1262 write_unlock_bh(&table->tb6_lock);
/* Convenience wrapper supplying default netlink notification info. */
1267 int ip6_del_rt(struct rt6_info *rt)
1269 struct nl_info info = {
1270 .nl_net = &init_net,
1272 return __ip6_del_rt(rt, &info);
/*
 * Delete the route matching @cfg: locate the fib6 node for the
 * dst/src prefixes, then scan its leaf chain for an entry agreeing with
 * the configured ifindex, gateway, and metric.  Returns via
 * __ip6_del_rt on match; the not-found error return is missing from
 * this extraction.
 */
1275 static int ip6_route_del(struct fib6_config *cfg)
1277 struct fib6_table *table;
1278 struct fib6_node *fn;
1279 struct rt6_info *rt;
1282 table = fib6_get_table(&init_net, cfg->fc_table);
1286 read_lock_bh(&table->tb6_lock);
1288 fn = fib6_locate(&table->tb6_root,
1289 &cfg->fc_dst, cfg->fc_dst_len,
1290 &cfg->fc_src, cfg->fc_src_len);
1293 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1294 if (cfg->fc_ifindex &&
1295 (rt->rt6i_dev == NULL ||
1296 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1298 if (cfg->fc_flags & RTF_GATEWAY &&
1299 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1301 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1303 dst_hold(&rt->u.dst);
1304 read_unlock_bh(&table->tb6_lock);
1306 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1309 read_unlock_bh(&table->tb6_lock);
/* flowi extended with the redirecting router's address, for rule lookup. */
1317 struct ip6rd_flowi {
1319 struct in6_addr gateway;
/*
 * Backend for ip6_route_redirect(): find the current route whose
 * next hop is exactly the router that sent the redirect (RFC 2461:
 * redirects are only valid from the current next hop), matching on
 * expiry, RTF_GATEWAY, interface, and gateway address.
 * NOTE(review): continue/break lines in the scan loop are missing from
 * this extraction.
 */
1322 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1326 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1327 struct rt6_info *rt;
1328 struct fib6_node *fn;
1331 * Get the "current" route for this destination and
1332 * check if the redirect has come from approriate router.
1334 * RFC 2461 specifies that redirects should only be
1335 * accepted if they come from the nexthop to the target.
1336 * Due to the way the routes are chosen, this notion
1337 * is a bit fuzzy and one might need to check all possible
1341 read_lock_bh(&table->tb6_lock);
1342 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1344 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1346 * Current route is on-link; redirect is always invalid.
1348 * Seems, previous statement is not true. It could
1349 * be node, which looks for us as on-link (f.e. proxy ndisc)
1350 * But then router serving it might decide, that we should
1351 * know truth 8)8) --ANK (980726).
1353 if (rt6_check_expired(rt))
1355 if (!(rt->rt6i_flags & RTF_GATEWAY))
1357 if (fl->oif != rt->rt6i_dev->ifindex)
1359 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1365 rt = &ip6_null_entry;
1366 BACKTRACK(&fl->fl6_src);
1368 dst_hold(&rt->u.dst);
1370 read_unlock_bh(&table->tb6_lock);
/*
 * Build the extended flowi for a received redirect and resolve it
 * through the policy-rule engine with __ip6_route_redirect.
 */
1375 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1376 struct in6_addr *src,
1377 struct in6_addr *gateway,
1378 struct net_device *dev)
1380 int flags = RT6_LOOKUP_F_HAS_SADDR;
1381 struct ip6rd_flowi rdfl = {
1383 .oif = dev->ifindex,
1391 .gateway = *gateway,
1394 if (rt6_need_strict(dest))
1395 flags |= RT6_LOOKUP_F_IFACE;
1397 return (struct rt6_info *)fib6_rule_lookup(&init_net,
1398 (struct flowi *)&rdfl,
1399 flags, __ip6_route_redirect);
/*
 * Handle an accepted ICMPv6 redirect: validate the sender is our
 * current next hop, update the neighbour cache with the new lladdr,
 * then install a host RTF_CACHE route to @dest via @neigh and notify
 * netevent listeners.  NOTE(review): several lines (goto labels, the
 * old-cache-entry deletion, nrt NULL check) are missing from this
 * extraction.
 */
1402 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1403 struct in6_addr *saddr,
1404 struct neighbour *neigh, u8 *lladdr, int on_link)
1406 struct rt6_info *rt, *nrt = NULL;
1407 struct netevent_redirect netevent;
1409 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1411 if (rt == &ip6_null_entry) {
1412 if (net_ratelimit())
1413 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1414 "for redirect target\n");
1419 * We have finally decided to accept it.
1422 neigh_update(neigh, lladdr, NUD_STALE,
1423 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1424 NEIGH_UPDATE_F_OVERRIDE|
1425 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1426 NEIGH_UPDATE_F_ISROUTER))
1430 * Redirect received -> path was valid.
1431 * Look, redirects are sent only in response to data packets,
1432 * so that this nexthop apparently is reachable. --ANK
1434 dst_confirm(&rt->u.dst);
1436 /* Duplicate redirect: silently ignore. */
1437 if (neigh == rt->u.dst.neighbour)
1440 nrt = ip6_rt_copy(rt);
1444 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1446 nrt->rt6i_flags &= ~RTF_GATEWAY;
1448 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1449 nrt->rt6i_dst.plen = 128;
1450 nrt->u.dst.flags |= DST_HOST;
1452 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1453 nrt->rt6i_nexthop = neigh_clone(neigh);
1454 /* Reset pmtu, it may be better */
1455 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1456 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
1458 if (ip6_ins_rt(nrt))
1461 netevent.old = &rt->u.dst;
1462 netevent.new = &nrt->u.dst;
1463 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1465 if (rt->rt6i_flags&RTF_CACHE) {
1471 dst_release(&rt->u.dst);
1476 * Handle ICMP "packet too big" messages
1477 * i.e. Path MTU discovery
/*
 * Process an ICMPv6 Packet Too Big for @daddr: ignore increases, clamp
 * below IPV6_MIN_MTU to minimum-MTU + all-fragments mode, then either
 * update an existing RTF_CACHE host route in place or create a new
 * expiring clone carrying the reduced MTU.  NOTE(review): goto labels,
 * the ALLFRAG condition lines, and the nrt insert are missing from this
 * extraction.
 */
1480 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1481 struct net_device *dev, u32 pmtu)
1483 struct rt6_info *rt, *nrt;
1486 rt = rt6_lookup(daddr, saddr, dev->ifindex, 0);
1490 if (pmtu >= dst_mtu(&rt->u.dst))
1493 if (pmtu < IPV6_MIN_MTU) {
1495 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1496 * MTU (1280) and a fragment header should always be included
1497 * after a node receiving Too Big message reporting PMTU is
1498 * less than the IPv6 Minimum Link MTU.
1500 pmtu = IPV6_MIN_MTU;
1504 /* New mtu received -> path was valid.
1505 They are sent only in response to data packets,
1506 so that this nexthop apparently is reachable. --ANK
1508 dst_confirm(&rt->u.dst);
1510 /* Host route. If it is static, it would be better
1511 not to override it, but add new one, so that
1512 when cache entry will expire old pmtu
1513 would return automatically.
1515 if (rt->rt6i_flags & RTF_CACHE) {
1516 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1518 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1519 dst_set_expires(&rt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1520 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1525 Two cases are possible:
1526 1. It is connected route. Action: COW
1527 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1529 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1530 nrt = rt6_alloc_cow(rt, daddr, saddr);
1532 nrt = rt6_alloc_clone(rt, daddr);
1535 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1537 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1539 /* According to RFC 1981, detecting PMTU increase shouldn't be
1540 * happened within 5 mins, the recommended timer is 10 mins.
1541 * Here this route expiration time is set to ip6_rt_mtu_expires
1542 * which is 10 mins. After 10 mins the decreased pmtu is expired
1543 * and detecting PMTU increase will be automatically happened.
1545 dst_set_expires(&nrt->u.dst, init_net.ipv6.sysctl.ip6_rt_mtu_expires);
1546 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1551 dst_release(&rt->u.dst);
1555 * Misc support functions
/*
 * Shallow-copy @ort into a freshly allocated rt6_info: duplicate the
 * dst callbacks, metrics, device (with references held), gateway,
 * flags (minus RTF_EXPIRES), and prefix keys.  The copy starts with
 * metric 0 and no expiry.  NOTE(review): NULL checks around the device
 * and idev holds, and the return, are missing from this extraction.
 */
1558 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1560 struct rt6_info *rt = ip6_dst_alloc();
1563 rt->u.dst.input = ort->u.dst.input;
1564 rt->u.dst.output = ort->u.dst.output;
1566 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1567 rt->u.dst.error = ort->u.dst.error;
1568 rt->u.dst.dev = ort->u.dst.dev;
1570 dev_hold(rt->u.dst.dev);
1571 rt->rt6i_idev = ort->rt6i_idev;
1573 in6_dev_hold(rt->rt6i_idev);
1574 rt->u.dst.lastuse = jiffies;
1575 rt->rt6i_expires = 0;
1577 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
1578 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1579 rt->rt6i_metric = 0;
1581 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1582 #ifdef CONFIG_IPV6_SUBTREES
1583 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1585 rt->rt6i_table = ort->rt6i_table;
1590 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_get_route_info - look up an RFC 4191 Route Information entry.
 *
 * Searches RT6_TABLE_INFO for an RTF_ROUTEINFO|RTF_GATEWAY route to
 * @prefix/@prefixlen learnt from gateway @gwaddr on interface
 * @ifindex.  On a match, a dst reference is taken before returning.
 * Returns NULL when no matching entry exists.
 */
1591 static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixlen,
1592 struct in6_addr *gwaddr, int ifindex)
1594 struct fib6_node *fn;
1595 struct rt6_info *rt = NULL;
1596 struct fib6_table *table;
1598 table = fib6_get_table(&init_net, RT6_TABLE_INFO);
1602 write_lock_bh(&table->tb6_lock);
1603 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
/* scan all routes on the located node for an exact ifindex/flags/gw match */
1607 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1608 if (rt->rt6i_dev->ifindex != ifindex)
1610 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1612 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
/* found it — pin the entry for the caller */
1614 dst_hold(&rt->u.dst);
1618 write_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_route_info - install an RFC 4191 Route Information route.
 *
 * Builds a fib6_config for an autoconfigured gateway route to
 * @prefix/@prefixlen via @gwaddr on @ifindex (with router preference
 * bits from @pref), adds it with ip6_route_add(), and returns the
 * freshly inserted entry via rt6_get_route_info() (reference held).
 */
1622 static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixlen,
1623 struct in6_addr *gwaddr, int ifindex,
1626 struct fib6_config cfg = {
1627 .fc_table = RT6_TABLE_INFO,
1628 .fc_metric = IP6_RT_PRIO_USER,
1629 .fc_ifindex = ifindex,
1630 .fc_dst_len = prefixlen,
1631 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1632 RTF_UP | RTF_PREF(pref),
1635 ipv6_addr_copy(&cfg.fc_dst, prefix);
1636 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1638 /* We should treat it as a default route if prefix length is 0. */
1640 cfg.fc_flags |= RTF_DEFAULT;
1642 ip6_route_add(&cfg);
/* re-look the route up so the caller gets a referenced entry */
1644 return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex);
/*
 * rt6_get_dflt_router - find the RA-learnt default route via @addr/@dev.
 *
 * Walks the RT6_TABLE_DFLT leaf chain for an entry that is both
 * RTF_ADDRCONF and RTF_DEFAULT, bound to @dev, with gateway @addr.
 * A dst reference is taken on the match (if any) before the table
 * lock is dropped.
 */
1648 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1650 struct rt6_info *rt;
1651 struct fib6_table *table;
1653 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
1657 write_lock_bh(&table->tb6_lock);
1658 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
/* match device, addrconf+default flags and the gateway address */
1659 if (dev == rt->rt6i_dev &&
1660 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1661 ipv6_addr_equal(&rt->rt6i_gateway, addr))
/* hold the ref while still under tb6_lock */
1665 dst_hold(&rt->u.dst);
1666 write_unlock_bh(&table->tb6_lock);
1670 EXPORT_SYMBOL(rt6_get_dflt_router);
/*
 * rt6_add_dflt_router - install a default route learnt from an RA.
 *
 * Adds an expiring, autoconfigured default route via @gwaddr on
 * @dev (router preference from @pref) into RT6_TABLE_DFLT, then
 * returns the inserted entry via rt6_get_dflt_router() with a
 * reference held.
 */
1672 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1673 struct net_device *dev,
1676 struct fib6_config cfg = {
1677 .fc_table = RT6_TABLE_DFLT,
1678 .fc_metric = IP6_RT_PRIO_USER,
1679 .fc_ifindex = dev->ifindex,
1680 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1681 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1684 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1686 ip6_route_add(&cfg);
1688 return rt6_get_dflt_router(gwaddr, dev);
/*
 * rt6_purge_dflt_routers - delete all RA-learnt default routes.
 *
 * Scans RT6_TABLE_DFLT and removes every RTF_DEFAULT|RTF_ADDRCONF
 * entry.  For each victim, a reference is taken and the read lock
 * dropped before deletion; presumably the scan then restarts from
 * the top (restart logic is on lines elided from this chunk —
 * verify against the full file).
 */
1691 void rt6_purge_dflt_routers(void)
1693 struct rt6_info *rt;
1694 struct fib6_table *table;
1696 /* NOTE: Keep consistent with rt6_get_dflt_router */
1697 table = fib6_get_table(&init_net, RT6_TABLE_DFLT);
1702 read_lock_bh(&table->tb6_lock);
1703 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1704 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
/* pin the entry, then drop the lock so it can be deleted safely */
1705 dst_hold(&rt->u.dst);
1706 read_unlock_bh(&table->tb6_lock);
1711 read_unlock_bh(&table->tb6_lock);
/*
 * rtmsg_to_fib6_config - translate a legacy ioctl in6_rtmsg into
 * the internal fib6_config representation used by ip6_route_add/del.
 * The config is zeroed first; routes created this way always target
 * RT6_TABLE_MAIN and the initial network namespace.
 */
1714 static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg,
1715 struct fib6_config *cfg)
1717 memset(cfg, 0, sizeof(*cfg));
1719 cfg->fc_table = RT6_TABLE_MAIN;
1720 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1721 cfg->fc_metric = rtmsg->rtmsg_metric;
/* rtmsg_info carries the route lifetime for the ioctl interface */
1722 cfg->fc_expires = rtmsg->rtmsg_info;
1723 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1724 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1725 cfg->fc_flags = rtmsg->rtmsg_flags;
1727 cfg->fc_nlinfo.nl_net = &init_net;
1729 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1730 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1731 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
/*
 * ipv6_route_ioctl - legacy SIOCADDRT/SIOCDELRT route management.
 *
 * Requires CAP_NET_ADMIN.  Copies an in6_rtmsg from userspace,
 * converts it with rtmsg_to_fib6_config() and dispatches to
 * ip6_route_add() or ip6_route_del().  Error paths (copy failure,
 * unknown cmd) are on lines elided from this chunk.
 */
1734 int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
1736 struct fib6_config cfg;
1737 struct in6_rtmsg rtmsg;
1741 case SIOCADDRT: /* Add a route */
1742 case SIOCDELRT: /* Delete a route */
1743 if (!capable(CAP_NET_ADMIN))
1745 err = copy_from_user(&rtmsg, arg,
1746 sizeof(struct in6_rtmsg));
1750 rtmsg_to_fib6_config(&rtmsg, &cfg);
1755 err = ip6_route_add(&cfg);
1758 err = ip6_route_del(&cfg);
1772 * Drop the packet on the floor
/*
 * ip6_pkt_drop - shared drop path for null/prohibit dst entries.
 *
 * Bumps the appropriate IPSTATS MIB counter (distinguishing
 * IN/OUT no-route, and flagging address errors for any/reserved
 * destinations) and sends an ICMPv6 Destination Unreachable with
 * @code before the packet is dropped.
 */
1775 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1778 switch (ipstats_mib_noroutes) {
1779 case IPSTATS_MIB_INNOROUTES:
1780 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
/* unspecified/reserved destinations count as address errors instead */
1781 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1782 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1786 case IPSTATS_MIB_OUTNOROUTES:
1787 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
/* tell the sender why the packet went nowhere */
1790 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
/* Input-path drop handler for the null route (no route to host). */
1795 static int ip6_pkt_discard(struct sk_buff *skb)
1797 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/* Output-path drop handler: point skb->dev at the dst device first
 * so the ICMPv6 error is emitted on the right interface. */
1800 static int ip6_pkt_discard_out(struct sk_buff *skb)
1802 skb->dev = skb->dst->dev;
1803 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1806 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* Input-path handler for prohibit routes (admin prohibited). */
1808 static int ip6_pkt_prohibit(struct sk_buff *skb)
1810 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/* Output-path handler for prohibit routes; fix up skb->dev as in
 * ip6_pkt_discard_out() so the ICMPv6 error has a valid device. */
1813 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1815 skb->dev = skb->dst->dev;
1816 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1822 * Allocate a dst for local (unicast / anycast) address.
/*
 * addrconf_dst_alloc - build a host route for a local unicast or
 * anycast address.
 *
 * The route is a /128 through the loopback device, marked
 * RTF_UP|RTF_NONEXTHOP (plus RTF_ANYCAST or RTF_LOCAL depending on
 * the address kind — the selecting condition is on an elided line).
 * Returns the new entry with refcount 1, or ERR_PTR(-ENOMEM) on
 * allocation or neighbour-lookup failure.
 */
1825 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1826 const struct in6_addr *addr,
1829 struct rt6_info *rt = ip6_dst_alloc();
1832 return ERR_PTR(-ENOMEM);
/* route owns a reference on the loopback device */
1834 dev_hold(init_net.loopback_dev);
1837 rt->u.dst.flags = DST_HOST;
1838 rt->u.dst.input = ip6_input;
1839 rt->u.dst.output = ip6_output;
1840 rt->rt6i_dev = init_net.loopback_dev;
1841 rt->rt6i_idev = idev;
1842 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1843 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
/* -1 means "use the device/default hop limit" */
1844 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1845 rt->u.dst.obsolete = -1;
1847 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1849 rt->rt6i_flags |= RTF_ANYCAST;
1851 rt->rt6i_flags |= RTF_LOCAL;
1852 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1853 if (rt->rt6i_nexthop == NULL) {
/* never inserted into the FIB yet, so dst_free() is the right undo */
1854 dst_free(&rt->u.dst);
1855 return ERR_PTR(-ENOMEM);
1858 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1859 rt->rt6i_dst.plen = 128;
1860 rt->rt6i_table = fib6_get_table(&init_net, RT6_TABLE_LOCAL);
1862 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * fib6_ifdown - fib6_clean_all() callback for device shutdown.
 * @arg is the going-down net_device (NULL matches every device).
 * Selects routes on that device for deletion, always sparing the
 * null entry.
 */
1867 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1869 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1870 rt != &ip6_null_entry) {
1871 RT6_TRACE("deleted by ifdown %p\n", rt);
/* Purge all routes bound to @dev (or all devices if @dev is NULL)
 * by walking every FIB table in @net with fib6_ifdown(). */
1879 void rt6_ifdown(struct net *net, struct net_device *dev)
1881 fib6_clean_all(net, fib6_ifdown, 0, dev);
/* Cookie passed through fib6_clean_all() to rt6_mtu_change_route():
 * the device whose MTU changed (and, on an elided line, the new MTU). */
1882 struct rt6_mtu_change_arg
1884 struct net_device *dev;
/*
 * rt6_mtu_change_route - per-route callback for a device MTU change.
 *
 * Updates RTAX_MTU (and the derived RTAX_ADVMSS) on routes bound to
 * the changed device, unless the metric is administratively locked.
 * An increase is only propagated when the route's current PMTU
 * equals the device's old MTU, i.e. the device was the path
 * bottleneck; see the RFC 1981 discussion below.
 */
1888 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1890 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1891 struct inet6_dev *idev;
1893 /* In IPv6 pmtu discovery is not optional,
1894 so that RTAX_MTU lock cannot disable it.
1895 We still use this lock to block changes
1896 caused by addrconf/ndisc.
1899 idev = __in6_dev_get(arg->dev);
1903 /* For administrative MTU increase, there is no way to discover
1904 IPv6 PMTU increase, so PMTU increase should be updated here.
1905 Since RFC 1981 doesn't include administrative MTU increase
1906 update PMTU increase is a MUST. (i.e. jumbo frame)
1909 If new MTU is less than route PMTU, this new MTU will be the
1910 lowest MTU in the path, update the route PMTU to reflect PMTU
1911 decreases; if new MTU is greater than route PMTU, and the
1912 old MTU is the lowest MTU in the path, update the route PMTU
1913 to reflect the increase. In this case if the other nodes' MTU
1914 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1917 if (rt->rt6i_dev == arg->dev &&
1918 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1919 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1920 (dst_mtu(&rt->u.dst) < arg->mtu &&
1921 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1922 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
/* keep the advertised MSS consistent with the new MTU */
1923 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
/* Propagate a device MTU change to every route in @dev's namespace
 * via the rt6_mtu_change_route() walker. */
1928 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1930 struct rt6_mtu_change_arg arg = {
1935 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
/* Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE:
 * gateway must be a full in6_addr; OIF/IIF/PRIORITY are u32;
 * METRICS is a nested attribute block. */
1938 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1939 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
1940 [RTA_OIF] = { .type = NLA_U32 },
1941 [RTA_IIF] = { .type = NLA_U32 },
1942 [RTA_PRIORITY] = { .type = NLA_U32 },
1943 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * rtm_to_fib6_config - parse an rtnetlink route message into a
 * fib6_config.
 *
 * Validates attributes against rtm_ipv6_policy, fills in table,
 * prefix lengths, flags, protocol, netlink bookkeeping and the
 * optional gateway/dst/src/oif/priority/metrics attributes.
 * RTN_UNREACHABLE maps to RTF_REJECT.  Returns 0 or a negative
 * errno from nlmsg_parse() (error paths partly elided here).
 */
1946 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1947 struct fib6_config *cfg)
1950 struct nlattr *tb[RTA_MAX+1];
1953 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1958 rtm = nlmsg_data(nlh);
1959 memset(cfg, 0, sizeof(*cfg));
1961 cfg->fc_table = rtm->rtm_table;
1962 cfg->fc_dst_len = rtm->rtm_dst_len;
1963 cfg->fc_src_len = rtm->rtm_src_len;
1964 cfg->fc_flags = RTF_UP;
1965 cfg->fc_protocol = rtm->rtm_protocol;
1967 if (rtm->rtm_type == RTN_UNREACHABLE)
1968 cfg->fc_flags |= RTF_REJECT;
/* remember who asked, for notifications back to userspace */
1970 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1971 cfg->fc_nlinfo.nlh = nlh;
1972 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
1974 if (tb[RTA_GATEWAY]) {
1975 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1976 cfg->fc_flags |= RTF_GATEWAY;
/* dst/src carry only the prefix bytes, rounded up to whole octets */
1980 int plen = (rtm->rtm_dst_len + 7) >> 3;
1982 if (nla_len(tb[RTA_DST]) < plen)
1985 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
1989 int plen = (rtm->rtm_src_len + 7) >> 3;
1991 if (nla_len(tb[RTA_SRC]) < plen)
1994 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
1998 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2000 if (tb[RTA_PRIORITY])
2001 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
/* metrics stay as a raw nested blob; consumed later by the add path */
2003 if (tb[RTA_METRICS]) {
2004 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2005 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
/* RTA_TABLE overrides the header's 8-bit rtm_table when present */
2009 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
/*
 * RTM_DELROUTE handler: only the initial namespace is supported at
 * this point; parse the message and delete the matching route.
 */
2016 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2018 struct net *net = skb->sk->sk_net;
2019 struct fib6_config cfg;
2022 if (net != &init_net)
2025 err = rtm_to_fib6_config(skb, nlh, &cfg);
2029 return ip6_route_del(&cfg);
/*
 * RTM_NEWROUTE handler: mirror of inet6_rtm_delroute() — init
 * namespace only, parse then add.
 */
2032 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2034 struct net *net = skb->sk->sk_net;
2035 struct fib6_config cfg;
2038 if (net != &init_net)
2041 err = rtm_to_fib6_config(skb, nlh, &cfg);
2045 return ip6_route_add(&cfg);
/*
 * Worst-case payload size of one RTM_NEWROUTE notification, used to
 * size the skb in inet6_rt_notify().  Must stay in sync with the
 * attributes emitted by rt6_fill_node().
 */
2048 static inline size_t rt6_nlmsg_size(void)
2050 return NLMSG_ALIGN(sizeof(struct rtmsg))
2051 + nla_total_size(16) /* RTA_SRC */
2052 + nla_total_size(16) /* RTA_DST */
2053 + nla_total_size(16) /* RTA_GATEWAY */
2054 + nla_total_size(16) /* RTA_PREFSRC */
2055 + nla_total_size(4) /* RTA_TABLE */
2056 + nla_total_size(4) /* RTA_IIF */
2057 + nla_total_size(4) /* RTA_OIF */
2058 + nla_total_size(4) /* RTA_PRIORITY */
2059 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2060 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * rt6_fill_node - serialize one rt6_info into a netlink message.
 *
 * Emits the rtmsg header plus RTA_TABLE/DST/SRC/IIF/PREFSRC/
 * METRICS/GATEWAY/OIF/PRIORITY and cache info.  @dst/@src, when
 * non-NULL, describe the specific lookup that produced @rt (used by
 * RTM_GETROUTE) and force full /128 prefix lengths.  @prefix
 * restricts dumps to RTF_PREFIX_RT routes.  Returns the nlmsg_end()
 * result or -EMSGSIZE via the nla_put_failure path.
 */
2063 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2064 struct in6_addr *dst, struct in6_addr *src,
2065 int iif, int type, u32 pid, u32 seq,
2066 int prefix, unsigned int flags)
2069 struct nlmsghdr *nlh;
2073 if (prefix) { /* user wants prefix routes only */
2074 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2075 /* success since this is not a prefix route */
2080 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2084 rtm = nlmsg_data(nlh);
2085 rtm->rtm_family = AF_INET6;
2086 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2087 rtm->rtm_src_len = rt->rt6i_src.plen;
2090 table = rt->rt6i_table->tb6_id;
2092 table = RT6_TABLE_UNSPEC;
2093 rtm->rtm_table = table;
2094 NLA_PUT_U32(skb, RTA_TABLE, table);
/* derive rtm_type from route flags/device */
2095 if (rt->rt6i_flags&RTF_REJECT)
2096 rtm->rtm_type = RTN_UNREACHABLE;
2097 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2098 rtm->rtm_type = RTN_LOCAL;
2100 rtm->rtm_type = RTN_UNICAST;
2102 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2103 rtm->rtm_protocol = rt->rt6i_protocol;
/* dynamic/addrconf/default routes report their true origin */
2104 if (rt->rt6i_flags&RTF_DYNAMIC)
2105 rtm->rtm_protocol = RTPROT_REDIRECT;
2106 else if (rt->rt6i_flags & RTF_ADDRCONF)
2107 rtm->rtm_protocol = RTPROT_KERNEL;
2108 else if (rt->rt6i_flags&RTF_DEFAULT)
2109 rtm->rtm_protocol = RTPROT_RA;
2111 if (rt->rt6i_flags&RTF_CACHE)
2112 rtm->rtm_flags |= RTM_F_CLONED;
/* caller-supplied dst/src take precedence over the route's keys */
2115 NLA_PUT(skb, RTA_DST, 16, dst);
2116 rtm->rtm_dst_len = 128;
2117 } else if (rtm->rtm_dst_len)
2118 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2119 #ifdef CONFIG_IPV6_SUBTREES
2121 NLA_PUT(skb, RTA_SRC, 16, src);
2122 rtm->rtm_src_len = 128;
2123 } else if (rtm->rtm_src_len)
2124 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2127 NLA_PUT_U32(skb, RTA_IIF, iif);
2129 struct in6_addr saddr_buf;
/* advertise the preferred source address for this destination */
2130 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2131 dst, &saddr_buf) == 0)
2132 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2135 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2136 goto nla_put_failure;
2138 if (rt->u.dst.neighbour)
2139 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2142 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2144 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
/* report remaining lifetime, 0 for permanent routes */
2146 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2147 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2148 expires, rt->u.dst.error) < 0)
2149 goto nla_put_failure;
2151 return nlmsg_end(skb, nlh);
2154 nlmsg_cancel(skb, nlh);
/*
 * rt6_dump_route - per-route callback for RTM_GETROUTE dumps.
 * Honors the RTM_F_PREFIX filter from the request header (when the
 * request actually carried an rtmsg) and delegates serialization to
 * rt6_fill_node() with NLM_F_MULTI set.
 */
2158 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2160 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2163 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2164 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2165 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2169 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2170 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2171 prefix, NLM_F_MULTI);
/*
 * RTM_GETROUTE handler: resolve a single route for the src/dst/
 * iif/oif given in the request, serialize it with rt6_fill_node()
 * and unicast the answer back to the requester.  Init namespace
 * only; some error paths are on elided lines.
 */
2174 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2176 struct net *net = in_skb->sk->sk_net;
2177 struct nlattr *tb[RTA_MAX+1];
2178 struct rt6_info *rt;
2179 struct sk_buff *skb;
2184 if (net != &init_net)
2187 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
/* build the flow key from the request attributes */
2192 memset(&fl, 0, sizeof(fl));
2195 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2198 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2202 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2205 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2209 iif = nla_get_u32(tb[RTA_IIF]);
2212 fl.oif = nla_get_u32(tb[RTA_OIF]);
/* an input interface must actually exist */
2215 struct net_device *dev;
2216 dev = __dev_get_by_index(&init_net, iif);
2223 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2229 /* Reserve room for dummy headers, this skb can pass
2230 through good chunk of routing engine.
2232 skb_reset_mac_header(skb);
2233 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
/* do the actual route lookup; result dst is attached to the reply skb */
2235 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
2236 skb->dst = &rt->u.dst;
2238 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2239 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2240 nlh->nlmsg_seq, 0, 0);
2246 err = rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid);
/*
 * inet6_rt_notify - broadcast a route change to RTNLGRP_IPV6_ROUTE
 * listeners.  Allocates an skb sized by rt6_nlmsg_size(); a fill
 * failure there implies rt6_nlmsg_size() underestimated and trips
 * the WARN_ON.  On any failure the group is flagged via
 * rtnl_set_sk_err() so listeners can resync.
 */
2251 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2253 struct sk_buff *skb;
2258 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2260 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2264 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2265 event, info->pid, seq, 0, 0);
2267 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2268 WARN_ON(err == -EMSGSIZE);
2272 err = rtnl_notify(skb, &init_net, info->pid,
2273 RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any());
2276 rtnl_set_sk_err(&init_net, RTNLGRP_IPV6_ROUTE, err);
2283 #ifdef CONFIG_PROC_FS
2285 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route - print one route in /proc/net/ipv6_route format:
 * dst/plen, src/plen (or zeros without subtrees), next hop, metric,
 * refcount, use count, flags and device name.
 */
2296 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2298 struct seq_file *m = p_arg;
2300 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2303 #ifdef CONFIG_IPV6_SUBTREES
2304 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
/* without subtree support the source column is always zero */
2307 seq_puts(m, "00000000000000000000000000000000 00 ");
2310 if (rt->rt6i_nexthop) {
2311 seq_printf(m, NIP6_SEQFMT,
2312 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2314 seq_puts(m, "00000000000000000000000000000000");
2316 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2317 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2318 rt->u.dst.__use, rt->rt6i_flags,
2319 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/* seq_file show: walk every route in the namespace and print each
 * via rt6_info_route(). */
2323 static int ipv6_route_show(struct seq_file *m, void *v)
2325 struct net *net = (struct net *)m->private;
2326 fib6_clean_all(net, rt6_info_route, 0, m);
/* open handler for /proc/net/ipv6_route: bind the proc entry's
 * namespace as the seq_file private data. */
2330 static int ipv6_route_open(struct inode *inode, struct file *file)
2332 struct net *net = get_proc_net(inode);
2335 return single_open(file, ipv6_route_show, net);
/* release handler for /proc/net/ipv6_route.  The namespace ref
 * taken in ipv6_route_open() is presumably dropped on a line elided
 * from this chunk — verify put_net() is present in the full file. */
2338 static int ipv6_route_release(struct inode *inode, struct file *file)
2340 struct seq_file *seq = file->private_data;
2341 struct net *net = seq->private;
2343 return single_release(inode, file);
/* file_operations for /proc/net/ipv6_route (single_open seq file). */
2346 static const struct file_operations ipv6_route_proc_fops = {
2347 .owner = THIS_MODULE,
2348 .open = ipv6_route_open,
2350 .llseek = seq_lseek,
2351 .release = ipv6_route_release,
/* /proc/net/rt6_stats: FIB node/route counters, dst-entry count and
 * discarded-route count of the init namespace, as seven hex words. */
2354 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2356 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2357 init_net.ipv6.rt6_stats->fib_nodes,
2358 init_net.ipv6.rt6_stats->fib_route_nodes,
2359 init_net.ipv6.rt6_stats->fib_rt_alloc,
2360 init_net.ipv6.rt6_stats->fib_rt_entries,
2361 init_net.ipv6.rt6_stats->fib_rt_cache,
2362 atomic_read(&ip6_dst_ops.entries),
2363 init_net.ipv6.rt6_stats->fib_discarded_routes,
/* open handler for /proc/net/rt6_stats; no private state needed. */
2368 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2370 return single_open(file, rt6_stats_seq_show, NULL);
/* file_operations for /proc/net/rt6_stats (single_open seq file). */
2373 static const struct file_operations rt6_stats_seq_fops = {
2374 .owner = THIS_MODULE,
2375 .open = rt6_stats_seq_open,
2377 .llseek = seq_lseek,
2378 .release = single_release,
2380 #endif /* CONFIG_PROC_FS */
2382 #ifdef CONFIG_SYSCTL
/*
 * Handler for net.ipv6.route.flush: any write triggers an immediate
 * garbage collection of the routing cache.  flush_delay is read
 * BEFORE proc_dointvec() updates it, so the delay used is the value
 * stored by the previous write; <= 0 flushes everything (~0UL).
 */
2385 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2386 void __user *buffer, size_t *lenp, loff_t *ppos)
2388 struct net *net = current->nsproxy->net_ns;
2389 int delay = net->ipv6.sysctl.flush_delay;
2391 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2392 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template for the per-namespace net.ipv6.route.* sysctl table.
 * ipv6_route_sysctl_init() kmemdup()s this and repoints each .data
 * at the namespace's own fields, so the init_net addresses here are
 * placeholders.  Index order matters — keep it in sync with the
 * table[N].data assignments in ipv6_route_sysctl_init().
 */
2398 ctl_table ipv6_route_table_template[] = {
2400 .procname = "flush",
2401 .data = &init_net.ipv6.sysctl.flush_delay,
2402 .maxlen = sizeof(int),
2404 .proc_handler = &ipv6_sysctl_rtcache_flush
2407 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2408 .procname = "gc_thresh",
/* gc_thresh lives in the shared dst_ops, not in struct net */
2409 .data = &ip6_dst_ops.gc_thresh,
2410 .maxlen = sizeof(int),
2412 .proc_handler = &proc_dointvec,
2415 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2416 .procname = "max_size",
2417 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2418 .maxlen = sizeof(int),
2420 .proc_handler = &proc_dointvec,
2423 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2424 .procname = "gc_min_interval",
2425 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2426 .maxlen = sizeof(int),
2428 .proc_handler = &proc_dointvec_jiffies,
2429 .strategy = &sysctl_jiffies,
2432 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2433 .procname = "gc_timeout",
2434 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2435 .maxlen = sizeof(int),
2437 .proc_handler = &proc_dointvec_jiffies,
2438 .strategy = &sysctl_jiffies,
2441 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2442 .procname = "gc_interval",
2443 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2444 .maxlen = sizeof(int),
2446 .proc_handler = &proc_dointvec_jiffies,
2447 .strategy = &sysctl_jiffies,
2450 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2451 .procname = "gc_elasticity",
2452 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2453 .maxlen = sizeof(int),
2455 .proc_handler = &proc_dointvec_jiffies,
2456 .strategy = &sysctl_jiffies,
2459 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2460 .procname = "mtu_expires",
2461 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2462 .maxlen = sizeof(int),
2464 .proc_handler = &proc_dointvec_jiffies,
2465 .strategy = &sysctl_jiffies,
2468 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2469 .procname = "min_adv_mss",
2470 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2471 .maxlen = sizeof(int),
2473 .proc_handler = &proc_dointvec_jiffies,
2474 .strategy = &sysctl_jiffies,
2477 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2478 .procname = "gc_min_interval_ms",
/* millisecond-granularity alias of gc_min_interval (same storage) */
2479 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2480 .maxlen = sizeof(int),
2482 .proc_handler = &proc_dointvec_ms_jiffies,
2483 .strategy = &sysctl_ms_jiffies,
/*
 * ipv6_route_sysctl_init - clone the route sysctl template for a
 * namespace, repointing each entry's .data at @net's own sysctl
 * fields.  table[1] (gc_thresh) is intentionally left pointing at
 * the shared ip6_dst_ops until routes become per-namespace.
 * Indices must match ipv6_route_table_template[] ordering.
 */
2488 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2490 struct ctl_table *table;
2492 table = kmemdup(ipv6_route_table_template,
2493 sizeof(ipv6_route_table_template),
2497 table[0].data = &net->ipv6.sysctl.flush_delay;
2498 /* table[1].data will be handled when we have
2499 routes per namespace */
2500 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2501 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2502 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2503 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2504 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2505 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2506 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
/* Per-namespace init: create the ipv6_route and rt6_stats proc
 * entries (proc support only). */
2513 static int ip6_route_net_init(struct net *net)
2515 #ifdef CONFIG_PROC_FS
2516 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2517 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/* Per-namespace teardown: remove the proc entries created by
 * ip6_route_net_init(). */
2522 static void ip6_route_net_exit(struct net *net)
2524 #ifdef CONFIG_PROC_FS
2525 proc_net_remove(net, "ipv6_route");
2526 proc_net_remove(net, "rt6_stats");
/* pernet hooks wiring the proc setup/teardown into namespace
 * lifetime. */
2530 static struct pernet_operations ip6_route_net_ops = {
2531 .init = ip6_route_net_init,
2532 .exit = ip6_route_net_exit,
/*
 * ip6_route_init - boot-time initialization of the IPv6 routing
 * subsystem: dst slab cache, fib6 rules, the three rtnetlink route
 * handlers and the pernet (proc) operations.  Failures unwind in
 * reverse order via the goto ladder (some unwind lines elided).
 */
2535 int __init ip6_route_init(void)
2539 ip6_dst_ops.kmem_cachep =
2540 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2541 SLAB_HWCACHE_ALIGN, NULL);
2542 if (!ip6_dst_ops.kmem_cachep)
/* blackhole dsts share the same slab */
2545 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2549 goto out_kmem_cache;
2555 ret = fib6_rules_init();
2560 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2561 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2562 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2563 goto fib6_rules_init;
2565 ret = register_pernet_subsys(&ip6_route_net_ops);
2567 goto fib6_rules_init;
/* error unwind: rules, routes, slab — reverse of setup order */
2572 fib6_rules_cleanup();
2576 rt6_ifdown(&init_net, NULL);
2579 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
/*
 * ip6_route_cleanup - module-unload teardown, mirroring
 * ip6_route_init(): unregister pernet ops, drop fib6 rules, purge
 * all routes and destroy the dst slab cache.
 */
2583 void ip6_route_cleanup(void)
2585 unregister_pernet_subsys(&ip6_route_net_ops);
2586 fib6_rules_cleanup();
2588 rt6_ifdown(&init_net, NULL);
2590 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);