2 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: route.c,v 1.56 2001/10/31 21:55:55 davem Exp $
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
18 * YOSHIFUJI Hideaki @USAGI
19 * reworked default router selection.
20 * - respect outgoing interface
21 * - select from (probably) reachable routers (i.e.
22 * routers in REACHABLE, STALE, DELAY or PROBE states).
23 * - always select the same router if it is (probably)
24 * reachable. otherwise, round-robin the list.
26 * Fixed routing subtrees.
29 #include <linux/capability.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/times.h>
33 #include <linux/socket.h>
34 #include <linux/sockios.h>
35 #include <linux/net.h>
36 #include <linux/route.h>
37 #include <linux/netdevice.h>
38 #include <linux/in6.h>
39 #include <linux/init.h>
40 #include <linux/if_arp.h>
41 #include <linux/proc_fs.h>
42 #include <linux/seq_file.h>
43 #include <linux/nsproxy.h>
44 #include <net/net_namespace.h>
47 #include <net/ip6_fib.h>
48 #include <net/ip6_route.h>
49 #include <net/ndisc.h>
50 #include <net/addrconf.h>
52 #include <linux/rtnetlink.h>
55 #include <net/netevent.h>
56 #include <net/netlink.h>
58 #include <asm/uaccess.h>
61 #include <linux/sysctl.h>
64 /* Set to 3 to get tracing. */
68 #define RDBG(x) printk x
69 #define RT6_TRACE(x...) printk(KERN_DEBUG x)
72 #define RT6_TRACE(x...) do { ; } while (0)
75 #define CLONE_OFFLINK_ROUTE 0
77 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
78 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie);
79 static struct dst_entry *ip6_negative_advice(struct dst_entry *);
80 static void ip6_dst_destroy(struct dst_entry *);
81 static void ip6_dst_ifdown(struct dst_entry *,
82 struct net_device *dev, int how);
83 static int ip6_dst_gc(struct dst_ops *ops);
85 static int ip6_pkt_discard(struct sk_buff *skb);
86 static int ip6_pkt_discard_out(struct sk_buff *skb);
87 static void ip6_link_failure(struct sk_buff *skb);
88 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
90 #ifdef CONFIG_IPV6_ROUTE_INFO
91 static struct rt6_info *rt6_add_route_info(struct net *net,
92 struct in6_addr *prefix, int prefixlen,
93 struct in6_addr *gwaddr, int ifindex,
95 static struct rt6_info *rt6_get_route_info(struct net *net,
96 struct in6_addr *prefix, int prefixlen,
97 struct in6_addr *gwaddr, int ifindex);
/*
 * dst_ops vtable for ordinary IPv6 route cache entries.  Wires the
 * generic dst-cache layer to the IPv6-specific check/destroy/ifdown/
 * PMTU handlers defined below.
 * NOTE(review): fragment — some initializers (e.g. .family, .gc) and
 * the closing brace are elided from this excerpt.
 */
100 static struct dst_ops ip6_dst_ops = {
102 .protocol = __constant_htons(ETH_P_IPV6),
105 .check = ip6_dst_check,
106 .destroy = ip6_dst_destroy,
107 .ifdown = ip6_dst_ifdown,
108 .negative_advice = ip6_negative_advice,
109 .link_failure = ip6_link_failure,
110 .update_pmtu = ip6_rt_update_pmtu,
111 .local_out = ip6_local_out,
112 .entry_size = sizeof(struct rt6_info),
113 .entries = ATOMIC_INIT(0),
/*
 * PMTU update hook for blackhole routes: intentionally does nothing
 * (body elided in this excerpt, but the ops table below implies a stub).
 */
116 static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu)
/*
 * dst_ops for "blackhole" copies of routes (see ip6_dst_blackhole()).
 * No ifdown/negative_advice/link_failure hooks — blackhole dsts are
 * detached from the FIB.  Fragment: .family and closing brace elided.
 */
120 static struct dst_ops ip6_dst_blackhole_ops = {
122 .protocol = __constant_htons(ETH_P_IPV6),
123 .destroy = ip6_dst_destroy,
124 .check = ip6_dst_check,
125 .update_pmtu = ip6_rt_blackhole_update_pmtu,
126 .entry_size = sizeof(struct rt6_info),
127 .entries = ATOMIC_INIT(0),
/*
 * Template for the "null" route returned when a lookup fails: drops
 * packets with -ENETUNREACH.  rt6i_metric ~0 makes it the worst match;
 * refcounts start at 1 so it is never freed.
 * NOTE(review): fragment — .u.dst nesting lines and closing brace elided.
 */
130 static struct rt6_info ip6_null_entry_template = {
133 .__refcnt = ATOMIC_INIT(1),
136 .error = -ENETUNREACH,
137 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
138 .input = ip6_pkt_discard,
139 .output = ip6_pkt_discard_out,
143 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
144 .rt6i_metric = ~(u32) 0,
145 .rt6i_ref = ATOMIC_INIT(1),
/* Runtime instance cloned from the template above. */
148 struct rt6_info *ip6_null_entry;
/*
 * With policy routing (multiple tables) two more special routes exist:
 * "prohibit" (administratively denied) and "blackhole" (silent drop).
 * Same template pattern as ip6_null_entry_template above.
 * NOTE(review): fragment — .u.dst nesting, .error initializers and
 * closing braces are elided from this excerpt.
 */
150 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
152 static int ip6_pkt_prohibit(struct sk_buff *skb);
153 static int ip6_pkt_prohibit_out(struct sk_buff *skb);
155 struct rt6_info ip6_prohibit_entry_template = {
158 .__refcnt = ATOMIC_INIT(1),
162 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
163 .input = ip6_pkt_prohibit,
164 .output = ip6_pkt_prohibit_out,
168 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
169 .rt6i_metric = ~(u32) 0,
170 .rt6i_ref = ATOMIC_INIT(1),
173 struct rt6_info *ip6_prohibit_entry;
/* Blackhole: discard both directions without any ICMP error. */
175 static struct rt6_info ip6_blk_hole_entry_template = {
178 .__refcnt = ATOMIC_INIT(1),
182 .metrics = { [RTAX_HOPLIMIT - 1] = 255, },
183 .input = dst_discard,
184 .output = dst_discard,
188 .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP),
189 .rt6i_metric = ~(u32) 0,
190 .rt6i_ref = ATOMIC_INIT(1),
193 struct rt6_info *ip6_blk_hole_entry;
197 /* allocate dst with ip6_dst_ops */
/* Thin wrapper: allocate a dst entry bound to the IPv6 dst_ops table. */
198 static __inline__ struct rt6_info *ip6_dst_alloc(void)
200 return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
/*
 * dst_ops.destroy: drop this route's reference on its inet6_dev.
 * NOTE(review): fragment — the in6_dev_put(idev) call and closing
 * brace are elided; only the pointer clearing is visible here.
 */
203 static void ip6_dst_destroy(struct dst_entry *dst)
205 struct rt6_info *rt = (struct rt6_info *)dst;
206 struct inet6_dev *idev = rt->rt6i_idev;
209 rt->rt6i_idev = NULL;
/*
 * dst_ops.ifdown: the route's device is going away; re-point the
 * route's idev at the loopback device of the same namespace so the
 * dst stays valid until it is garbage collected.
 * NOTE(review): fragment — the in6_dev_put() of the old idev and the
 * closing braces are elided from this excerpt.
 */
214 static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
217 struct rt6_info *rt = (struct rt6_info *)dst;
218 struct inet6_dev *idev = rt->rt6i_idev;
219 struct net_device *loopback_dev =
220 dev->nd_net->loopback_dev;
222 if (dev != loopback_dev && idev != NULL && idev->dev == dev) {
223 struct inet6_dev *loopback_idev =
224 in6_dev_get(loopback_dev);
225 if (loopback_idev != NULL) {
226 rt->rt6i_idev = loopback_idev;
/* True if the route carries RTF_EXPIRES and its expiry time has passed. */
232 static __inline__ int rt6_check_expired(const struct rt6_info *rt)
234 return (rt->rt6i_flags & RTF_EXPIRES &&
235 time_after(jiffies, rt->rt6i_expires));
/*
 * Strict (interface-bound) lookup is required for multicast and
 * link-local destinations, whose scope depends on the interface.
 */
238 static inline int rt6_need_strict(struct in6_addr *daddr)
240 return (ipv6_addr_type(daddr) &
241 (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL));
245 * Route lookup. Any table->tb6_lock is implied.
/*
 * Walk the sibling list of routes for one prefix and pick the entry
 * whose device matches oif; loopback routes bound to oif are kept as a
 * fallback ("local").  Returns ip6_null_entry when nothing matches.
 * NOTE(review): fragment — the strict/flags handling, the "local"
 * fallback return and several braces are elided from this excerpt.
 */
248 static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt,
252 struct rt6_info *local = NULL;
253 struct rt6_info *sprt;
256 for (sprt = rt; sprt; sprt = sprt->u.dst.rt6_next) {
257 struct net_device *dev = sprt->rt6i_dev;
258 if (dev->ifindex == oif)
260 if (dev->flags & IFF_LOOPBACK) {
261 if (sprt->rt6i_idev == NULL ||
262 sprt->rt6i_idev->dev->ifindex != oif) {
265 if (local && (!oif ||
266 local->rt6i_idev->dev->ifindex == oif))
277 return ip6_null_entry;
282 #ifdef CONFIG_IPV6_ROUTER_PREF
/*
 * Router Reachability Probing (RFC 4191 context): if the next hop's
 * neighbour entry is not VALID and the per-device probe interval has
 * elapsed, send a unicast-solicit-style NS to the router.  Rate-limited
 * by neigh->updated.  When CONFIG_IPV6_ROUTER_PREF is off this
 * collapses to the empty inline stub at the bottom.
 */
283 static void rt6_probe(struct rt6_info *rt)
285 struct neighbour *neigh = rt ? rt->rt6i_nexthop : NULL;
287 * Okay, this does not seem to be appropriate
288 * for now, however, we need to check if it
289 * is really so; aka Router Reachability Probing.
291 * Router Reachability Probe MUST be rate-limited
292 * to no more than one per minute.
294 if (!neigh || (neigh->nud_state & NUD_VALID))
296 read_lock_bh(&neigh->lock);
297 if (!(neigh->nud_state & NUD_VALID) &&
298 time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) {
299 struct in6_addr mcaddr;
300 struct in6_addr *target;
/* Stamp before dropping the lock so concurrent probes stay rate-limited. */
302 neigh->updated = jiffies;
303 read_unlock_bh(&neigh->lock);
305 target = (struct in6_addr *)&neigh->primary_key;
306 addrconf_addr_solict_mult(target, &mcaddr);
307 ndisc_send_ns(rt->rt6i_dev, NULL, target, &mcaddr, NULL);
309 read_unlock_bh(&neigh->lock);
/* No-op variant when router-preference support is compiled out. */
312 static inline void rt6_probe(struct rt6_info *rt)
319 * Default Router Selection (RFC 2461 6.3.6)
/*
 * Score the route's device against the requested output interface.
 * NOTE(review): fragment — the numeric return values (device-match
 * scores) are elided from this excerpt.
 */
321 static inline int rt6_check_dev(struct rt6_info *rt, int oif)
323 struct net_device *dev = rt->rt6i_dev;
324 if (!oif || dev->ifindex == oif)
326 if ((dev->flags & IFF_LOOPBACK) &&
327 rt->rt6i_idev && rt->rt6i_idev->dev->ifindex == oif)
/*
 * Score next-hop reachability: non-gateway/NONEXTHOP routes need no
 * neighbour state; otherwise inspect neigh->nud_state under the lock.
 * NOTE(review): fragment — the actual return values for each state
 * branch are elided from this excerpt.
 */
332 static inline int rt6_check_neigh(struct rt6_info *rt)
334 struct neighbour *neigh = rt->rt6i_nexthop;
336 if (rt->rt6i_flags & RTF_NONEXTHOP ||
337 !(rt->rt6i_flags & RTF_GATEWAY))
340 read_lock_bh(&neigh->lock);
341 if (neigh->nud_state & NUD_VALID)
343 #ifdef CONFIG_IPV6_ROUTER_PREF
344 else if (neigh->nud_state & NUD_FAILED)
349 read_unlock_bh(&neigh->lock);
/*
 * Combine device match, (optional) RFC 4191 router preference and
 * next-hop reachability into a single comparable score.  A failed
 * mandatory criterion (strict IFACE / REACHABLE) disqualifies the
 * route.  Fragment: the RT6_NUD disqualification return and closing
 * brace are elided.
 */
355 static int rt6_score_route(struct rt6_info *rt, int oif,
360 m = rt6_check_dev(rt, oif);
361 if (!m && (strict & RT6_LOOKUP_F_IFACE))
363 #ifdef CONFIG_IPV6_ROUTER_PREF
364 m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2;
366 n = rt6_check_neigh(rt);
367 if (!n && (strict & RT6_LOOKUP_F_REACHABLE))
/*
 * Compare one candidate route against the best match so far (*mpri is
 * the running best score); skip expired routes, probe the next hop
 * when reachability is required.  Returns the (possibly new) best match.
 * NOTE(review): fragment — the score comparison/update and the
 * rt6_probe() call are elided from this excerpt.
 */
372 static struct rt6_info *find_match(struct rt6_info *rt, int oif, int strict,
373 int *mpri, struct rt6_info *match)
377 if (rt6_check_expired(rt))
380 m = rt6_score_route(rt, oif, strict)
385 if (strict & RT6_LOOKUP_F_REACHABLE)
389 } else if (strict & RT6_LOOKUP_F_REACHABLE) {
/*
 * Scan all routes of equal metric in round-robin order: first from
 * rr_head to the end of the metric group, then from fn->leaf back up
 * to rr_head, keeping the best-scoring entry.
 */
397 static struct rt6_info *find_rr_leaf(struct fib6_node *fn,
398 struct rt6_info *rr_head,
399 u32 metric, int oif, int strict)
401 struct rt6_info *rt, *match;
405 for (rt = rr_head; rt && rt->rt6i_metric == metric;
406 rt = rt->u.dst.rt6_next)
407 match = find_match(rt, oif, strict, &mpri, match);
408 for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric;
409 rt = rt->u.dst.rt6_next)
410 match = find_match(rt, oif, strict, &mpri, match);
/*
 * Default router selection for a fib6 node: pick the best route among
 * equal-metric siblings via find_rr_leaf(); if nothing reachable
 * matched, advance the node's round-robin pointer (fn->rr_ptr) so the
 * next lookup tries the next router.  Falls back to ip6_null_entry.
 * NOTE(review): fragment — the rr_ptr advance and rt0 re-selection
 * logic between lines 433 and 440 is elided.
 */
415 static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict)
417 struct rt6_info *match, *rt0;
419 RT6_TRACE("%s(fn->leaf=%p, oif=%d)\n",
420 __FUNCTION__, fn->leaf, oif);
424 fn->rr_ptr = rt0 = fn->leaf;
426 match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict);
429 (strict & RT6_LOOKUP_F_REACHABLE)) {
430 struct rt6_info *next = rt0->u.dst.rt6_next;
432 /* no entries matched; do round-robin */
433 if (!next || next->rt6i_metric != rt0->rt6i_metric)
440 RT6_TRACE("%s() => %p\n",
441 __FUNCTION__, match);
443 return (match ? match : ip6_null_entry);
446 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * Process a Route Information Option from a Router Advertisement
 * (RFC 4191): validate length/prefix_len, clamp the lifetime to avoid
 * jiffies overflow, then update/insert/delete the corresponding
 * RTF_ROUTEINFO route for the advertising gateway.
 * NOTE(review): fragment — the -EINVAL returns on the sanity checks,
 * the ip6_del_rt() call for lifetime==0 and the final return are
 * elided from this excerpt.
 */
447 int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
448 struct in6_addr *gwaddr)
450 struct net *net = dev->nd_net;
451 struct route_info *rinfo = (struct route_info *) opt;
452 struct in6_addr prefix_buf, *prefix;
457 if (len < sizeof(struct route_info)) {
461 /* Sanity check for prefix_len and length */
462 if (rinfo->length > 3) {
464 } else if (rinfo->prefix_len > 128) {
466 } else if (rinfo->prefix_len > 64) {
467 if (rinfo->length < 2) {
470 } else if (rinfo->prefix_len > 0) {
471 if (rinfo->length < 1) {
476 pref = rinfo->route_pref;
477 if (pref == ICMPV6_ROUTER_PREF_INVALID)
478 pref = ICMPV6_ROUTER_PREF_MEDIUM;
480 lifetime = ntohl(rinfo->lifetime);
481 if (lifetime == 0xffffffff) {
483 } else if (lifetime > 0x7fffffff/HZ) {
484 /* Avoid arithmetic overflow */
485 lifetime = 0x7fffffff/HZ - 1;
488 if (rinfo->length == 3)
489 prefix = (struct in6_addr *)rinfo->prefix;
491 /* this function is safe */
492 ipv6_addr_prefix(&prefix_buf,
493 (struct in6_addr *)rinfo->prefix,
495 prefix = &prefix_buf;
498 rt = rt6_get_route_info(net, prefix, rinfo->prefix_len, gwaddr,
501 if (rt && !lifetime) {
507 rt = rt6_add_route_info(net, prefix, rinfo->prefix_len, gwaddr, dev->ifindex,
510 rt->rt6i_flags = RTF_ROUTEINFO |
511 (rt->rt6i_flags & ~RTF_PREF_MASK) | RTF_PREF(pref);
/* 0xffffffff means "infinite lifetime" — never expire the route. */
514 if (lifetime == 0xffffffff) {
515 rt->rt6i_flags &= ~RTF_EXPIRES;
517 rt->rt6i_expires = jiffies + HZ * lifetime;
518 rt->rt6i_flags |= RTF_EXPIRES;
520 dst_release(&rt->u.dst);
/*
 * Lookup backtracking helper used by the ip6_pol_route* functions:
 * when the current node yielded only ip6_null_entry, walk up the fib
 * tree (honouring source-routed subtrees) until a node with real route
 * info (RTN_RTINFO) or the tree root is reached.  Relies on `rt`, `fn`
 * and a `restart`/`out` label in the enclosing function.
 * NOTE(review): fragment — the pn assignment, the goto statements and
 * the do/while wrapper are elided from this excerpt.
 */
526 #define BACKTRACK(saddr) \
528 if (rt == ip6_null_entry) { \
529 struct fib6_node *pn; \
531 if (fn->fn_flags & RTN_TL_ROOT) \
534 if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) \
535 fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); \
538 if (fn->fn_flags & RTN_RTINFO) \
/*
 * Simple (non-cloning) table lookup under tb6_lock: locate the fib
 * node for the flow, filter by output device, backtrack on miss, and
 * take a use-reference on the resulting dst before unlocking.
 */
544 static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table,
545 struct flowi *fl, int flags)
547 struct fib6_node *fn;
550 read_lock_bh(&table->tb6_lock);
551 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
554 rt = rt6_device_match(rt, fl->oif, flags);
555 BACKTRACK(&fl->fl6_src);
557 dst_use(&rt->u.dst, jiffies);
558 read_unlock_bh(&table->tb6_lock);
/*
 * Public convenience lookup: build a flowi from daddr/saddr/oif and
 * run it through the policy-rule engine with ip6_pol_route_lookup.
 * `strict` forces an exact output-interface match.  Caller must
 * dst_release() the result.
 * NOTE(review): fragment — the flowi initialization and the error-path
 * handling of the returned dst are elided from this excerpt.
 */
563 struct rt6_info *rt6_lookup(struct net *net, struct in6_addr *daddr,
564 struct in6_addr *saddr, int oif, int strict)
574 struct dst_entry *dst;
575 int flags = strict ? RT6_LOOKUP_F_IFACE : 0;
578 memcpy(&fl.fl6_src, saddr, sizeof(*saddr));
579 flags |= RT6_LOOKUP_F_HAS_SADDR;
582 dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_lookup);
584 return (struct rt6_info *) dst;
591 EXPORT_SYMBOL(rt6_lookup);
593 /* ip6_ins_rt is called with FREE table->tb6_lock.
594 It takes new route entry, the addition fails by any reason the
595 route is freed. In any case, if caller does not hold it, it may
/* Insert rt into its fib6 table under the table write lock. */
599 static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info)
602 struct fib6_table *table;
604 table = rt->rt6i_table;
605 write_lock_bh(&table->tb6_lock);
606 err = fib6_add(&table->tb6_root, rt, info);
607 write_unlock_bh(&table->tb6_lock);
/* Public wrapper: derive the netlink namespace from the route's device. */
612 int ip6_ins_rt(struct rt6_info *rt)
614 struct nl_info info = {
615 .nl_net = rt->rt6i_dev->nd_net,
617 return __ip6_ins_rt(rt, &info);
/*
 * Copy-on-write clone of a connected (non-gateway) route for one
 * specific destination: narrows the copy to a /128 host route, marks
 * it RTF_CACHE, and resolves/attaches the neighbour entry for daddr.
 * NOTE(review): fragment — NULL checks after ip6_rt_copy() /
 * ndisc_get_neigh() and the return statement are elided.
 */
620 static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr,
621 struct in6_addr *saddr)
629 rt = ip6_rt_copy(ort);
632 if (!(rt->rt6i_flags&RTF_GATEWAY)) {
/* A route to the prefix's own address behaves as anycast. */
633 if (rt->rt6i_dst.plen != 128 &&
634 ipv6_addr_equal(&rt->rt6i_dst.addr, daddr))
635 rt->rt6i_flags |= RTF_ANYCAST;
636 ipv6_addr_copy(&rt->rt6i_gateway, daddr);
639 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
640 rt->rt6i_dst.plen = 128;
641 rt->rt6i_flags |= RTF_CACHE;
642 rt->u.dst.flags |= DST_HOST;
644 #ifdef CONFIG_IPV6_SUBTREES
645 if (rt->rt6i_src.plen && saddr) {
646 ipv6_addr_copy(&rt->rt6i_src.addr, saddr);
647 rt->rt6i_src.plen = 128;
651 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
/*
 * Clone a gateway/NONEXTHOP route as a /128 cache entry, reusing the
 * original's neighbour (no new resolution needed — same next hop).
 * NOTE(review): fragment — the NULL check on ip6_rt_copy() and the
 * return are elided from this excerpt.
 */
658 static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *daddr)
660 struct rt6_info *rt = ip6_rt_copy(ort);
662 ipv6_addr_copy(&rt->rt6i_dst.addr, daddr);
663 rt->rt6i_dst.plen = 128;
664 rt->rt6i_flags |= RTF_CACHE;
665 rt->u.dst.flags |= DST_HOST;
666 rt->rt6i_nexthop = neigh_clone(ort->rt6i_nexthop);
/*
 * Core lookup with route-cache cloning.  Select the best route under
 * tb6_lock; if it is not already a cache entry, drop the lock, create
 * a COW clone (connected route) or — when CLONE_OFFLINK_ROUTE — a
 * plain clone, and insert it.  Handles the race where another CPU
 * inserted the same clone while the lock was released (relookup).
 * NOTE(review): fragment — the restart/relookup labels, the reachable
 * retry loop and the function's return are elided from this excerpt.
 */
671 static struct rt6_info *ip6_pol_route(struct fib6_table *table, int oif,
672 struct flowi *fl, int flags)
674 struct fib6_node *fn;
675 struct rt6_info *rt, *nrt;
/* Routers must not hide unreachability; hosts prefer reachable routers. */
679 int reachable = ipv6_devconf.forwarding ? 0 : RT6_LOOKUP_F_REACHABLE;
681 strict |= flags & RT6_LOOKUP_F_IFACE;
684 read_lock_bh(&table->tb6_lock);
687 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
690 rt = rt6_select(fn, oif, strict | reachable);
691 BACKTRACK(&fl->fl6_src);
692 if (rt == ip6_null_entry ||
693 rt->rt6i_flags & RTF_CACHE)
696 dst_hold(&rt->u.dst);
697 read_unlock_bh(&table->tb6_lock);
699 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
700 nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src);
702 #if CLONE_OFFLINK_ROUTE
703 nrt = rt6_alloc_clone(rt, &fl->fl6_dst);
709 dst_release(&rt->u.dst);
710 rt = nrt ? : ip6_null_entry;
712 dst_hold(&rt->u.dst);
714 err = ip6_ins_rt(nrt);
723 * Race condition! In the gap, when table->tb6_lock was
724 * released someone could insert this route. Relookup.
726 dst_release(&rt->u.dst);
734 dst_hold(&rt->u.dst);
735 read_unlock_bh(&table->tb6_lock);
737 rt->u.dst.lastuse = jiffies;
/* Policy-lookup adapter for the input path: oif comes from fl->iif. */
743 static struct rt6_info *ip6_pol_route_input(struct fib6_table *table,
744 struct flowi *fl, int flags)
746 return ip6_pol_route(table, fl->iif, fl, flags);
/*
 * Route an incoming packet: build a flowi from the IPv6 header and
 * attach the looked-up dst to the skb.
 * NOTE(review): fragment — most of the flowi initializer (daddr/saddr
 * assignments) is elided from this excerpt.
 */
749 void ip6_route_input(struct sk_buff *skb)
751 struct ipv6hdr *iph = ipv6_hdr(skb);
752 struct net *net = skb->dev->nd_net;
753 int flags = RT6_LOOKUP_F_HAS_SADDR;
755 .iif = skb->dev->ifindex,
760 .flowlabel = (* (__be32 *) iph)&IPV6_FLOWINFO_MASK,
764 .proto = iph->nexthdr,
767 if (rt6_need_strict(&iph->daddr))
768 flags |= RT6_LOOKUP_F_IFACE;
770 skb->dst = fib6_rule_lookup(net, &fl, flags, ip6_pol_route_input);
/* Policy-lookup adapter for the output path: oif comes from fl->oif. */
773 static struct rt6_info *ip6_pol_route_output(struct fib6_table *table,
774 struct flowi *fl, int flags)
776 return ip6_pol_route(table, fl->oif, fl, flags);
/*
 * Route an outgoing flow for a socket.  NOTE(review): uses &init_net
 * directly — presumably pre-namespace-aware code; `sk` appears unused
 * in the visible lines.  Fragment: the `flags` declaration is elided.
 */
779 struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
783 if (rt6_need_strict(&fl->fl6_dst))
784 flags |= RT6_LOOKUP_F_IFACE;
786 if (!ipv6_addr_any(&fl->fl6_src))
787 flags |= RT6_LOOKUP_F_HAS_SADDR;
789 return fib6_rule_lookup(&init_net, fl, flags, ip6_pol_route_output);
792 EXPORT_SYMBOL(ip6_route_output);
/*
 * Replace *dstp with a detached "blackhole" copy of itself (used by
 * IPsec larval flows): same metrics/device/keys, but input/output are
 * dst_discard and the copy lives outside the FIB.  Returns 0 on
 * success, -ENOMEM if the blackhole dst could not be allocated.
 * NOTE(review): fragment — dev_hold(), the release of the original
 * dst and several closing braces are elided from this excerpt.
 */
794 int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl)
796 struct rt6_info *ort = (struct rt6_info *) *dstp;
797 struct rt6_info *rt = (struct rt6_info *)
798 dst_alloc(&ip6_dst_blackhole_ops);
799 struct dst_entry *new = NULL;
804 atomic_set(&new->__refcnt, 1);
806 new->input = dst_discard;
807 new->output = dst_discard;
809 memcpy(new->metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
810 new->dev = ort->u.dst.dev;
813 rt->rt6i_idev = ort->rt6i_idev;
815 in6_dev_hold(rt->rt6i_idev);
816 rt->rt6i_expires = 0;
818 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
819 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
822 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
823 #ifdef CONFIG_IPV6_SUBTREES
824 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
832 return (new ? 0 : -ENOMEM);
834 EXPORT_SYMBOL_GPL(ip6_dst_blackhole);
837 * Destination cache support functions
/*
 * dst_ops.check: a cached route is still valid while its fib node's
 * serial number matches the cookie recorded at lookup time.
 * Fragment: the `return dst` / `return NULL` lines are elided.
 */
840 static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie)
844 rt = (struct rt6_info *) dst;
846 if (rt && rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie))
/*
 * dst_ops.negative_advice: drop a stale RTF_CACHE clone so the next
 * lookup re-resolves.  Fragment: the ip6_del_rt()/dst_release() calls
 * and return are elided.
 */
852 static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
854 struct rt6_info *rt = (struct rt6_info *) dst;
857 if (rt->rt6i_flags & RTF_CACHE)
/*
 * dst_ops.link_failure: neighbour resolution failed for this packet.
 * Send "address unreachable", then either expire the cached clone
 * immediately or invalidate the default route's fib node (sernum = -1
 * forces relookup).
 */
865 static void ip6_link_failure(struct sk_buff *skb)
869 icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
871 rt = (struct rt6_info *) skb->dst;
873 if (rt->rt6i_flags&RTF_CACHE) {
874 dst_set_expires(&rt->u.dst, 0);
875 rt->rt6i_flags |= RTF_EXPIRES;
876 } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT))
877 rt->rt6i_node->fn_sernum = -1;
/*
 * dst_ops.update_pmtu: lower the cached MTU of a host (/128) route.
 * MTUs below IPV6_MIN_MTU (1280) are handled by forcing ALLFRAG
 * instead of going under the IPv6 minimum link MTU.
 * NOTE(review): fragment — the clamp of mtu to IPV6_MIN_MTU inside
 * the `if` and the closing braces are elided from this excerpt.
 */
881 static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
883 struct rt6_info *rt6 = (struct rt6_info*)dst;
885 if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) {
886 rt6->rt6i_flags |= RTF_MODIFIED;
887 if (mtu < IPV6_MIN_MTU) {
889 dst->metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
891 dst->metrics[RTAX_MTU-1] = mtu;
892 call_netevent_notifiers(NETEVENT_PMTU_UPDATE, dst);
896 static int ipv6_get_mtu(struct net_device *dev);
/*
 * Derive the advertised MSS from a path MTU: subtract IPv6+TCP header
 * overhead, clamp to the per-namespace minimum, and cap at
 * IPV6_MAXPLEN ("any MSS, rely on PMTU discovery").
 * Fragment: the final assignment/return after line 911 is elided.
 */
898 static inline unsigned int ipv6_advmss(struct net *net, unsigned int mtu)
900 mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
902 if (mtu < net->ipv6.sysctl.ip6_rt_min_advmss)
903 mtu = net->ipv6.sysctl.ip6_rt_min_advmss;
906 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and
907 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size.
908 * IPV6_MAXPLEN is also valid and means: "any MSS,
909 * rely only on pmtu discovery"
911 if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
/* Singly linked list of ICMPv6 dsts awaiting GC, guarded by its spinlock. */
916 static struct dst_entry *icmp6_dst_gc_list;
917 static DEFINE_SPINLOCK(icmp6_dst_lock);
/*
 * Allocate a throwaway host route for sending an ICMPv6/NDISC packet
 * without touching the FIB: fill in device, neighbour and metrics,
 * chain it on icmp6_dst_gc_list and kick the fib6 GC so it is reaped
 * once its refcount drops.
 * NOTE(review): fragment — error-path cleanup (in6_dev_put etc.),
 * dev_hold and the final return are elided from this excerpt.
 */
919 struct dst_entry *icmp6_dst_alloc(struct net_device *dev,
920 struct neighbour *neigh,
921 struct in6_addr *addr)
924 struct inet6_dev *idev = in6_dev_get(dev);
925 struct net *net = dev->nd_net;
927 if (unlikely(idev == NULL))
930 rt = ip6_dst_alloc();
931 if (unlikely(rt == NULL)) {
/* Caller may pass a pre-resolved neighbour; otherwise resolve here. */
940 neigh = ndisc_get_neigh(dev, addr);
943 rt->rt6i_idev = idev;
944 rt->rt6i_nexthop = neigh;
945 atomic_set(&rt->u.dst.__refcnt, 1);
946 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
947 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
948 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
949 rt->u.dst.output = ip6_output;
951 #if 0 /* there's no chance to use these for ndisc */
952 rt->u.dst.flags = ipv6_addr_type(addr) & IPV6_ADDR_UNICAST
955 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
956 rt->rt6i_dst.plen = 128;
959 spin_lock_bh(&icmp6_dst_lock);
960 rt->u.dst.next = icmp6_dst_gc_list;
961 icmp6_dst_gc_list = &rt->u.dst;
962 spin_unlock_bh(&icmp6_dst_lock);
964 fib6_force_start_gc(net);
/*
 * Reap unreferenced entries from icmp6_dst_gc_list; *more presumably
 * counts entries still pinned (elided from this excerpt, along with
 * the unlink/free body of the loop and the return value).
 */
970 int icmp6_dst_gc(int *more)
972 struct dst_entry *dst, *next, **pprev;
978 spin_lock_bh(&icmp6_dst_lock);
979 pprev = &icmp6_dst_gc_list;
981 while ((dst = *pprev) != NULL) {
982 if (!atomic_read(&dst->__refcnt)) {
992 spin_unlock_bh(&icmp6_dst_lock);
/*
 * dst_ops.gc for the IPv6 dst cache: rate-limited by
 * ip6_rt_gc_min_interval, with an adaptive `expire` horizon that
 * shrinks (via gc_elasticity shift) while the cache stays over
 * gc_thresh.  Returns nonzero (failure) while still over max_size.
 * NOTE(review): fragment — the early `return 0`/`goto out`, the
 * last_gc update and the out label are elided from this excerpt.
 */
997 static int ip6_dst_gc(struct dst_ops *ops)
999 static unsigned expire = 30*HZ;
1000 static unsigned long last_gc;
1001 unsigned long now = jiffies;
1003 if (time_after(last_gc + init_net.ipv6.sysctl.ip6_rt_gc_min_interval, now) &&
1004 atomic_read(&ip6_dst_ops.entries) <= init_net.ipv6.sysctl.ip6_rt_max_size)
1008 fib6_run_gc(expire, &init_net);
1010 if (atomic_read(&ip6_dst_ops.entries) < ip6_dst_ops.gc_thresh)
1011 expire = init_net.ipv6.sysctl.ip6_rt_gc_timeout>>1;
1014 expire -= expire>>init_net.ipv6.sysctl.ip6_rt_gc_elasticity;
1015 return (atomic_read(&ip6_dst_ops.entries) > init_net.ipv6.sysctl.ip6_rt_max_size);
1018 /* Clean host part of a prefix. Not necessary in radix tree,
1019 but results in cleaner routing tables.
1021 Remove it only when all the things will work!
/*
 * Device MTU with an IPv6 floor: read cnf.mtu6 from the inet6_dev if
 * present, else fall back to IPV6_MIN_MTU.  Fragment: the NULL check
 * and in6_dev_put()/return are elided.
 */
1024 static int ipv6_get_mtu(struct net_device *dev)
1026 int mtu = IPV6_MIN_MTU;
1027 struct inet6_dev *idev;
1029 idev = in6_dev_get(dev);
1031 mtu = idev->cnf.mtu6;
/*
 * Per-device hop limit, defaulting to the global ipv6_devconf value.
 * Fragment: the NULL check and in6_dev_put()/return are elided.
 */
1037 int ipv6_get_hoplimit(struct net_device *dev)
1039 int hoplimit = ipv6_devconf.hop_limit;
1040 struct inet6_dev *idev;
1042 idev = in6_dev_get(dev);
1044 hoplimit = idev->cnf.hop_limit;
/*
 * Create and insert a route from a parsed fib6_config (netlink/ioctl
 * path).  Validates prefix lengths, resolves the device/idev, promotes
 * loopback routes to reject routes, verifies that a configured gateway
 * is on-link (or reachable through an existing gateway-less route),
 * resolves the next-hop neighbour, applies netlink metrics and finally
 * inserts via __ip6_ins_rt().
 * NOTE(review): large fragment — many error branches (`goto out`),
 * intermediate returns, and the `out:` cleanup label are elided from
 * this excerpt; the visible flow is the success path plus selected
 * checks.
 */
1054 int ip6_route_add(struct fib6_config *cfg)
1057 struct net *net = cfg->fc_nlinfo.nl_net;
1058 struct rt6_info *rt = NULL;
1059 struct net_device *dev = NULL;
1060 struct inet6_dev *idev = NULL;
1061 struct fib6_table *table;
1064 if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128)
1066 #ifndef CONFIG_IPV6_SUBTREES
/* Source-prefix routes require CONFIG_IPV6_SUBTREES. */
1067 if (cfg->fc_src_len)
1070 if (cfg->fc_ifindex) {
1072 dev = dev_get_by_index(net, cfg->fc_ifindex);
1075 idev = in6_dev_get(dev);
1080 if (cfg->fc_metric == 0)
1081 cfg->fc_metric = IP6_RT_PRIO_USER;
1083 table = fib6_new_table(net, cfg->fc_table);
1084 if (table == NULL) {
1089 rt = ip6_dst_alloc();
1096 rt->u.dst.obsolete = -1;
1097 rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires);
1099 if (cfg->fc_protocol == RTPROT_UNSPEC)
1100 cfg->fc_protocol = RTPROT_BOOT;
1101 rt->rt6i_protocol = cfg->fc_protocol;
1103 addr_type = ipv6_addr_type(&cfg->fc_dst);
1105 if (addr_type & IPV6_ADDR_MULTICAST)
1106 rt->u.dst.input = ip6_mc_input;
1108 rt->u.dst.input = ip6_forward;
1110 rt->u.dst.output = ip6_output;
1112 ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len);
1113 rt->rt6i_dst.plen = cfg->fc_dst_len;
1114 if (rt->rt6i_dst.plen == 128)
1115 rt->u.dst.flags = DST_HOST;
1117 #ifdef CONFIG_IPV6_SUBTREES
1118 ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len);
1119 rt->rt6i_src.plen = cfg->fc_src_len;
1122 rt->rt6i_metric = cfg->fc_metric;
1124 /* We cannot add true routes via loopback here,
1125 they would result in kernel looping; promote them to reject routes
1127 if ((cfg->fc_flags & RTF_REJECT) ||
1128 (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) {
1129 /* hold loopback dev/idev if we haven't done so. */
1130 if (dev != net->loopback_dev) {
1135 dev = net->loopback_dev;
1137 idev = in6_dev_get(dev);
1143 rt->u.dst.output = ip6_pkt_discard_out;
1144 rt->u.dst.input = ip6_pkt_discard;
1145 rt->u.dst.error = -ENETUNREACH;
1146 rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP;
1150 if (cfg->fc_flags & RTF_GATEWAY) {
1151 struct in6_addr *gw_addr;
1154 gw_addr = &cfg->fc_gateway;
1155 ipv6_addr_copy(&rt->rt6i_gateway, gw_addr);
1156 gwa_type = ipv6_addr_type(gw_addr);
1158 if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) {
1159 struct rt6_info *grt;
1161 /* IPv6 strictly inhibits using not link-local
1162 addresses as nexthop address.
1163 Otherwise, router will not able to send redirects.
1164 It is very good, but in some (rare!) circumstances
1165 (SIT, PtP, NBMA NOARP links) it is handy to allow
1166 some exceptions. --ANK
1169 if (!(gwa_type&IPV6_ADDR_UNICAST))
/* Gateway must itself be reachable via a non-gateway route. */
1172 grt = rt6_lookup(net, gw_addr, NULL, cfg->fc_ifindex, 1);
1174 err = -EHOSTUNREACH;
1178 if (dev != grt->rt6i_dev) {
1179 dst_release(&grt->u.dst);
1183 dev = grt->rt6i_dev;
1184 idev = grt->rt6i_idev;
1186 in6_dev_hold(grt->rt6i_idev);
1188 if (!(grt->rt6i_flags&RTF_GATEWAY))
1190 dst_release(&grt->u.dst);
1196 if (dev == NULL || (dev->flags&IFF_LOOPBACK))
1204 if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) {
1205 rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev);
1206 if (IS_ERR(rt->rt6i_nexthop)) {
1207 err = PTR_ERR(rt->rt6i_nexthop);
1208 rt->rt6i_nexthop = NULL;
1213 rt->rt6i_flags = cfg->fc_flags;
/* Apply RTA_METRICS attributes (RTAX_*) supplied over netlink. */
1220 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
1221 int type = nla_type(nla);
1224 if (type > RTAX_MAX) {
1229 rt->u.dst.metrics[type - 1] = nla_get_u32(nla);
1234 if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
1235 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1236 if (!rt->u.dst.metrics[RTAX_MTU-1])
1237 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
1238 if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
1239 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
1240 rt->u.dst.dev = dev;
1241 rt->rt6i_idev = idev;
1242 rt->rt6i_table = table;
1244 cfg->fc_nlinfo.nl_net = dev->nd_net;
1246 return __ip6_ins_rt(rt, &cfg->fc_nlinfo);
/* Error path: free the half-built route (cleanup of dev/idev elided). */
1254 dst_free(&rt->u.dst);
/*
 * Delete a route from its table under the write lock; refuses to
 * delete the shared ip6_null_entry.  The dst_release pairs with the
 * reference the caller holds on rt.
 */
1258 static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info)
1261 struct fib6_table *table;
1263 if (rt == ip6_null_entry)
1266 table = rt->rt6i_table;
1267 write_lock_bh(&table->tb6_lock);
1269 err = fib6_del(rt, info);
1270 dst_release(&rt->u.dst);
1272 write_unlock_bh(&table->tb6_lock);
/* Public wrapper: netlink namespace taken from the route's device. */
1277 int ip6_del_rt(struct rt6_info *rt)
1279 struct nl_info info = {
1280 .nl_net = rt->rt6i_dev->nd_net,
1282 return __ip6_del_rt(rt, &info);
/*
 * Delete the route matching a fib6_config: locate the exact fib node
 * for dst/src prefix, then scan its leaf list for an entry matching
 * the requested ifindex, gateway and metric.  On match, take a ref,
 * drop the read lock and delete via __ip6_del_rt().
 * NOTE(review): fragment — `continue` statements in the filter
 * checks, the -ESRCH fallthrough and the final return are elided.
 */
1285 static int ip6_route_del(struct fib6_config *cfg)
1287 struct fib6_table *table;
1288 struct fib6_node *fn;
1289 struct rt6_info *rt;
1292 table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table);
1296 read_lock_bh(&table->tb6_lock);
1298 fn = fib6_locate(&table->tb6_root,
1299 &cfg->fc_dst, cfg->fc_dst_len,
1300 &cfg->fc_src, cfg->fc_src_len);
1303 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1304 if (cfg->fc_ifindex &&
1305 (rt->rt6i_dev == NULL ||
1306 rt->rt6i_dev->ifindex != cfg->fc_ifindex))
1308 if (cfg->fc_flags & RTF_GATEWAY &&
1309 !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway))
1311 if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric)
1313 dst_hold(&rt->u.dst);
1314 read_unlock_bh(&table->tb6_lock);
1316 return __ip6_del_rt(rt, &cfg->fc_nlinfo);
1319 read_unlock_bh(&table->tb6_lock);
/* flowi extended with the redirecting router's address (fragment:
 * the embedded struct flowi member is elided). */
1327 struct ip6rd_flowi {
1329 struct in6_addr gateway;
/*
 * Validate an ICMPv6 Redirect per RFC 2461: find the route currently
 * used for the destination and accept the redirect only if it arrived
 * from that route's next-hop gateway on the right interface.
 * Falls back to ip6_null_entry otherwise.
 * NOTE(review): fragment — the `goto out`/`break` inside the loop and
 * the labels are elided from this excerpt.
 */
1332 static struct rt6_info *__ip6_route_redirect(struct fib6_table *table,
1336 struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl;
1337 struct rt6_info *rt;
1338 struct fib6_node *fn;
1341 * Get the "current" route for this destination and
1342 * check if the redirect has come from approriate router.
1344 * RFC 2461 specifies that redirects should only be
1345 * accepted if they come from the nexthop to the target.
1346 * Due to the way the routes are chosen, this notion
1347 * is a bit fuzzy and one might need to check all possible
1351 read_lock_bh(&table->tb6_lock);
1352 fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src);
1354 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
1356 * Current route is on-link; redirect is always invalid.
1358 * Seems, previous statement is not true. It could
1359 * be node, which looks for us as on-link (f.e. proxy ndisc)
1360 * But then router serving it might decide, that we should
1361 * know truth 8)8) --ANK (980726).
1363 if (rt6_check_expired(rt))
1365 if (!(rt->rt6i_flags & RTF_GATEWAY))
1367 if (fl->oif != rt->rt6i_dev->ifindex)
1369 if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway))
1375 rt = ip6_null_entry;
1376 BACKTRACK(&fl->fl6_src);
1378 dst_hold(&rt->u.dst);
1380 read_unlock_bh(&table->tb6_lock);
/*
 * Build an ip6rd_flowi for the redirect (dest/src/gateway/incoming
 * device) and run it through the policy engine with
 * __ip6_route_redirect as the lookup function.
 * Fragment: the fl6_dst/fl6_src initializers are elided.
 */
1385 static struct rt6_info *ip6_route_redirect(struct in6_addr *dest,
1386 struct in6_addr *src,
1387 struct in6_addr *gateway,
1388 struct net_device *dev)
1390 int flags = RT6_LOOKUP_F_HAS_SADDR;
1391 struct net *net = dev->nd_net;
1392 struct ip6rd_flowi rdfl = {
1394 .oif = dev->ifindex,
1402 .gateway = *gateway,
1405 if (rt6_need_strict(dest))
1406 flags |= RT6_LOOKUP_F_IFACE;
1408 return (struct rt6_info *)fib6_rule_lookup(net, (struct flowi *)&rdfl,
1409 flags, __ip6_route_redirect);
/*
 * Act on a validated ICMPv6 Redirect: update the neighbour cache with
 * the router's link-layer address, then install a /128 RTF_CACHE
 * clone pointing at the new next hop, notify netevent listeners and
 * delete the superseded cached route.
 * NOTE(review): fragment — the `goto out`s, the on_link RTF_GATEWAY
 * clearing branch condition, ip6_del_rt() on the old cache entry and
 * the `out:` label are elided from this excerpt.
 */
1412 void rt6_redirect(struct in6_addr *dest, struct in6_addr *src,
1413 struct in6_addr *saddr,
1414 struct neighbour *neigh, u8 *lladdr, int on_link)
1416 struct rt6_info *rt, *nrt = NULL;
1417 struct netevent_redirect netevent;
1419 rt = ip6_route_redirect(dest, src, saddr, neigh->dev);
1421 if (rt == ip6_null_entry) {
1422 if (net_ratelimit())
1423 printk(KERN_DEBUG "rt6_redirect: source isn't a valid nexthop "
1424 "for redirect target\n");
1429 * We have finally decided to accept it.
1432 neigh_update(neigh, lladdr, NUD_STALE,
1433 NEIGH_UPDATE_F_WEAK_OVERRIDE|
1434 NEIGH_UPDATE_F_OVERRIDE|
1435 (on_link ? 0 : (NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
1436 NEIGH_UPDATE_F_ISROUTER))
1440 * Redirect received -> path was valid.
1441 * Look, redirects are sent only in response to data packets,
1442 * so that this nexthop apparently is reachable. --ANK
1444 dst_confirm(&rt->u.dst);
1446 /* Duplicate redirect: silently ignore. */
1447 if (neigh == rt->u.dst.neighbour)
1450 nrt = ip6_rt_copy(rt);
1454 nrt->rt6i_flags = RTF_GATEWAY|RTF_UP|RTF_DYNAMIC|RTF_CACHE;
1456 nrt->rt6i_flags &= ~RTF_GATEWAY;
1458 ipv6_addr_copy(&nrt->rt6i_dst.addr, dest);
1459 nrt->rt6i_dst.plen = 128;
1460 nrt->u.dst.flags |= DST_HOST;
1462 ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
1463 nrt->rt6i_nexthop = neigh_clone(neigh);
1464 /* Reset pmtu, it may be better */
1465 nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
1466 nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(neigh->dev->nd_net,
1467 dst_mtu(&nrt->u.dst));
1469 if (ip6_ins_rt(nrt))
1472 netevent.old = &rt->u.dst;
1473 netevent.new = &nrt->u.dst;
1474 call_netevent_notifiers(NETEVENT_REDIRECT, &netevent);
1476 if (rt->rt6i_flags&RTF_CACHE) {
1482 dst_release(&rt->u.dst);
1487 * Handle ICMP "packet too big" messages
1488 * i.e. Path MTU discovery
/*
 * Apply a Packet-Too-Big report: confirm the next hop, then either
 * update the MTU of an existing cached host route or create a new
 * expiring /128 clone carrying the reduced MTU (RFC 1981 — expire in
 * ip6_rt_mtu_expires so PMTU increases are rediscovered).
 * NOTE(review): fragment — the early `goto out`s, the NULL check on
 * the allocated clone, its ip6_ins_rt() insertion and the `out:`
 * label are elided from this excerpt.
 */
1491 void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
1492 struct net_device *dev, u32 pmtu)
1494 struct rt6_info *rt, *nrt;
1495 struct net *net = dev->nd_net;
1498 rt = rt6_lookup(net, daddr, saddr, dev->ifindex, 0);
1502 if (pmtu >= dst_mtu(&rt->u.dst))
1505 if (pmtu < IPV6_MIN_MTU) {
1507 * According to RFC2460, PMTU is set to the IPv6 Minimum Link
1508 * MTU (1280) and a fragment header should always be included
1509 * after a node receiving Too Big message reporting PMTU is
1510 * less than the IPv6 Minimum Link MTU.
1512 pmtu = IPV6_MIN_MTU;
1516 /* New mtu received -> path was valid.
1517 They are sent only in response to data packets,
1518 so that this nexthop apparently is reachable. --ANK
1520 dst_confirm(&rt->u.dst);
1522 /* Host route. If it is static, it would be better
1523 not to override it, but add new one, so that
1524 when cache entry will expire old pmtu
1525 would return automatically.
1527 if (rt->rt6i_flags & RTF_CACHE) {
1528 rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1530 rt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1531 dst_set_expires(&rt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1532 rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
1537 Two cases are possible:
1538 1. It is connected route. Action: COW
1539 2. It is gatewayed route or NONEXTHOP route. Action: clone it.
1541 if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP))
1542 nrt = rt6_alloc_cow(rt, daddr, saddr);
1544 nrt = rt6_alloc_clone(rt, daddr);
1547 nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
1549 nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
1551 /* According to RFC 1981, detecting PMTU increase shouldn't be
1552 * happened within 5 mins, the recommended timer is 10 mins.
1553 * Here this route expiration time is set to ip6_rt_mtu_expires
1554 * which is 10 mins. After 10 mins the decreased pmtu is expired
1555 * and detecting PMTU increase will be automatically happened.
1557 dst_set_expires(&nrt->u.dst, net->ipv6.sysctl.ip6_rt_mtu_expires);
1558 nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
1563 dst_release(&rt->u.dst);
1567 * Misc support functions
/*
 * ip6_rt_copy - duplicate @ort into a freshly allocated rt6_info.
 * Copies routing state (input/output hooks, metrics, device, gateway,
 * destination key), takes a dev_hold()/in6_dev_hold() reference on the
 * device/inet6 device, and clears per-clone state (expires, metric,
 * RTF_EXPIRES flag).
 * NOTE(review): elided numbered listing — braces, the NULL check of
 * ip6_dst_alloc() and the trailing "return rt;" are missing lines.
 */
1570 static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
1572 struct rt6_info *rt = ip6_dst_alloc();
1575 rt->u.dst.input = ort->u.dst.input;
1576 rt->u.dst.output = ort->u.dst.output;
1578 memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
1579 rt->u.dst.error = ort->u.dst.error;
1580 rt->u.dst.dev = ort->u.dst.dev;
1582 dev_hold(rt->u.dst.dev);
1583 rt->rt6i_idev = ort->rt6i_idev;
1585 in6_dev_hold(rt->rt6i_idev);
1586 rt->u.dst.lastuse = jiffies;
/* copy never inherits an expiry: it starts unexpiring ... */
1587 rt->rt6i_expires = 0;
1589 ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
/* ... and RTF_EXPIRES is stripped to match */
1590 rt->rt6i_flags = ort->rt6i_flags & ~RTF_EXPIRES;
1591 rt->rt6i_metric = 0;
1593 memcpy(&rt->rt6i_dst, &ort->rt6i_dst, sizeof(struct rt6key));
1594 #ifdef CONFIG_IPV6_SUBTREES
1595 memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key));
1597 rt->rt6i_table = ort->rt6i_table;
1602 #ifdef CONFIG_IPV6_ROUTE_INFO
/*
 * rt6_get_route_info - look up an RA "Route Information" route.
 * Searches RT6_TABLE_INFO (under tb6_lock held as writer) for a route
 * matching @prefix/@prefixlen, @gwaddr and @ifindex that carries both
 * RTF_ROUTEINFO and RTF_GATEWAY.  On a match, a dst_hold() reference is
 * taken before the table lock is dropped; caller must release it.
 * NOTE(review): elided listing — the "continue" bodies of the three
 * filter ifs, the loop break and the "return rt;" are missing lines.
 */
1603 static struct rt6_info *rt6_get_route_info(struct net *net,
1604 struct in6_addr *prefix, int prefixlen,
1605 struct in6_addr *gwaddr, int ifindex)
1607 struct fib6_node *fn;
1608 struct rt6_info *rt = NULL;
1609 struct fib6_table *table;
1611 table = fib6_get_table(net, RT6_TABLE_INFO);
1615 write_lock_bh(&table->tb6_lock);
1616 fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0);
1620 for (rt = fn->leaf; rt; rt = rt->u.dst.rt6_next) {
/* skip routes on other interfaces */
1621 if (rt->rt6i_dev->ifindex != ifindex)
/* only RA-learned gateway routes qualify */
1623 if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY))
1625 if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr))
1627 dst_hold(&rt->u.dst);
1631 write_unlock_bh(&table->tb6_lock);
/*
 * rt6_add_route_info - install a route learned from an RA Route
 * Information option into RT6_TABLE_INFO, then return a held reference
 * to it via rt6_get_route_info().
 * A prefix length of 0 is treated as a default route (RTF_DEFAULT).
 * NOTE(review): elided listing — the "pref" parameter line, opening
 * brace and the "if (!prefixlen)" guard before RTF_DEFAULT are missing.
 */
1635 static struct rt6_info *rt6_add_route_info(struct net *net,
1636 struct in6_addr *prefix, int prefixlen,
1637 struct in6_addr *gwaddr, int ifindex,
1640 struct fib6_config cfg = {
1641 .fc_table = RT6_TABLE_INFO,
1642 .fc_metric = IP6_RT_PRIO_USER,
1643 .fc_ifindex = ifindex,
1644 .fc_dst_len = prefixlen,
1645 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
1646 RTF_UP | RTF_PREF(pref),
1648 .fc_nlinfo.nlh = NULL,
1649 .fc_nlinfo.nl_net = net,
1652 ipv6_addr_copy(&cfg.fc_dst, prefix);
1653 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
1655 /* We should treat it as a default route if prefix length is 0. */
1657 cfg.fc_flags |= RTF_DEFAULT;
/* ignore ip6_route_add() errors; the lookup below reports the result */
1659 ip6_route_add(&cfg);
1661 return rt6_get_route_info(net, prefix, prefixlen, gwaddr, ifindex);
/*
 * rt6_get_dflt_router - find the RA-learned default route via gateway
 * @addr on device @dev in RT6_TABLE_DFLT.
 * Walks the root leaf chain under tb6_lock (writer) and, if found,
 * returns the route with a dst_hold() reference taken; NULL otherwise.
 * NOTE(review): elided listing — table NULL check, loop "break" and
 * final "return rt;" are missing lines.
 */
1665 struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev)
1667 struct rt6_info *rt;
1668 struct fib6_table *table;
1670 table = fib6_get_table(dev->nd_net, RT6_TABLE_DFLT);
1674 write_lock_bh(&table->tb6_lock);
1675 for (rt = table->tb6_root.leaf; rt; rt=rt->u.dst.rt6_next) {
1676 if (dev == rt->rt6i_dev &&
1677 ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) &&
1678 ipv6_addr_equal(&rt->rt6i_gateway, addr))
/* hold is only safe while tb6_lock prevents the entry from vanishing */
1682 dst_hold(&rt->u.dst);
1683 write_unlock_bh(&table->tb6_lock);
1687 EXPORT_SYMBOL(rt6_get_dflt_router);
/*
 * rt6_add_dflt_router - install an RA-learned default route via
 * @gwaddr on @dev into RT6_TABLE_DFLT, then return a held reference to
 * it via rt6_get_dflt_router().
 * NOTE(review): elided listing — the "pref" parameter line and opening
 * brace are missing; RTF_PREF(pref) below uses that hidden parameter.
 */
1689 struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
1690 struct net_device *dev,
1693 struct fib6_config cfg = {
1694 .fc_table = RT6_TABLE_DFLT,
1695 .fc_metric = IP6_RT_PRIO_USER,
1696 .fc_ifindex = dev->ifindex,
1697 .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
1698 RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
1700 .fc_nlinfo.nlh = NULL,
1701 .fc_nlinfo.nl_net = dev->nd_net,
1704 ipv6_addr_copy(&cfg.fc_gateway, gwaddr);
/* errors ignored here; the re-lookup below is the success indicator */
1706 ip6_route_add(&cfg);
1708 return rt6_get_dflt_router(gwaddr, dev);
/*
 * rt6_purge_dflt_routers - delete every RA-learned default route in
 * RT6_TABLE_DFLT for @net.
 * The read lock is dropped before each deletion (a held reference keeps
 * the entry alive across the unlock); presumably the scan then restarts
 * from the leaf head — the restart/ip6_del_rt lines are elided here.
 */
1711 void rt6_purge_dflt_routers(struct net *net)
1713 struct rt6_info *rt;
1714 struct fib6_table *table;
1716 /* NOTE: Keep consistent with rt6_get_dflt_router */
1717 table = fib6_get_table(net, RT6_TABLE_DFLT);
1722 read_lock_bh(&table->tb6_lock);
1723 for (rt = table->tb6_root.leaf; rt; rt = rt->u.dst.rt6_next) {
1724 if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) {
/* pin the entry, then drop the lock so deletion can take it as writer */
1725 dst_hold(&rt->u.dst);
1726 read_unlock_bh(&table->tb6_lock);
1731 read_unlock_bh(&table->tb6_lock);
/*
 * rtmsg_to_fib6_config - translate a legacy ioctl struct in6_rtmsg into
 * the internal struct fib6_config used by ip6_route_add/del.
 * The config is zeroed first, targets RT6_TABLE_MAIN, and carries @net
 * in fc_nlinfo for namespace-aware route operations.
 */
1734 static void rtmsg_to_fib6_config(struct net *net,
1735 struct in6_rtmsg *rtmsg,
1736 struct fib6_config *cfg)
1738 memset(cfg, 0, sizeof(*cfg));
1740 cfg->fc_table = RT6_TABLE_MAIN;
1741 cfg->fc_ifindex = rtmsg->rtmsg_ifindex;
1742 cfg->fc_metric = rtmsg->rtmsg_metric;
/* rtmsg_info doubles as the expiry value in the legacy ABI */
1743 cfg->fc_expires = rtmsg->rtmsg_info;
1744 cfg->fc_dst_len = rtmsg->rtmsg_dst_len;
1745 cfg->fc_src_len = rtmsg->rtmsg_src_len;
1746 cfg->fc_flags = rtmsg->rtmsg_flags;
1748 cfg->fc_nlinfo.nl_net = net;
1750 ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst);
1751 ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src);
1752 ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway);
/*
 * ipv6_route_ioctl - handle SIOCADDRT/SIOCDELRT route ioctls.
 * Requires CAP_NET_ADMIN, copies the userspace in6_rtmsg, converts it
 * with rtmsg_to_fib6_config() and dispatches to ip6_route_add/del.
 * NOTE(review): elided listing — the switch statement, rtnl locking,
 * copy_from_user error check and default/return paths are missing
 * lines; err's declaration is also elided.
 */
1755 int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
1757 struct fib6_config cfg;
1758 struct in6_rtmsg rtmsg;
1762 case SIOCADDRT: /* Add a route */
1763 case SIOCDELRT: /* Delete a route */
1764 if (!capable(CAP_NET_ADMIN))
1766 err = copy_from_user(&rtmsg, arg,
1767 sizeof(struct in6_rtmsg));
1771 rtmsg_to_fib6_config(net, &rtmsg, &cfg);
1776 err = ip6_route_add(&cfg);
1779 err = ip6_route_del(&cfg);
1793 * Drop the packet on the floor
/*
 * ip6_pkt_drop - common drop path for unroutable packets.
 * Bumps the appropriate MIB counter (INADDRERRORS for anycast/reserved
 * destinations on input, otherwise the supplied no-route counter),
 * sends an ICMPv6 Destination Unreachable with @code, and frees the skb
 * (the kfree_skb/return lines are elided from this listing, as are the
 * "type" declaration, fallthrough break and switch braces).
 */
1796 static int ip6_pkt_drop(struct sk_buff *skb, int code, int ipstats_mib_noroutes)
1799 switch (ipstats_mib_noroutes) {
1800 case IPSTATS_MIB_INNOROUTES:
1801 type = ipv6_addr_type(&ipv6_hdr(skb)->daddr);
1802 if (type == IPV6_ADDR_ANY || type == IPV6_ADDR_RESERVED) {
1803 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_INADDRERRORS);
1807 case IPSTATS_MIB_OUTNOROUTES:
1808 IP6_INC_STATS(ip6_dst_idev(skb->dst), ipstats_mib_noroutes);
1811 icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev);
/* dst input hook for the null route: drop with "no route" semantics */
1816 static int ip6_pkt_discard(struct sk_buff *skb)
1818 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_INNOROUTES);
/*
 * dst output hook for the null route: point skb->dev at the dst device
 * so the ICMP error is emitted correctly, then drop as "no route".
 */
1821 static int ip6_pkt_discard_out(struct sk_buff *skb)
1823 skb->dev = skb->dst->dev;
1824 return ip6_pkt_drop(skb, ICMPV6_NOROUTE, IPSTATS_MIB_OUTNOROUTES);
1827 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
/* input hook for the prohibit route: drop with "administratively prohibited" */
1829 static int ip6_pkt_prohibit(struct sk_buff *skb)
1831 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_INNOROUTES);
/*
 * output hook for the prohibit route: fix up skb->dev for the ICMP
 * error, then drop as "administratively prohibited".
 */
1834 static int ip6_pkt_prohibit_out(struct sk_buff *skb)
1836 skb->dev = skb->dst->dev;
1837 return ip6_pkt_drop(skb, ICMPV6_ADM_PROHIBITED, IPSTATS_MIB_OUTNOROUTES);
1843 * Allocate a dst for local (unicast / anycast) address.
/*
 * addrconf_dst_alloc - build a host (/128) route for a local unicast or
 * anycast address, bound to the loopback device.
 * Takes references on the loopback device and resolves the neighbour
 * entry up front; on neighbour allocation failure the dst is freed and
 * ERR_PTR(-ENOMEM) is returned.  The returned route starts with one
 * dst reference held.
 * NOTE(review): elided listing — an "anycast" parameter, the in6_dev
 * hold, the anycast/local if-else and the final "return rt;" are
 * missing lines.
 */
1846 struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev,
1847 const struct in6_addr *addr,
1850 struct net *net = idev->dev->nd_net;
1851 struct rt6_info *rt = ip6_dst_alloc();
1854 return ERR_PTR(-ENOMEM);
1856 dev_hold(net->loopback_dev);
1859 rt->u.dst.flags = DST_HOST;
1860 rt->u.dst.input = ip6_input;
1861 rt->u.dst.output = ip6_output;
/* local addresses are always delivered via loopback */
1862 rt->rt6i_dev = net->loopback_dev;
1863 rt->rt6i_idev = idev;
1864 rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
1865 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
/* -1 = "use the per-namespace default hop limit" */
1866 rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
1867 rt->u.dst.obsolete = -1;
1869 rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
1871 rt->rt6i_flags |= RTF_ANYCAST;
1873 rt->rt6i_flags |= RTF_LOCAL;
1874 rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
1875 if (rt->rt6i_nexthop == NULL) {
1876 dst_free(&rt->u.dst);
1877 return ERR_PTR(-ENOMEM);
1880 ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
1881 rt->rt6i_dst.plen = 128;
1882 rt->rt6i_table = fib6_get_table(net, RT6_TABLE_LOCAL);
/* hand the route back with one reference already taken */
1884 atomic_set(&rt->u.dst.__refcnt, 1);
/*
 * fib6_ifdown - fib6_clean_all() callback: select routes for deletion
 * when a device goes down.  @arg is the struct net_device being
 * removed, or NULL to match every route.  The null entry is never
 * deleted.  (The "return -1" / "return 0" lines are elided; a nonzero
 * return tells the tree walker to delete the route.)
 */
1889 static int fib6_ifdown(struct rt6_info *rt, void *arg)
1891 if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
1892 rt != ip6_null_entry) {
1893 RT6_TRACE("deleted by ifdown %p\n", rt);
/* purge all routes using @dev (or all routes if @dev is NULL) from @net */
1899 void rt6_ifdown(struct net *net, struct net_device *dev)
1901 fib6_clean_all(net, fib6_ifdown, 0, dev);
/*
 * Argument bundle passed through fib6_clean_all() to
 * rt6_mtu_change_route().  NOTE(review): the "unsigned mtu;" member is
 * an elided line here — rt6_mtu_change() clearly initializes one (see
 * arg->mtu usage below); confirm against the full source.
 */
1904 struct rt6_mtu_change_arg
1906 struct net_device *dev;
/*
 * rt6_mtu_change_route - fib6_clean_all() callback applying a device
 * MTU change to each route on that device.
 * Updates RTAX_MTU/RTAX_ADVMSS when the route's PMTU is not locked and
 * either (a) the new MTU is smaller than the route PMTU, or (b) the new
 * MTU is larger and the old device MTU was the path bottleneck.
 * Always returns "keep walking" (the return lines are elided).
 */
1910 static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg)
1912 struct rt6_mtu_change_arg *arg = (struct rt6_mtu_change_arg *) p_arg;
1913 struct inet6_dev *idev;
1914 struct net *net = arg->dev->nd_net;
1916 /* In IPv6 pmtu discovery is not optional,
1917 so that RTAX_MTU lock cannot disable it.
1918 We still use this lock to block changes
1919 caused by addrconf/ndisc.
/* no inet6_dev means the device carries no IPv6 state to update */
1922 idev = __in6_dev_get(arg->dev);
1926 /* For administrative MTU increase, there is no way to discover
1927 IPv6 PMTU increase, so PMTU increase should be updated here.
1928 Since RFC 1981 doesn't include administrative MTU increase
1929 update PMTU increase is a MUST. (i.e. jumbo frame)
1932 If new MTU is less than route PMTU, this new MTU will be the
1933 lowest MTU in the path, update the route PMTU to reflect PMTU
1934 decreases; if new MTU is greater than route PMTU, and the
1935 old MTU is the lowest MTU in the path, update the route PMTU
1936 to reflect the increase. In this case if the other nodes' MTU
1937 also have the lowest MTU, TOO BIG MESSAGE will be lead to
1940 if (rt->rt6i_dev == arg->dev &&
1941 !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
1942 (dst_mtu(&rt->u.dst) >= arg->mtu ||
1943 (dst_mtu(&rt->u.dst) < arg->mtu &&
1944 dst_mtu(&rt->u.dst) == idev->cnf.mtu6))) {
1945 rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
1946 rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, arg->mtu);
/*
 * rt6_mtu_change - propagate a device MTU change to all IPv6 routes in
 * the device's namespace.  (The initializer body of arg — .dev/.mtu
 * designators — is elided from this listing.)
 */
1951 void rt6_mtu_change(struct net_device *dev, unsigned mtu)
1953 struct rt6_mtu_change_arg arg = {
1958 fib6_clean_all(dev->nd_net, rt6_mtu_change_route, 0, &arg);
/*
 * Netlink attribute validation policy for RTM_{NEW,DEL,GET}ROUTE:
 * gateway must be a full in6_addr; oif/iif/priority are u32; metrics
 * is a nested attribute block.  RTA_DST/RTA_SRC are length-checked
 * manually in rtm_to_fib6_config() against the prefix length.
 */
1961 static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = {
1962 [RTA_GATEWAY] = { .len = sizeof(struct in6_addr) },
1963 [RTA_OIF] = { .type = NLA_U32 },
1964 [RTA_IIF] = { .type = NLA_U32 },
1965 [RTA_PRIORITY] = { .type = NLA_U32 },
1966 [RTA_METRICS] = { .type = NLA_NESTED },
/*
 * rtm_to_fib6_config - parse an RTM_NEWROUTE/RTM_DELROUTE netlink
 * message into struct fib6_config.
 * Validates attributes against rtm_ipv6_policy, copies only as many
 * destination/source bytes as the prefix length requires (rejecting
 * attributes shorter than that), and lets RTA_TABLE override the table
 * id from the rtmsg header.
 * NOTE(review): elided listing — "struct rtmsg *rtm"/"int err"
 * declarations, the if(tb[RTA_DST])/if(tb[RTA_SRC])/if(tb[RTA_OIF])
 * guards, error gotos and return paths are missing lines.
 */
1969 static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh,
1970 struct fib6_config *cfg)
1973 struct nlattr *tb[RTA_MAX+1];
1976 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
1981 rtm = nlmsg_data(nlh);
1982 memset(cfg, 0, sizeof(*cfg));
1984 cfg->fc_table = rtm->rtm_table;
1985 cfg->fc_dst_len = rtm->rtm_dst_len;
1986 cfg->fc_src_len = rtm->rtm_src_len;
1987 cfg->fc_flags = RTF_UP;
1988 cfg->fc_protocol = rtm->rtm_protocol;
1990 if (rtm->rtm_type == RTN_UNREACHABLE)
1991 cfg->fc_flags |= RTF_REJECT;
1993 cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid;
1994 cfg->fc_nlinfo.nlh = nlh;
1995 cfg->fc_nlinfo.nl_net = skb->sk->sk_net;
1997 if (tb[RTA_GATEWAY]) {
1998 nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16);
1999 cfg->fc_flags |= RTF_GATEWAY;
/* round prefix bits up to whole bytes; attribute must cover them */
2003 int plen = (rtm->rtm_dst_len + 7) >> 3;
2005 if (nla_len(tb[RTA_DST]) < plen)
2008 nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen);
2012 int plen = (rtm->rtm_src_len + 7) >> 3;
2014 if (nla_len(tb[RTA_SRC]) < plen)
2017 nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen);
2021 cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]);
2023 if (tb[RTA_PRIORITY])
2024 cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]);
2026 if (tb[RTA_METRICS]) {
/* fc_mx points into the skb; valid only for the lifetime of the request */
2027 cfg->fc_mx = nla_data(tb[RTA_METRICS]);
2028 cfg->fc_mx_len = nla_len(tb[RTA_METRICS]);
2032 cfg->fc_table = nla_get_u32(tb[RTA_TABLE]);
/*
 * RTM_DELROUTE handler: parse the request into a fib6_config and delete
 * the matching route.  (The "int err" declaration and parse-error
 * return are elided lines.)
 */
2039 static int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2041 struct fib6_config cfg;
2044 err = rtm_to_fib6_config(skb, nlh, &cfg);
2048 return ip6_route_del(&cfg);
/*
 * RTM_NEWROUTE handler: parse the request into a fib6_config and add
 * the route.  (The "int err" declaration and parse-error return are
 * elided lines.)
 */
2051 static int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
2053 struct fib6_config cfg;
2056 err = rtm_to_fib6_config(skb, nlh, &cfg);
2060 return ip6_route_add(&cfg);
/*
 * rt6_nlmsg_size - worst-case payload size of one RTM_NEWROUTE
 * notification, used to size the skb in inet6_rt_notify().  Must stay
 * in sync with the attributes emitted by rt6_fill_node() — getting this
 * too small triggers the WARN_ON(-EMSGSIZE) there.
 */
2063 static inline size_t rt6_nlmsg_size(void)
2065 return NLMSG_ALIGN(sizeof(struct rtmsg))
2066 + nla_total_size(16) /* RTA_SRC */
2067 + nla_total_size(16) /* RTA_DST */
2068 + nla_total_size(16) /* RTA_GATEWAY */
2069 + nla_total_size(16) /* RTA_PREFSRC */
2070 + nla_total_size(4) /* RTA_TABLE */
2071 + nla_total_size(4) /* RTA_IIF */
2072 + nla_total_size(4) /* RTA_OIF */
2073 + nla_total_size(4) /* RTA_PRIORITY */
2074 + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */
2075 + nla_total_size(sizeof(struct rta_cacheinfo));
/*
 * rt6_fill_node - serialize one rt6_info into a netlink RTM message.
 * @dst/@src: if non-NULL, report a specific /128 destination/source
 *            (used by RTM_GETROUTE) instead of the route's own prefix.
 * @iif:      input interface to report, 0 for none.
 * @prefix:   when set, skip routes without RTF_PREFIX_RT (dump filter).
 * Emits table id, route type/protocol derived from rt6i_flags, the
 * address attributes, metrics, gateway (from the neighbour entry),
 * oif, priority and cacheinfo.  Returns nlmsg_end() length on success;
 * on attribute overflow cancels the message (-EMSGSIZE path; several
 * declarations, the NULL-nlh check and intervening if-guards are
 * elided lines in this listing).
 */
2078 static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
2079 struct in6_addr *dst, struct in6_addr *src,
2080 int iif, int type, u32 pid, u32 seq,
2081 int prefix, unsigned int flags)
2084 struct nlmsghdr *nlh;
2088 if (prefix) { /* user wants prefix routes only */
2089 if (!(rt->rt6i_flags & RTF_PREFIX_RT)) {
2090 /* success since this is not a prefix route */
2095 nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags);
2099 rtm = nlmsg_data(nlh);
2100 rtm->rtm_family = AF_INET6;
2101 rtm->rtm_dst_len = rt->rt6i_dst.plen;
2102 rtm->rtm_src_len = rt->rt6i_src.plen;
2105 table = rt->rt6i_table->tb6_id;
2107 table = RT6_TABLE_UNSPEC;
2108 rtm->rtm_table = table;
2109 NLA_PUT_U32(skb, RTA_TABLE, table);
/* map route flags onto the user-visible RTN_* type */
2110 if (rt->rt6i_flags&RTF_REJECT)
2111 rtm->rtm_type = RTN_UNREACHABLE;
2112 else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK))
2113 rtm->rtm_type = RTN_LOCAL;
2115 rtm->rtm_type = RTN_UNICAST;
2117 rtm->rtm_scope = RT_SCOPE_UNIVERSE;
2118 rtm->rtm_protocol = rt->rt6i_protocol;
2119 if (rt->rt6i_flags&RTF_DYNAMIC)
2120 rtm->rtm_protocol = RTPROT_REDIRECT;
2121 else if (rt->rt6i_flags & RTF_ADDRCONF)
2122 rtm->rtm_protocol = RTPROT_KERNEL;
2123 else if (rt->rt6i_flags&RTF_DEFAULT)
2124 rtm->rtm_protocol = RTPROT_RA;
2126 if (rt->rt6i_flags&RTF_CACHE)
2127 rtm->rtm_flags |= RTM_F_CLONED;
/* explicit dst (getroute) is reported as a host route */
2130 NLA_PUT(skb, RTA_DST, 16, dst);
2131 rtm->rtm_dst_len = 128;
2132 } else if (rtm->rtm_dst_len)
2133 NLA_PUT(skb, RTA_DST, 16, &rt->rt6i_dst.addr);
2134 #ifdef CONFIG_IPV6_SUBTREES
2136 NLA_PUT(skb, RTA_SRC, 16, src);
2137 rtm->rtm_src_len = 128;
2138 } else if (rtm->rtm_src_len)
2139 NLA_PUT(skb, RTA_SRC, 16, &rt->rt6i_src.addr);
2142 NLA_PUT_U32(skb, RTA_IIF, iif);
2144 struct in6_addr saddr_buf;
/* best-effort preferred source; omitted if selection fails */
2145 if (ipv6_dev_get_saddr(ip6_dst_idev(&rt->u.dst)->dev,
2146 dst, &saddr_buf) == 0)
2147 NLA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
2150 if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
2151 goto nla_put_failure;
2153 if (rt->u.dst.neighbour)
2154 NLA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
2157 NLA_PUT_U32(skb, RTA_OIF, rt->rt6i_dev->ifindex);
2159 NLA_PUT_U32(skb, RTA_PRIORITY, rt->rt6i_metric);
/* relative expiry in jiffies; 0 = permanent */
2161 expires = rt->rt6i_expires ? rt->rt6i_expires - jiffies : 0;
2162 if (rtnl_put_cacheinfo(skb, &rt->u.dst, 0, 0, 0,
2163 expires, rt->u.dst.error) < 0)
2164 goto nla_put_failure;
2166 return nlmsg_end(skb, nlh);
2169 nlmsg_cancel(skb, nlh);
/*
 * rt6_dump_route - per-route callback for RTM_GETROUTE dumps.
 * Honors the RTM_F_PREFIX filter if the request header is large enough
 * to carry rtm_flags, then emits the route via rt6_fill_node() with
 * NLM_F_MULTI.  (The "int prefix" declaration and its else-branch
 * "prefix = 0;" are elided lines.)
 */
2173 int rt6_dump_route(struct rt6_info *rt, void *p_arg)
2175 struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg;
2178 if (nlmsg_len(arg->cb->nlh) >= sizeof(struct rtmsg)) {
2179 struct rtmsg *rtm = nlmsg_data(arg->cb->nlh);
2180 prefix = (rtm->rtm_flags & RTM_F_PREFIX) != 0;
2184 return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
2185 NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
2186 prefix, NLM_F_MULTI);
/*
 * inet6_rtm_getroute - RTM_GETROUTE handler: resolve a single route for
 * the src/dst/iif/oif given in the request and unicast the result back
 * to the requester.
 * Builds a flowi from the attributes, performs the lookup with
 * ip6_route_output(), renders it with rt6_fill_node() and sends it via
 * rtnl_unicast().
 * NOTE(review): elided listing — declarations of fl/iif/err, several
 * attribute-presence guards, error gotos, the iif device-not-found
 * path and skb cleanup on fill failure are missing lines.  The lookup
 * result is not checked against the null entry here (visible lines
 * only); confirm error handling against the full source.
 */
2189 static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
2191 struct net *net = in_skb->sk->sk_net;
2192 struct nlattr *tb[RTA_MAX+1];
2193 struct rt6_info *rt;
2194 struct sk_buff *skb;
2199 err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy);
2204 memset(&fl, 0, sizeof(fl));
2207 if (nla_len(tb[RTA_SRC]) < sizeof(struct in6_addr))
2210 ipv6_addr_copy(&fl.fl6_src, nla_data(tb[RTA_SRC]));
2214 if (nla_len(tb[RTA_DST]) < sizeof(struct in6_addr))
2217 ipv6_addr_copy(&fl.fl6_dst, nla_data(tb[RTA_DST]));
2221 iif = nla_get_u32(tb[RTA_IIF]);
2224 fl.oif = nla_get_u32(tb[RTA_OIF]);
2227 struct net_device *dev;
2228 dev = __dev_get_by_index(net, iif);
2235 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
2241 /* Reserve room for dummy headers, this skb can pass
2242 through good chunk of routing engine.
2244 skb_reset_mac_header(skb);
2245 skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
2247 rt = (struct rt6_info*) ip6_route_output(NULL, &fl);
/* attach the dst so fill/free paths release the route reference */
2248 skb->dst = &rt->u.dst;
2250 err = rt6_fill_node(skb, rt, &fl.fl6_dst, &fl.fl6_src, iif,
2251 RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
2252 nlh->nlmsg_seq, 0, 0);
2258 err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid);
/*
 * inet6_rt_notify - broadcast an RTM_NEWROUTE/RTM_DELROUTE event to
 * RTNLGRP_IPV6_ROUTE listeners.
 * Allocates an skb sized by rt6_nlmsg_size(); -EMSGSIZE from
 * rt6_fill_node() indicates that size estimate is wrong, hence the
 * WARN_ON.  On failure, listeners are notified via rtnl_set_sk_err().
 * (Declarations of err/seq, the allocation-failure goto, kfree_skb and
 * the errout label are elided lines.)
 */
2263 void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info)
2265 struct sk_buff *skb;
2266 struct net *net = info->nl_net;
2271 seq = info->nlh != NULL ? info->nlh->nlmsg_seq : 0;
2273 skb = nlmsg_new(rt6_nlmsg_size(), gfp_any());
2277 err = rt6_fill_node(skb, rt, NULL, NULL, 0,
2278 event, info->pid, seq, 0, 0);
2280 /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */
2281 WARN_ON(err == -EMSGSIZE);
2285 err = rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE,
2286 info->nlh, gfp_any());
2289 rtnl_set_sk_err(net, RTNLGRP_IPV6_ROUTE, err);
2296 #ifdef CONFIG_PROC_FS
2298 #define RT6_INFO_LEN (32 + 4 + 32 + 4 + 32 + 40 + 5 + 1)
/*
 * rt6_info_route - fib6_clean_all() callback emitting one
 * /proc/net/ipv6_route line: dst/plen, src/plen (zeros without
 * subtrees), next hop (from the neighbour key, zeros if none), then
 * metric, refcount, use count, flags and device name.
 * (The plen format arguments and "return 0;" are elided lines.)
 */
2309 static int rt6_info_route(struct rt6_info *rt, void *p_arg)
2311 struct seq_file *m = p_arg;
2313 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_dst.addr),
2316 #ifdef CONFIG_IPV6_SUBTREES
2317 seq_printf(m, NIP6_SEQFMT " %02x ", NIP6(rt->rt6i_src.addr),
/* no subtrees: source column is a fixed all-zero placeholder */
2320 seq_puts(m, "00000000000000000000000000000000 00 ");
2323 if (rt->rt6i_nexthop) {
2324 seq_printf(m, NIP6_SEQFMT,
2325 NIP6(*((struct in6_addr *)rt->rt6i_nexthop->primary_key)));
2327 seq_puts(m, "00000000000000000000000000000000");
2329 seq_printf(m, " %08x %08x %08x %08x %8s\n",
2330 rt->rt6i_metric, atomic_read(&rt->u.dst.__refcnt),
2331 rt->u.dst.__use, rt->rt6i_flags,
2332 rt->rt6i_dev ? rt->rt6i_dev->name : "");
/*
 * seq_file show handler for /proc/net/ipv6_route: walk every route in
 * the namespace, printing each via rt6_info_route().
 */
2336 static int ipv6_route_show(struct seq_file *m, void *v)
2338 struct net *net = (struct net *)m->private;
2339 fib6_clean_all(net, rt6_info_route, 0, m);
/*
 * open handler for /proc/net/ipv6_route: bind the per-file seq_file to
 * the owning network namespace.  NOTE(review): get_proc_net() takes a
 * namespace reference; a single_open() failure path releasing it is
 * elided from this listing — confirm against the full source.
 */
2345 static int ipv6_route_open(struct inode *inode, struct file *file)
2347 struct net *net = get_proc_net(inode);
2348 return single_open(file, ipv6_route_show, net);
/*
 * release handler for /proc/net/ipv6_route.  The namespace pointer is
 * fetched from seq->private; the put_net() call balancing
 * ipv6_route_open()'s reference is an elided line here.
 */
2351 static int ipv6_route_release(struct inode *inode, struct file *file)
2353 struct seq_file *seq = file->private_data;
2354 struct net *net = seq->private;
2356 return single_release(inode, file);
/*
 * /proc/net/ipv6_route file operations.  (The ".read = seq_read" line
 * is elided from this listing.)
 */
2359 static const struct file_operations ipv6_route_proc_fops = {
2360 .owner = THIS_MODULE,
2361 .open = ipv6_route_open,
2363 .llseek = seq_lseek,
2364 .release = ipv6_route_release,
/*
 * seq_file show handler for /proc/net/rt6_stats: one line of seven hex
 * counters — fib nodes, route nodes, rt allocs, rt entries, rt cache
 * entries, live dst entries (global ip6_dst_ops, not per-netns), and
 * discarded routes.
 */
2367 static int rt6_stats_seq_show(struct seq_file *seq, void *v)
2369 struct net *net = (struct net *)seq->private;
2370 seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n",
2371 net->ipv6.rt6_stats->fib_nodes,
2372 net->ipv6.rt6_stats->fib_route_nodes,
2373 net->ipv6.rt6_stats->fib_rt_alloc,
2374 net->ipv6.rt6_stats->fib_rt_entries,
2375 net->ipv6.rt6_stats->fib_rt_cache,
2376 atomic_read(&ip6_dst_ops.entries),
2377 net->ipv6.rt6_stats->fib_discarded_routes,
/* open handler for /proc/net/rt6_stats: bind seq_file to the namespace */
2384 static int rt6_stats_seq_open(struct inode *inode, struct file *file)
2385 struct net *net = get_proc_net(inode);
2386 return single_open(file, rt6_stats_seq_show, net);
/*
 * release handler for /proc/net/rt6_stats.  As with ipv6_route_release,
 * the put_net() balancing the open's namespace reference is an elided
 * line in this listing.
 */
2388 static int rt6_stats_seq_release(struct inode *inode, struct file *file)
2390 struct seq_file *seq = file->private_data;
2391 struct net *net = (struct net *)seq->private;
2393 return single_release(inode, file);
/*
 * /proc/net/rt6_stats file operations.  (The ".read = seq_read" line is
 * elided from this listing.)
 */
2396 static const struct file_operations rt6_stats_seq_fops = {
2397 .owner = THIS_MODULE,
2398 .open = rt6_stats_seq_open,
2400 .llseek = seq_lseek,
2401 .release = rt6_stats_seq_release,
2403 #endif /* CONFIG_PROC_FS */
2405 #ifdef CONFIG_SYSCTL
/*
 * ipv6_sysctl_rtcache_flush - handler for net.ipv6.route.flush.
 * Reads the namespace's flush_delay, lets proc_dointvec() update it
 * from user input, then kicks garbage collection: non-positive delay
 * means flush immediately (~0UL), otherwise wait @delay jiffies.
 * (The "if (write)" guard and the return are elided lines.)
 */
2408 int ipv6_sysctl_rtcache_flush(ctl_table *ctl, int write, struct file * filp,
2409 void __user *buffer, size_t *lenp, loff_t *ppos)
2411 struct net *net = current->nsproxy->net_ns;
2412 int delay = net->ipv6.sysctl.flush_delay;
2414 proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
2415 fib6_run_gc(delay <= 0 ? ~0UL : (unsigned long)delay, net);
/*
 * Template sysctl table for net.ipv6.route.*.  The .data pointers here
 * reference init_net (or the global ip6_dst_ops for gc_thresh); each
 * namespace gets a kmemdup'd copy with per-netns data pointers patched
 * in by ipv6_route_sysctl_init() — table index order there must match
 * entry order here.  (Several ctl_name/mode lines and the struct
 * braces are elided from this listing.)
 */
2421 ctl_table ipv6_route_table_template[] = {
2423 .procname = "flush",
2424 .data = &init_net.ipv6.sysctl.flush_delay,
2425 .maxlen = sizeof(int),
2427 .proc_handler = &ipv6_sysctl_rtcache_flush
2430 .ctl_name = NET_IPV6_ROUTE_GC_THRESH,
2431 .procname = "gc_thresh",
2432 .data = &ip6_dst_ops.gc_thresh,
2433 .maxlen = sizeof(int),
2435 .proc_handler = &proc_dointvec,
2438 .ctl_name = NET_IPV6_ROUTE_MAX_SIZE,
2439 .procname = "max_size",
2440 .data = &init_net.ipv6.sysctl.ip6_rt_max_size,
2441 .maxlen = sizeof(int),
2443 .proc_handler = &proc_dointvec,
2446 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL,
2447 .procname = "gc_min_interval",
2448 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2449 .maxlen = sizeof(int),
2451 .proc_handler = &proc_dointvec_jiffies,
2452 .strategy = &sysctl_jiffies,
2455 .ctl_name = NET_IPV6_ROUTE_GC_TIMEOUT,
2456 .procname = "gc_timeout",
2457 .data = &init_net.ipv6.sysctl.ip6_rt_gc_timeout,
2458 .maxlen = sizeof(int),
2460 .proc_handler = &proc_dointvec_jiffies,
2461 .strategy = &sysctl_jiffies,
2464 .ctl_name = NET_IPV6_ROUTE_GC_INTERVAL,
2465 .procname = "gc_interval",
2466 .data = &init_net.ipv6.sysctl.ip6_rt_gc_interval,
2467 .maxlen = sizeof(int),
2469 .proc_handler = &proc_dointvec_jiffies,
2470 .strategy = &sysctl_jiffies,
2473 .ctl_name = NET_IPV6_ROUTE_GC_ELASTICITY,
2474 .procname = "gc_elasticity",
2475 .data = &init_net.ipv6.sysctl.ip6_rt_gc_elasticity,
2476 .maxlen = sizeof(int),
2478 .proc_handler = &proc_dointvec_jiffies,
2479 .strategy = &sysctl_jiffies,
2482 .ctl_name = NET_IPV6_ROUTE_MTU_EXPIRES,
2483 .procname = "mtu_expires",
2484 .data = &init_net.ipv6.sysctl.ip6_rt_mtu_expires,
2485 .maxlen = sizeof(int),
2487 .proc_handler = &proc_dointvec_jiffies,
2488 .strategy = &sysctl_jiffies,
2491 .ctl_name = NET_IPV6_ROUTE_MIN_ADVMSS,
2492 .procname = "min_adv_mss",
2493 .data = &init_net.ipv6.sysctl.ip6_rt_min_advmss,
2494 .maxlen = sizeof(int),
2496 .proc_handler = &proc_dointvec_jiffies,
2497 .strategy = &sysctl_jiffies,
/* same variable as gc_min_interval, exposed in milliseconds */
2500 .ctl_name = NET_IPV6_ROUTE_GC_MIN_INTERVAL_MS,
2501 .procname = "gc_min_interval_ms",
2502 .data = &init_net.ipv6.sysctl.ip6_rt_gc_min_interval,
2503 .maxlen = sizeof(int),
2505 .proc_handler = &proc_dointvec_ms_jiffies,
2506 .strategy = &sysctl_ms_jiffies,
/*
 * ipv6_route_sysctl_init - clone the sysctl template for namespace
 * @net and repoint each entry's .data at the per-namespace variable.
 * Index [1] (gc_thresh) is intentionally left pointing at the global
 * ip6_dst_ops until routes become per-namespace.  Indices must match
 * the entry order of ipv6_route_table_template.  (The GFP flag of the
 * kmemdup, its NULL check and the return are elided lines.)
 */
2511 struct ctl_table *ipv6_route_sysctl_init(struct net *net)
2513 struct ctl_table *table;
2515 table = kmemdup(ipv6_route_table_template,
2516 sizeof(ipv6_route_table_template),
2520 table[0].data = &net->ipv6.sysctl.flush_delay;
2521 /* table[1].data will be handled when we have
2522 routes per namespace */
2523 table[2].data = &net->ipv6.sysctl.ip6_rt_max_size;
2524 table[3].data = &net->ipv6.sysctl.ip6_rt_gc_min_interval;
2525 table[4].data = &net->ipv6.sysctl.ip6_rt_gc_timeout;
2526 table[5].data = &net->ipv6.sysctl.ip6_rt_gc_interval;
2527 table[6].data = &net->ipv6.sysctl.ip6_rt_gc_elasticity;
2528 table[7].data = &net->ipv6.sysctl.ip6_rt_mtu_expires;
2529 table[8].data = &net->ipv6.sysctl.ip6_rt_min_advmss;
/*
 * per-namespace init: create the /proc/net/ipv6_route and
 * /proc/net/rt6_stats entries.  (The "return 0;" and #endif are elided
 * lines.)
 */
2536 static int ip6_route_net_init(struct net *net)
2538 #ifdef CONFIG_PROC_FS
2539 proc_net_fops_create(net, "ipv6_route", 0, &ipv6_route_proc_fops);
2540 proc_net_fops_create(net, "rt6_stats", S_IRUGO, &rt6_stats_seq_fops);
/*
 * per-namespace teardown: remove the proc entries, then purge every
 * route in the namespace via rt6_ifdown(net, NULL).
 */
2545 static void ip6_route_net_exit(struct net *net)
2547 #ifdef CONFIG_PROC_FS
2548 proc_net_remove(net, "ipv6_route");
2549 proc_net_remove(net, "rt6_stats");
2551 rt6_ifdown(net, NULL);
/* pernet hooks registered by ip6_route_init() */
2554 static struct pernet_operations ip6_route_net_ops = {
2555 .init = ip6_route_net_init,
2556 .exit = ip6_route_net_exit,
/*
 * ip6_route_init - boot-time initialization of the IPv6 routing layer.
 * Order: dst slab cache → special route entries (null, and with
 * multiple tables also prohibit/blackhole) → fib6 core/rules → rtnetlink
 * handlers → pernet subsystem.  Unwinds in reverse via the goto-cleanup
 * labels on any failure.
 * NOTE(review): elided listing — "int ret" declaration, several error
 * checks (e.g. after kmem_cache_create / fib6_init), intermediate
 * labels and the success return are missing lines.
 */
2559 int __init ip6_route_init(void)
2563 ip6_dst_ops.kmem_cachep =
2564 kmem_cache_create("ip6_dst_cache", sizeof(struct rt6_info), 0,
2565 SLAB_HWCACHE_ALIGN, NULL);
2566 if (!ip6_dst_ops.kmem_cachep)
/* blackhole dsts share the same slab as regular ones */
2569 ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops.kmem_cachep;
2572 ip6_null_entry = kmemdup(&ip6_null_entry_template,
2573 sizeof(*ip6_null_entry), GFP_KERNEL);
2574 if (!ip6_null_entry)
2575 goto out_kmem_cache;
/* special entries are their own dst path (no parent route) */
2576 ip6_null_entry->u.dst.path = (struct dst_entry *)ip6_null_entry;
2578 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2579 ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template,
2580 sizeof(*ip6_prohibit_entry), GFP_KERNEL);
2581 if (!ip6_prohibit_entry)
2582 goto out_ip6_null_entry;
2583 ip6_prohibit_entry->u.dst.path = (struct dst_entry *)ip6_prohibit_entry;
2585 ip6_blk_hole_entry = kmemdup(&ip6_blk_hole_entry_template,
2586 sizeof(*ip6_blk_hole_entry), GFP_KERNEL);
2587 if (!ip6_blk_hole_entry)
2588 goto out_ip6_prohibit_entry;
2589 ip6_blk_hole_entry->u.dst.path = (struct dst_entry *)ip6_blk_hole_entry;
2594 goto out_ip6_blk_hole_entry;
2600 ret = fib6_rules_init();
2605 if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL) ||
2606 __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL) ||
2607 __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL))
2608 goto fib6_rules_init;
2610 ret = register_pernet_subsys(&ip6_route_net_ops);
2612 goto fib6_rules_init;
/* error unwind: reverse order of the setup steps above */
2617 fib6_rules_cleanup();
2622 out_ip6_blk_hole_entry:
2623 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2624 kfree(ip6_blk_hole_entry);
2625 out_ip6_prohibit_entry:
2626 kfree(ip6_prohibit_entry);
2629 kfree(ip6_null_entry);
2631 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2635 void ip6_route_cleanup(void)
2637 unregister_pernet_subsys(&ip6_route_net_ops);
2638 fib6_rules_cleanup();
2641 kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
2643 kfree(ip6_null_entry);
2644 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
2645 kfree(ip6_prohibit_entry);
2646 kfree(ip6_blk_hole_entry);