2 * This program is free software; you can redistribute it and/or
3 * modify it under the terms of the GNU General Public License
4 * as published by the Free Software Foundation; either version
5 * 2 of the License, or (at your option) any later version.
7 * Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
8 * & Swedish University of Agricultural Sciences.
10 * Jens Laas <jens.laas@data.slu.se> Swedish University of
11 * Agricultural Sciences.
13 * Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
15 * This work is based on the LPC-trie which is originally descibed in:
17 * An experimental study of compression methods for dynamic tries
18 * Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
19 * http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
22 * IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
23 * IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
25 * Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
28 * Code from fib_hash has been reused which includes the following header:
31 * INET An implementation of the TCP/IP protocol suite for the LINUX
32 * operating system. INET is implemented using the BSD Socket
33 * interface as the means of communication with the user level.
35 * IPv4 FIB: lookup engine and maintenance routines.
38 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
40 * This program is free software; you can redistribute it and/or
41 * modify it under the terms of the GNU General Public License
42 * as published by the Free Software Foundation; either version
43 * 2 of the License, or (at your option) any later version.
46 #define VERSION "0.324"
48 #include <linux/config.h>
49 #include <asm/uaccess.h>
50 #include <asm/system.h>
51 #include <asm/bitops.h>
52 #include <linux/types.h>
53 #include <linux/kernel.h>
54 #include <linux/sched.h>
56 #include <linux/string.h>
57 #include <linux/socket.h>
58 #include <linux/sockios.h>
59 #include <linux/errno.h>
61 #include <linux/inet.h>
62 #include <linux/netdevice.h>
63 #include <linux/if_arp.h>
64 #include <linux/proc_fs.h>
65 #include <linux/skbuff.h>
66 #include <linux/netlink.h>
67 #include <linux/init.h>
68 #include <linux/list.h>
70 #include <net/protocol.h>
71 #include <net/route.h>
74 #include <net/ip_fib.h>
75 #include "fib_lookup.h"
77 #undef CONFIG_IP_FIB_TRIE_STATS
78 #define MAX_CHILDS 16384
80 #define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
81 #define KEYLENGTH (8*sizeof(t_key))
82 #define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
83 #define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
85 static DEFINE_RWLOCK(fib_lock);
87 typedef unsigned int t_key;
91 #define NODE_TYPE_MASK 0x1UL
92 #define NODE_PARENT(_node) \
93 ((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
94 #define NODE_SET_PARENT(_node, _ptr) \
95 ((_node)->_parent = (((unsigned long)(_ptr)) | \
96 ((_node)->_parent & NODE_TYPE_MASK)))
97 #define NODE_INIT_PARENT(_node, _type) \
98 ((_node)->_parent = (_type))
99 #define NODE_TYPE(_node) \
100 ((_node)->_parent & NODE_TYPE_MASK)
102 #define IS_TNODE(n) (!(n->_parent & T_LEAF))
103 #define IS_LEAF(n) (n->_parent & T_LEAF)
107 unsigned long _parent;
112 unsigned long _parent;
113 struct hlist_head list;
117 struct hlist_node hlist;
119 struct list_head falh;
124 unsigned long _parent;
125 unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
126 unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
127 unsigned short full_children; /* KEYLENGTH bits needed */
128 unsigned short empty_children; /* KEYLENGTH bits needed */
129 struct node *child[0];
132 #ifdef CONFIG_IP_FIB_TRIE_STATS
133 struct trie_use_stats {
135 unsigned int backtrack;
136 unsigned int semantic_match_passed;
137 unsigned int semantic_match_miss;
138 unsigned int null_node_hit;
143 unsigned int totdepth;
144 unsigned int maxdepth;
147 unsigned int nullpointers;
148 unsigned int nodesizes[MAX_CHILDS];
153 #ifdef CONFIG_IP_FIB_TRIE_STATS
154 struct trie_use_stats stats;
157 unsigned int revision;
160 static int trie_debug = 0;
162 static int tnode_full(struct tnode *tn, struct node *n);
163 static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
164 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
165 static int tnode_child_length(struct tnode *tn);
166 static struct node *resize(struct trie *t, struct tnode *tn);
167 static struct tnode *inflate(struct trie *t, struct tnode *tn);
168 static struct tnode *halve(struct trie *t, struct tnode *tn);
169 static void tnode_free(struct tnode *tn);
170 static void trie_dump_seq(struct seq_file *seq, struct trie *t);
171 extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
172 extern int fib_detect_death(struct fib_info *fi, int order,
173 struct fib_info **last_resort, int *last_idx, int *dflt);
175 extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
176 struct nlmsghdr *n, struct netlink_skb_parms *req);
178 static kmem_cache_t *fn_alias_kmem;
179 static struct trie *trie_local = NULL, *trie_main = NULL;
181 static void trie_bug(char *err)
183 printk("Trie Bug: %s\n", err);
187 static inline struct node *tnode_get_child(struct tnode *tn, int i)
189 if (i >= 1<<tn->bits)
190 trie_bug("tnode_get_child");
195 static inline int tnode_child_length(struct tnode *tn)
201 _________________________________________________________________
202 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
203 ----------------------------------------------------------------
204 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
206 _________________________________________________________________
207 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
208 -----------------------------------------------------------------
209 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
218 static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
220 if (offset < KEYLENGTH)
221 return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
226 static inline int tkey_equals(t_key a, t_key b)
231 static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
233 if (bits == 0 || offset >= KEYLENGTH)
235 bits = bits > KEYLENGTH ? KEYLENGTH : bits;
236 return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
239 static inline int tkey_mismatch(t_key a, int offset, t_key b)
246 while((diff << i) >> (KEYLENGTH-1) == 0)
251 /* Candiate for fib_semantics */
253 static void fn_free_alias(struct fib_alias *fa)
255 fib_release_info(fa->fa_info);
256 kmem_cache_free(fn_alias_kmem, fa);
260 To understand this stuff, an understanding of keys and all their bits is
261 necessary. Every node in the trie has a key associated with it, but not
262 all of the bits in that key are significant.
264 Consider a node 'n' and its parent 'tp'.
266 If n is a leaf, every bit in its key is significant. Its presence is
267 necessitaded by path compression, since during a tree traversal (when
268 searching for a leaf - unless we are doing an insertion) we will completely
269 ignore all skipped bits we encounter. Thus we need to verify, at the end of
270 a potentially successful search, that we have indeed been walking the
273 Note that we can never "miss" the correct key in the tree if present by
274 following the wrong path. Path compression ensures that segments of the key
275 that are the same for all keys with a given prefix are skipped, but the
276 skipped part *is* identical for each node in the subtrie below the skipped
277 bit! trie_insert() in this implementation takes care of that - note the
278 call to tkey_sub_equals() in trie_insert().
280 if n is an internal node - a 'tnode' here, the various parts of its key
281 have many different meanings.
284 _________________________________________________________________
285 | i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
286 -----------------------------------------------------------------
287 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
289 _________________________________________________________________
290 | C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
291 -----------------------------------------------------------------
292 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
299 First, let's just ignore the bits that come before the parent tp, that is
300 the bits from 0 to (tp->pos-1). They are *known* but at this point we do
301 not use them for anything.
303 The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
304 index into the parent's child array. That is, they will be used to find
305 'n' among tp's children.
307 The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
310 All the bits we have seen so far are significant to the node n. The rest
311 of the bits are really not needed or indeed known in n->key.
313 The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
314 n's child array, and will of course be different for each child.
316 The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
321 static void check_tnode(struct tnode *tn)
323 if(tn && tn->pos+tn->bits > 32) {
324 printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
328 static int halve_threshold = 25;
329 static int inflate_threshold = 50;
331 static struct leaf *leaf_new(void)
333 struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
335 NODE_INIT_PARENT(l, T_LEAF);
336 INIT_HLIST_HEAD(&l->list);
341 static struct leaf_info *leaf_info_new(int plen)
343 struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
346 INIT_LIST_HEAD(&li->falh);
351 static inline void free_leaf(struct leaf *l)
356 static inline void free_leaf_info(struct leaf_info *li)
361 static struct tnode* tnode_new(t_key key, int pos, int bits)
363 int nchildren = 1<<bits;
364 int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
365 struct tnode *tn = kmalloc(sz, GFP_KERNEL);
369 NODE_INIT_PARENT(tn, T_TNODE);
373 tn->full_children = 0;
374 tn->empty_children = 1<<bits;
377 printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
378 (unsigned int) (sizeof(struct node) * 1<<bits));
382 static void tnode_free(struct tnode *tn)
385 trie_bug("tnode_free\n");
388 free_leaf((struct leaf *)tn);
390 printk("FL %p \n", tn);
392 else if(IS_TNODE(tn)) {
395 printk("FT %p \n", tn);
398 trie_bug("tnode_free\n");
403 * Check whether a tnode 'n' is "full", i.e. it is an internal node
404 * and no bits are skipped. See discussion in dyntree paper p. 6
407 static inline int tnode_full(struct tnode *tn, struct node *n)
409 if(n == NULL || IS_LEAF(n))
412 return ((struct tnode *) n)->pos == tn->pos + tn->bits;
415 static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
417 tnode_put_child_reorg(tn, i, n, -1);
421 * Add a child at position i overwriting the old value.
422 * Update the value of full_children and empty_children.
425 static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
430 if(i >= 1<<tn->bits) {
431 printk("bits=%d, i=%d\n", tn->bits, i);
432 trie_bug("tnode_put_child_reorg bits");
434 write_lock_bh(&fib_lock);
437 /* update emptyChildren */
438 if (n == NULL && chi != NULL)
439 tn->empty_children++;
440 else if (n != NULL && chi == NULL)
441 tn->empty_children--;
443 /* update fullChildren */
445 wasfull = tnode_full(tn, chi);
447 isfull = tnode_full(tn, n);
448 if (wasfull && !isfull)
451 else if (!wasfull && isfull)
454 NODE_SET_PARENT(n, tn);
457 write_unlock_bh(&fib_lock);
460 static struct node *resize(struct trie *t, struct tnode *tn)
468 printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
469 tn, inflate_threshold, halve_threshold);
472 if (tn->empty_children == tnode_child_length(tn)) {
477 if (tn->empty_children == tnode_child_length(tn) - 1)
478 for (i = 0; i < tnode_child_length(tn); i++) {
480 write_lock_bh(&fib_lock);
481 if (tn->child[i] != NULL) {
483 /* compress one level */
484 struct node *n = tn->child[i];
486 NODE_INIT_PARENT(n, NODE_TYPE(n));
488 write_unlock_bh(&fib_lock);
492 write_unlock_bh(&fib_lock);
495 * Double as long as the resulting node has a number of
496 * nonempty nodes that are above the threshold.
500 * From "Implementing a dynamic compressed trie" by Stefan Nilsson of
501 * the Helsinki University of Technology and Matti Tikkanen of Nokia
502 * Telecommunications, page 6:
503 * "A node is doubled if the ratio of non-empty children to all
504 * children in the *doubled* node is at least 'high'."
506 * 'high' in this instance is the variable 'inflate_threshold'. It
507 * is expressed as a percentage, so we multiply it with
508 * tnode_child_length() and instead of multiplying by 2 (since the
509 * child array will be doubled by inflate()) and multiplying
510 * the left-hand side by 100 (to handle the percentage thing) we
511 * multiply the left-hand side by 50.
513 * The left-hand side may look a bit weird: tnode_child_length(tn)
514 * - tn->empty_children is of course the number of non-null children
515 * in the current node. tn->full_children is the number of "full"
516 * children, that is non-null tnodes with a skip value of 0.
517 * All of those will be doubled in the resulting inflated tnode, so
518 * we just count them one extra time here.
520 * A clearer way to write this would be:
522 * to_be_doubled = tn->full_children;
523 * not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
526 * new_child_length = tnode_child_length(tn) * 2;
528 * new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
530 * if (new_fill_factor >= inflate_threshold)
532 * ...and so on, tho it would mess up the while() loop.
535 * 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
539 * 100 * (not_to_be_doubled + 2*to_be_doubled) >=
540 * inflate_threshold * new_child_length
542 * expand not_to_be_doubled and to_be_doubled, and shorten:
543 * 100 * (tnode_child_length(tn) - tn->empty_children +
544 * tn->full_children ) >= inflate_threshold * new_child_length
546 * expand new_child_length:
547 * 100 * (tnode_child_length(tn) - tn->empty_children +
548 * tn->full_children ) >=
549 * inflate_threshold * tnode_child_length(tn) * 2
552 * 50 * (tn->full_children + tnode_child_length(tn) -
553 * tn->empty_children ) >= inflate_threshold *
554 * tnode_child_length(tn)
560 while ((tn->full_children > 0 &&
561 50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
562 inflate_threshold * tnode_child_length(tn))) {
570 * Halve as long as the number of empty children in this
571 * node is above threshold.
573 while (tn->bits > 1 &&
574 100 * (tnode_child_length(tn) - tn->empty_children) <
575 halve_threshold * tnode_child_length(tn))
579 /* Only one child remains */
581 if (tn->empty_children == tnode_child_length(tn) - 1)
582 for (i = 0; i < tnode_child_length(tn); i++) {
584 write_lock_bh(&fib_lock);
585 if (tn->child[i] != NULL) {
586 /* compress one level */
587 struct node *n = tn->child[i];
590 NODE_INIT_PARENT(n, NODE_TYPE(n));
592 write_unlock_bh(&fib_lock);
596 write_unlock_bh(&fib_lock);
599 return (struct node *) tn;
602 static struct tnode *inflate(struct trie *t, struct tnode *tn)
605 struct tnode *oldtnode = tn;
606 int olen = tnode_child_length(tn);
610 printk("In inflate\n");
612 tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
615 trie_bug("tnode_new failed");
617 for(i = 0; i < olen; i++) {
618 struct node *node = tnode_get_child(oldtnode, i);
624 /* A leaf or an internal node with skipped bits */
626 if(IS_LEAF(node) || ((struct tnode *) node)->pos >
627 tn->pos + tn->bits - 1) {
628 if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1,
630 put_child(t, tn, 2*i, node);
632 put_child(t, tn, 2*i+1, node);
636 /* An internal node with two children */
637 inode = (struct tnode *) node;
639 if (inode->bits == 1) {
640 put_child(t, tn, 2*i, inode->child[0]);
641 put_child(t, tn, 2*i+1, inode->child[1]);
646 /* An internal node with more than two children */
648 struct tnode *left, *right;
651 /* We will replace this node 'inode' with two new
652 * ones, 'left' and 'right', each with half of the
653 * original children. The two new nodes will have
654 * a position one bit further down the key and this
655 * means that the "significant" part of their keys
656 * (see the discussion near the top of this file)
657 * will differ by one bit, which will be "0" in
658 * left's key and "1" in right's key. Since we are
659 * moving the key position by one step, the bit that
660 * we are moving away from - the bit at position
661 * (inode->pos) - is the one that will differ between
662 * left and right. So... we synthesize that bit in the
664 * The mask 'm' below will be a single "one" bit at
665 * the position (inode->pos)
668 t_key m = TKEY_GET_MASK(inode->pos, 1);
670 /* Use the old key, but set the new significant
673 left = tnode_new(inode->key&(~m), inode->pos + 1,
677 trie_bug("tnode_new failed");
680 /* Use the old key, but set the new significant
683 right = tnode_new(inode->key|m, inode->pos + 1,
687 trie_bug("tnode_new failed");
689 size = tnode_child_length(left);
690 for(j = 0; j < size; j++) {
691 put_child(t, left, j, inode->child[j]);
692 put_child(t, right, j, inode->child[j + size]);
694 put_child(t, tn, 2*i, resize(t, left));
695 put_child(t, tn, 2*i+1, resize(t, right));
700 tnode_free(oldtnode);
704 static struct tnode *halve(struct trie *t, struct tnode *tn)
706 struct tnode *oldtnode = tn;
707 struct node *left, *right;
709 int olen = tnode_child_length(tn);
711 if(trie_debug) printk("In halve\n");
713 tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
716 trie_bug("tnode_new failed");
718 for(i = 0; i < olen; i += 2) {
719 left = tnode_get_child(oldtnode, i);
720 right = tnode_get_child(oldtnode, i+1);
722 /* At least one of the children is empty */
724 if (right == NULL) /* Both are empty */
726 put_child(t, tn, i/2, right);
727 } else if (right == NULL)
728 put_child(t, tn, i/2, left);
730 /* Two nonempty children */
732 struct tnode *newBinNode =
733 tnode_new(left->key, tn->pos + tn->bits, 1);
736 trie_bug("tnode_new failed");
738 put_child(t, newBinNode, 0, left);
739 put_child(t, newBinNode, 1, right);
740 put_child(t, tn, i/2, resize(t, newBinNode));
743 tnode_free(oldtnode);
747 static void *trie_init(struct trie *t)
753 #ifdef CONFIG_IP_FIB_TRIE_STATS
754 memset(&t->stats, 0, sizeof(struct trie_use_stats));
760 static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
762 struct hlist_node *node;
763 struct leaf_info *li;
765 hlist_for_each_entry(li, node, head, hlist) {
767 if ( li->plen == plen )
773 static inline struct list_head * get_fa_head(struct leaf *l, int plen)
775 struct list_head *fa_head=NULL;
776 struct leaf_info *li = find_leaf_info(&l->list, plen);
784 static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
786 struct leaf_info *li=NULL, *last=NULL;
787 struct hlist_node *node, *tmp;
789 write_lock_bh(&fib_lock);
791 if(hlist_empty(head))
792 hlist_add_head(&new->hlist, head);
794 hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
796 if (new->plen > li->plen)
802 hlist_add_after(&last->hlist, &new->hlist);
804 hlist_add_before(&new->hlist, &li->hlist);
806 write_unlock_bh(&fib_lock);
810 fib_find_node(struct trie *t, u32 key)
819 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
820 tn = (struct tnode *) n;
824 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
825 pos=tn->pos + tn->bits;
826 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
831 /* Case we have found a leaf. Compare prefixes */
833 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
834 struct leaf *l = (struct leaf *) n;
840 static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
845 struct tnode *tp = NULL;
853 while (tn != NULL && NODE_PARENT(tn) != NULL) {
856 printk("Rebalance tn=%p \n", tn);
857 if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
859 printk("Rebalance tp=%p \n", tp);
860 if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
866 tp = NODE_PARENT(tn);
867 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
868 wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
869 tn = (struct tnode *) resize (t, (struct tnode *)tn);
870 tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
875 tn = NODE_PARENT(tn);
877 /* Handle last (top) tnode */
879 tn = (struct tnode*) resize(t, (struct tnode *)tn);
881 return (struct node*) tn;
884 static struct list_head *
885 fib_insert_node(struct trie *t, int *err, u32 key, int plen)
888 struct tnode *tp = NULL, *tn = NULL;
892 struct list_head *fa_head=NULL;
893 struct leaf_info *li;
899 /* If we point to NULL, stop. Either the tree is empty and we should
900 * just put a new leaf in if, or we have reached an empty child slot,
901 * and we should just put our new leaf in that.
902 * If we point to a T_TNODE, check if it matches our key. Note that
903 * a T_TNODE might be skipping any number of bits - its 'pos' need
904 * not be the parent's 'pos'+'bits'!
906 * If it does match the current key, get pos/bits from it, extract
907 * the index from our key, push the T_TNODE and walk the tree.
909 * If it doesn't, we have to replace it with a new T_TNODE.
911 * If we point to a T_LEAF, it might or might not have the same key
912 * as we do. If it does, just change the value, update the T_LEAF's
913 * value, and return it.
914 * If it doesn't, we need to replace it with a T_TNODE.
917 while (n != NULL && NODE_TYPE(n) == T_TNODE) {
918 tn = (struct tnode *) n;
922 if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
924 pos=tn->pos + tn->bits;
925 n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
927 if(n && NODE_PARENT(n) != tn) {
928 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
937 * n ----> NULL, LEAF or TNODE
939 * tp is n's (parent) ----> NULL or TNODE
942 if(tp && IS_LEAF(tp))
946 /* Case 1: n is a leaf. Compare prefixes */
948 if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
949 struct leaf *l = ( struct leaf *) n;
951 li = leaf_info_new(plen);
959 insert_leaf_info(&l->list, li);
971 li = leaf_info_new(plen);
974 tnode_free((struct tnode *) l);
980 insert_leaf_info(&l->list, li);
982 /* Case 2: n is NULL, and will just insert a new leaf */
983 if (t->trie && n == NULL) {
985 NODE_SET_PARENT(l, tp);
991 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
992 put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
995 /* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
998 * Add a new tnode here
999 * first tnode need some special handling
1003 pos=tp->pos+tp->bits;
1007 newpos = tkey_mismatch(key, pos, n->key);
1008 tn = tnode_new(n->key, newpos, 1);
1012 tn = tnode_new(key, newpos, 1); /* First tnode */
1017 tnode_free((struct tnode *) l);
1022 NODE_SET_PARENT(tn, tp);
1024 missbit=tkey_extract_bits(key, newpos, 1);
1025 put_child(t, tn, missbit, (struct node *)l);
1026 put_child(t, tn, 1-missbit, n);
1029 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1030 put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
1033 t->trie = (struct node*) tn; /* First tnode */
1037 if(tp && tp->pos+tp->bits > 32) {
1038 printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
1039 tp, tp->pos, tp->bits, key, plen);
1041 /* Rebalance the trie */
1042 t->trie = trie_rebalance(t, tp);
1050 fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1051 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1053 struct trie *t = (struct trie *) tb->tb_data;
1054 struct fib_alias *fa, *new_fa;
1055 struct list_head *fa_head=NULL;
1056 struct fib_info *fi;
1057 int plen = r->rtm_dst_len;
1058 int type = r->rtm_type;
1059 u8 tos = r->rtm_tos;
1069 memcpy(&key, rta->rta_dst, 4);
1074 printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
1076 mask = ntohl( inet_make_mask(plen) );
1083 if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
1086 l = fib_find_node(t, key);
1090 fa_head = get_fa_head(l, plen);
1091 fa = fib_find_alias(fa_head, tos, fi->fib_priority);
1094 /* Now fa, if non-NULL, points to the first fib alias
1095 * with the same keys [prefix,tos,priority], if such key already
1096 * exists or to the node before which we will insert new one.
1098 * If fa is NULL, we will need to allocate a new one and
1099 * insert to the head of f.
1101 * If f is NULL, no fib node matched the destination key
1102 * and we need to allocate a new one of those as well.
1106 fa->fa_info->fib_priority == fi->fib_priority) {
1107 struct fib_alias *fa_orig;
1110 if (nlhdr->nlmsg_flags & NLM_F_EXCL)
1113 if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
1114 struct fib_info *fi_drop;
1117 write_lock_bh(&fib_lock);
1119 fi_drop = fa->fa_info;
1122 fa->fa_scope = r->rtm_scope;
1123 state = fa->fa_state;
1124 fa->fa_state &= ~FA_S_ACCESSED;
1126 write_unlock_bh(&fib_lock);
1128 fib_release_info(fi_drop);
1129 if (state & FA_S_ACCESSED)
1134 /* Error if we find a perfect match which
1135 * uses the same scope, type, and nexthop
1139 list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
1140 if (fa->fa_tos != tos)
1142 if (fa->fa_info->fib_priority != fi->fib_priority)
1144 if (fa->fa_type == type &&
1145 fa->fa_scope == r->rtm_scope &&
1146 fa->fa_info == fi) {
1150 if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
1154 if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
1158 new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
1162 new_fa->fa_info = fi;
1163 new_fa->fa_tos = tos;
1164 new_fa->fa_type = type;
1165 new_fa->fa_scope = r->rtm_scope;
1166 new_fa->fa_state = 0;
1171 * Insert new entry to the list.
1175 fa_head = fib_insert_node(t, &err, key, plen);
1178 goto out_free_new_fa;
1181 write_lock_bh(&fib_lock);
1183 list_add_tail(&new_fa->fa_list,
1184 (fa ? &fa->fa_list : fa_head));
1186 write_unlock_bh(&fib_lock);
1189 rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
1194 kmem_cache_free(fn_alias_kmem, new_fa);
1196 fib_release_info(fi);
1201 static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
1202 struct fib_result *res, int *err)
1206 struct leaf_info *li;
1207 struct hlist_head *hhead = &l->list;
1208 struct hlist_node *node;
1210 hlist_for_each_entry(li, node, hhead, hlist) {
1213 mask = ntohl(inet_make_mask(i));
1214 if (l->key != (key & mask))
1217 if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
1219 #ifdef CONFIG_IP_FIB_TRIE_STATS
1220 t->stats.semantic_match_passed++;
1224 #ifdef CONFIG_IP_FIB_TRIE_STATS
1225 t->stats.semantic_match_miss++;
1232 fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1234 struct trie *t = (struct trie *) tb->tb_data;
1239 t_key key=ntohl(flp->fl4_dst);
1242 int current_prefix_length = KEYLENGTH;
1245 read_lock(&fib_lock);
1249 #ifdef CONFIG_IP_FIB_TRIE_STATS
1255 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) )
1259 pn = (struct tnode *) n;
1268 cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
1270 n = tnode_get_child(pn, cindex);
1273 #ifdef CONFIG_IP_FIB_TRIE_STATS
1274 t->stats.null_node_hit++;
1282 struct tnode *cn = (struct tnode *)n;
1283 t_key node_prefix, key_prefix, pref_mismatch;
1287 * It's a tnode, and we can do some extra checks here if we
1288 * like, to avoid descending into a dead-end branch.
1289 * This tnode is in the parent's child array at index
1290 * key[p_pos..p_pos+p_bits] but potentially with some bits
1291 * chopped off, so in reality the index may be just a
1292 * subprefix, padded with zero at the end.
1293 * We can also take a look at any skipped bits in this
1294 * tnode - everything up to p_pos is supposed to be ok,
1295 * and the non-chopped bits of the index (se previous
1296 * paragraph) are also guaranteed ok, but the rest is
1297 * considered unknown.
1299 * The skipped bits are key[pos+bits..cn->pos].
1302 /* If current_prefix_length < pos+bits, we are already doing
1303 * actual prefix matching, which means everything from
1304 * pos+(bits-chopped_off) onward must be zero along some
1305 * branch of this subtree - otherwise there is *no* valid
1306 * prefix present. Here we can only check the skipped
1307 * bits. Remember, since we have already indexed into the
1308 * parent's child array, we know that the bits we chopped of
1312 /* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
1314 if (current_prefix_length < pos+bits) {
1315 if (tkey_extract_bits(cn->key, current_prefix_length,
1316 cn->pos - current_prefix_length) != 0 ||
1322 * If chopped_off=0, the index is fully validated and we
1323 * only need to look at the skipped bits for this, the new,
1324 * tnode. What we actually want to do is to find out if
1325 * these skipped bits match our key perfectly, or if we will
1326 * have to count on finding a matching prefix further down,
1327 * because if we do, we would like to have some way of
1328 * verifying the existence of such a prefix at this point.
1331 /* The only thing we can do at this point is to verify that
1332 * any such matching prefix can indeed be a prefix to our
1333 * key, and if the bits in the node we are inspecting that
1334 * do not match our key are not ZERO, this cannot be true.
1335 * Thus, find out where there is a mismatch (before cn->pos)
1336 * and verify that all the mismatching bits are zero in the
1340 /* Note: We aren't very concerned about the piece of the key
1341 * that precede pn->pos+pn->bits, since these have already been
1342 * checked. The bits after cn->pos aren't checked since these are
1343 * by definition "unknown" at this point. Thus, what we want to
1344 * see is if we are about to enter the "prefix matching" state,
1345 * and in that case verify that the skipped bits that will prevail
1346 * throughout this subtree are zero, as they have to be if we are
1347 * to find a matching prefix.
1350 node_prefix = MASK_PFX(cn->key, cn->pos);
1351 key_prefix = MASK_PFX(key, cn->pos);
1352 pref_mismatch = key_prefix^node_prefix;
1355 /* In short: If skipped bits in this node do not match the search
1356 * key, enter the "prefix matching" state.directly.
1358 if (pref_mismatch) {
1359 while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
1361 pref_mismatch = pref_mismatch <<1;
1363 key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
1365 if (key_prefix != 0)
1368 if (current_prefix_length >= cn->pos)
1369 current_prefix_length=mp;
1372 pn = (struct tnode *)n; /* Descend */
1377 if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
1383 /* As zero don't change the child key (cindex) */
1384 while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
1388 /* Decrease current_... with bits chopped off */
1389 if (current_prefix_length > pn->pos + pn->bits - chopped_off)
1390 current_prefix_length = pn->pos + pn->bits - chopped_off;
1393 * Either we do the actual chop off according or if we have
1394 * chopped off all bits in this tnode walk up to our parent.
1397 if(chopped_off <= pn->bits)
1398 cindex &= ~(1 << (chopped_off-1));
1400 if( NODE_PARENT(pn) == NULL)
1403 /* Get Child's index */
1404 cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
1405 pn = NODE_PARENT(pn);
1408 #ifdef CONFIG_IP_FIB_TRIE_STATS
1409 t->stats.backtrack++;
1417 read_unlock(&fib_lock);
1421 static int trie_leaf_remove(struct trie *t, t_key key)
1424 struct tnode *tp = NULL;
1425 struct node *n = t->trie;
1429 printk("entering trie_leaf_remove(%p)\n", n);
1431 /* Note that in the case skipped bits, those bits are *not* checked!
1432 * When we finish this, we will have NULL or a T_LEAF, and the
1433 * T_LEAF may or may not match our key.
1436 while (n != NULL && IS_TNODE(n)) {
1437 struct tnode *tn = (struct tnode *) n;
1439 n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
1441 if(n && NODE_PARENT(n) != tn) {
1442 printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
1446 l = (struct leaf *) n;
1448 if(!n || !tkey_equals(l->key, key))
1453 * Remove the leaf and rebalance the tree
1459 tp = NODE_PARENT(n);
1460 tnode_free((struct tnode *) n);
1463 cindex = tkey_extract_bits(key, tp->pos, tp->bits);
1464 put_child(t, (struct tnode *)tp, cindex, NULL);
1465 t->trie = trie_rebalance(t, tp);
1474 fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
1475 struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
1477 struct trie *t = (struct trie *) tb->tb_data;
1479 int plen = r->rtm_dst_len;
1480 u8 tos = r->rtm_tos;
1481 struct fib_alias *fa, *fa_to_delete;
1482 struct list_head *fa_head;
1490 memcpy(&key, rta->rta_dst, 4);
1493 mask = ntohl( inet_make_mask(plen) );
1499 l = fib_find_node(t, key);
1504 fa_head = get_fa_head(l, plen);
1505 fa = fib_find_alias(fa_head, tos, 0);
1511 printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
1513 fa_to_delete = NULL;
1514 fa_head = fa->fa_list.prev;
1515 list_for_each_entry(fa, fa_head, fa_list) {
1516 struct fib_info *fi = fa->fa_info;
1518 if (fa->fa_tos != tos)
1521 if ((!r->rtm_type ||
1522 fa->fa_type == r->rtm_type) &&
1523 (r->rtm_scope == RT_SCOPE_NOWHERE ||
1524 fa->fa_scope == r->rtm_scope) &&
1525 (!r->rtm_protocol ||
1526 fi->fib_protocol == r->rtm_protocol) &&
1527 fib_nh_match(r, nlhdr, rta, fi) == 0) {
1535 struct leaf_info *li;
1538 rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
1540 l = fib_find_node(t, key);
1541 li = find_leaf_info(&l->list, plen);
1543 write_lock_bh(&fib_lock);
1545 list_del(&fa->fa_list);
1547 if(list_empty(fa_head)) {
1548 hlist_del(&li->hlist);
1551 write_unlock_bh(&fib_lock);
1556 if(hlist_empty(&l->list))
1557 trie_leaf_remove(t, key);
1559 if (fa->fa_state & FA_S_ACCESSED)
1568 static int trie_flush_list(struct trie *t, struct list_head *head)
1570 struct fib_alias *fa, *fa_node;
1573 list_for_each_entry_safe(fa, fa_node, head, fa_list) {
1574 struct fib_info *fi = fa->fa_info;
1576 if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
1578 write_lock_bh(&fib_lock);
1579 list_del(&fa->fa_list);
1580 write_unlock_bh(&fib_lock);
1589 static int trie_flush_leaf(struct trie *t, struct leaf *l)
1592 struct hlist_head *lih = &l->list;
1593 struct hlist_node *node, *tmp;
1594 struct leaf_info *li = NULL;
1596 hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
1598 found += trie_flush_list(t, &li->falh);
1600 if (list_empty(&li->falh)) {
1602 write_lock_bh(&fib_lock);
1603 hlist_del(&li->hlist);
1604 write_unlock_bh(&fib_lock);
1612 static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
1614 struct node *c = (struct node *) thisleaf;
1622 if (IS_LEAF(t->trie)) /* trie w. just a leaf */
1623 return (struct leaf *) t->trie;
1625 p = (struct tnode*) t->trie; /* Start */
1628 p = (struct tnode *) NODE_PARENT(c);
1632 /* Find the next child of the parent */
1634 pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
1638 last = 1 << p->bits;
1639 for(idx = pos; idx < last ; idx++) {
1640 if( p->child[idx]) {
1642 /* Decend if tnode */
1644 while (IS_TNODE(p->child[idx])) {
1645 p = (struct tnode*) p->child[idx];
1648 /* Rightmost non-NULL branch */
1649 if( p && IS_TNODE(p) )
1650 while ( p->child[idx] == NULL && idx < (1 << p->bits) ) idx++;
1652 /* Done with this tnode? */
1653 if( idx >= (1 << p->bits) || p->child[idx] == NULL )
1656 return (struct leaf*) p->child[idx];
1660 /* No more children go up one step */
1661 c = (struct node*) p;
1662 p = (struct tnode *) NODE_PARENT(p);
1664 return NULL; /* Ready. Root of trie */
1667 static int fn_trie_flush(struct fib_table *tb)
1669 struct trie *t = (struct trie *) tb->tb_data;
1670 struct leaf *ll = NULL, *l = NULL;
1675 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1676 found += trie_flush_leaf(t, l);
1678 if (ll && hlist_empty(&ll->list))
1679 trie_leaf_remove(t, ll->key);
1683 if (ll && hlist_empty(&ll->list))
1684 trie_leaf_remove(t, ll->key);
1687 printk("trie_flush found=%d\n", found);
1691 static int trie_last_dflt=-1;
1694 fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
1696 struct trie *t = (struct trie *) tb->tb_data;
1697 int order, last_idx;
1698 struct fib_info *fi = NULL;
1699 struct fib_info *last_resort;
1700 struct fib_alias *fa = NULL;
1701 struct list_head *fa_head;
1708 read_lock(&fib_lock);
1710 l = fib_find_node(t, 0);
1714 fa_head = get_fa_head(l, 0);
1718 if (list_empty(fa_head))
1721 list_for_each_entry(fa, fa_head, fa_list) {
1722 struct fib_info *next_fi = fa->fa_info;
1724 if (fa->fa_scope != res->scope ||
1725 fa->fa_type != RTN_UNICAST)
1728 if (next_fi->fib_priority > res->fi->fib_priority)
1730 if (!next_fi->fib_nh[0].nh_gw ||
1731 next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
1733 fa->fa_state |= FA_S_ACCESSED;
1736 if (next_fi != res->fi)
1738 } else if (!fib_detect_death(fi, order, &last_resort,
1739 &last_idx, &trie_last_dflt)) {
1741 fib_info_put(res->fi);
1743 atomic_inc(&fi->fib_clntref);
1744 trie_last_dflt = order;
1750 if (order <= 0 || fi == NULL) {
1751 trie_last_dflt = -1;
1755 if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
1757 fib_info_put(res->fi);
1759 atomic_inc(&fi->fib_clntref);
1760 trie_last_dflt = order;
1763 if (last_idx >= 0) {
1765 fib_info_put(res->fi);
1766 res->fi = last_resort;
1768 atomic_inc(&last_resort->fib_clntref);
1770 trie_last_dflt = last_idx;
1772 read_unlock(&fib_lock);
1775 static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
1776 struct sk_buff *skb, struct netlink_callback *cb)
1779 struct fib_alias *fa;
1781 u32 xkey=htonl(key);
1786 list_for_each_entry(fa, fah, fa_list) {
1791 if (fa->fa_info->fib_nh == NULL) {
1792 printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1796 if (fa->fa_info == NULL) {
1797 printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
1802 if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
1811 fa->fa_info, 0) < 0) {
1821 static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
1822 struct netlink_callback *cb)
1825 struct list_head *fa_head;
1826 struct leaf *l = NULL;
1829 for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
1834 memset(&cb->args[3], 0,
1835 sizeof(cb->args) - 3*sizeof(cb->args[0]));
1837 fa_head = get_fa_head(l, plen);
1842 if(list_empty(fa_head))
1845 if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
1854 static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
1857 struct trie *t = (struct trie *) tb->tb_data;
1861 read_lock(&fib_lock);
1862 for (m=0; m<=32; m++) {
1867 memset(&cb->args[2], 0,
1868 sizeof(cb->args) - 2*sizeof(cb->args[0]));
1870 if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
1875 read_unlock(&fib_lock);
1879 read_unlock(&fib_lock);
1883 /* Fix more generic FIB names for init later */
1885 #ifdef CONFIG_IP_MULTIPLE_TABLES
1886 struct fib_table * fib_hash_init(int id)
1888 struct fib_table * __init fib_hash_init(int id)
1891 struct fib_table *tb;
1894 if (fn_alias_kmem == NULL)
1895 fn_alias_kmem = kmem_cache_create("ip_fib_alias",
1896 sizeof(struct fib_alias),
1897 0, SLAB_HWCACHE_ALIGN,
1900 tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
1906 tb->tb_lookup = fn_trie_lookup;
1907 tb->tb_insert = fn_trie_insert;
1908 tb->tb_delete = fn_trie_delete;
1909 tb->tb_flush = fn_trie_flush;
1910 tb->tb_select_default = fn_trie_select_default;
1911 tb->tb_dump = fn_trie_dump;
1912 memset(tb->tb_data, 0, sizeof(struct trie));
1914 t = (struct trie *) tb->tb_data;
1918 if (id == RT_TABLE_LOCAL)
1920 else if (id == RT_TABLE_MAIN)
1923 if (id == RT_TABLE_LOCAL)
1924 printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
1929 /* Trie dump functions */
1931 static void putspace_seq(struct seq_file *seq, int n)
1933 while (n--) seq_printf(seq, " ");
1936 static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
1939 seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
1942 static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
1943 int pend, int cindex, int bits)
1945 putspace_seq(seq, indent);
1947 seq_printf(seq, "|");
1949 seq_printf(seq, "+");
1951 seq_printf(seq, "%d/", cindex);
1952 printbin_seq(seq, cindex, bits);
1953 seq_printf(seq, ": ");
1956 seq_printf(seq, "<root>: ");
1957 seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
1960 seq_printf(seq, "key=%d.%d.%d.%d\n",
1961 n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
1963 int plen=((struct tnode *)n)->pos;
1964 t_key prf=MASK_PFX(n->key, plen);
1965 seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
1966 prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
1969 struct leaf *l=(struct leaf *)n;
1970 struct fib_alias *fa;
1972 for (i=32; i>=0; i--)
1973 if(find_leaf_info(&l->list, i)) {
1975 struct list_head *fa_head = get_fa_head(l, i);
1980 if(list_empty(fa_head))
1983 putspace_seq(seq, indent+2);
1984 seq_printf(seq, "{/%d...dumping}\n", i);
1987 list_for_each_entry(fa, fa_head, fa_list) {
1988 putspace_seq(seq, indent+2);
1989 if (fa->fa_info->fib_nh == NULL) {
1990 seq_printf(seq, "Error _fib_nh=NULL\n");
1993 if (fa->fa_info == NULL) {
1994 seq_printf(seq, "Error fa_info=NULL\n");
1998 seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
2005 else if (IS_TNODE(n)) {
2006 struct tnode *tn=(struct tnode *)n;
2007 putspace_seq(seq, indent); seq_printf(seq, "| ");
2008 seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
2009 printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
2010 seq_printf(seq, "}\n");
2011 putspace_seq(seq, indent); seq_printf(seq, "| ");
2012 seq_printf(seq, "{pos=%d", tn->pos);
2013 seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
2014 seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
2015 putspace_seq(seq, indent); seq_printf(seq, "| ");
2016 seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
2020 static void trie_dump_seq(struct seq_file *seq, struct trie *t)
2022 struct node *n=t->trie;
2028 read_lock(&fib_lock);
2030 seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
2032 printnode_seq(seq, indent, n, pend, cindex, 0);
2034 struct tnode *tn=(struct tnode *)n;
2035 pend = tn->pos+tn->bits;
2036 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2040 while (tn && cindex < (1 << tn->bits)) {
2041 if (tn->child[cindex]) {
2045 printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
2046 if (IS_LEAF(tn->child[cindex])) {
2052 * New tnode. Decend one level
2056 n=tn->child[cindex];
2057 tn=(struct tnode *)n;
2058 pend=tn->pos+tn->bits;
2059 putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
2068 * Test if we are done
2071 while (cindex >= (1 << tn->bits)) {
2074 * Move upwards and test for root
2075 * pop off all traversed nodes
2078 if (NODE_PARENT(tn) == NULL) {
2084 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2085 tn = NODE_PARENT(tn);
2087 n=(struct node *)tn;
2088 pend=tn->pos+tn->bits;
2097 else seq_printf(seq, "------ trie is empty\n");
2099 read_unlock(&fib_lock);
2102 static struct trie_stat *trie_stat_new(void)
2104 struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
2112 s->nullpointers = 0;
2114 for(i=0; i< MAX_CHILDS; i++)
2115 s->nodesizes[i] = 0;
2120 static struct trie_stat *trie_collect_stats(struct trie *t)
2122 struct node *n=t->trie;
2123 struct trie_stat *s = trie_stat_new();
2129 read_lock(&fib_lock);
2134 struct tnode *tn = (struct tnode *)n;
2135 pend=tn->pos+tn->bits;
2137 s->nodesizes[tn->bits]++;
2140 while (tn && cindex < (1 << tn->bits)) {
2141 if (tn->child[cindex]) {
2144 if (IS_LEAF(tn->child[cindex])) {
2148 if (depth > s->maxdepth)
2149 s->maxdepth = depth;
2150 s->totdepth += depth;
2156 * New tnode. Decend one level
2160 s->nodesizes[tn->bits]++;
2163 n = tn->child[cindex];
2164 tn = (struct tnode *)n;
2165 pend = tn->pos+tn->bits;
2177 * Test if we are done
2180 while (cindex >= (1 << tn->bits)) {
2183 * Move upwards and test for root
2184 * pop off all traversed nodes
2188 if (NODE_PARENT(tn) == NULL) {
2194 cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
2195 tn = NODE_PARENT(tn);
2197 n = (struct node *)tn;
2198 pend=tn->pos+tn->bits;
2209 read_unlock(&fib_lock);
2213 #ifdef CONFIG_PROC_FS
2215 static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
2220 static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
2225 static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
2229 if (ip_fib_main_table)
2230 v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
2234 static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2237 return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
2240 static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
2246 * This outputs /proc/net/fib_triestats
2248 * It always works in backward compatibility mode.
2249 * The format of the file is not supposed to be changed.
2252 static void collect_and_show(struct trie *t, struct seq_file *seq)
2254 int bytes = 0; /* How many bytes are used, a ref is 4 bytes */
2255 int i, max, pointers;
2256 struct trie_stat *stat;
2259 stat = trie_collect_stats(t);
2262 seq_printf(seq, "trie=%p\n", t);
2266 avdepth=stat->totdepth*100 / stat->leaves;
2269 seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
2270 seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
2272 seq_printf(seq, "Leaves: %d\n", stat->leaves);
2273 bytes += sizeof(struct leaf) * stat->leaves;
2274 seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
2275 bytes += sizeof(struct tnode) * stat->tnodes;
2279 while (max >= 0 && stat->nodesizes[max] == 0)
2283 for (i = 1; i <= max; i++)
2284 if (stat->nodesizes[i] != 0) {
2285 seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
2286 pointers += (1<<i) * stat->nodesizes[i];
2288 seq_printf(seq, "\n");
2289 seq_printf(seq, "Pointers: %d\n", pointers);
2290 bytes += sizeof(struct node *) * pointers;
2291 seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
2292 seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
2297 #ifdef CONFIG_IP_FIB_TRIE_STATS
2298 seq_printf(seq, "Counters:\n---------\n");
2299 seq_printf(seq,"gets = %d\n", t->stats.gets);
2300 seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
2301 seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
2302 seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
2303 seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
2305 memset(&(t->stats), 0, sizeof(t->stats));
2307 #endif /* CONFIG_IP_FIB_TRIE_STATS */
2310 static int fib_triestat_seq_show(struct seq_file *seq, void *v)
2314 if (v == SEQ_START_TOKEN) {
2315 seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
2316 sizeof(struct leaf), sizeof(struct tnode));
2318 collect_and_show(trie_local, seq);
2321 collect_and_show(trie_main, seq);
2324 snprintf(bf, sizeof(bf),
2325 "*\t%08X\t%08X", 200, 400);
2327 seq_printf(seq, "%-127s\n", bf);
2332 static struct seq_operations fib_triestat_seq_ops = {
2333 .start = fib_triestat_seq_start,
2334 .next = fib_triestat_seq_next,
2335 .stop = fib_triestat_seq_stop,
2336 .show = fib_triestat_seq_show,
2339 static int fib_triestat_seq_open(struct inode *inode, struct file *file)
2341 struct seq_file *seq;
2344 rc = seq_open(file, &fib_triestat_seq_ops);
2348 seq = file->private_data;
2355 static struct file_operations fib_triestat_seq_fops = {
2356 .owner = THIS_MODULE,
2357 .open = fib_triestat_seq_open,
2359 .llseek = seq_lseek,
2360 .release = seq_release_private,
2363 int __init fib_stat_proc_init(void)
2365 if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
2370 void __init fib_stat_proc_exit(void)
2372 proc_net_remove("fib_triestat");
2375 static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
2380 static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
2385 static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
2389 if (ip_fib_main_table)
2390 v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
2394 static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2397 return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
2400 static void fib_trie_seq_stop(struct seq_file *seq, void *v)
2406 * This outputs /proc/net/fib_trie.
2408 * It always works in backward compatibility mode.
2409 * The format of the file is not supposed to be changed.
2412 static int fib_trie_seq_show(struct seq_file *seq, void *v)
2416 if (v == SEQ_START_TOKEN) {
2418 trie_dump_seq(seq, trie_local);
2421 trie_dump_seq(seq, trie_main);
2425 snprintf(bf, sizeof(bf),
2426 "*\t%08X\t%08X", 200, 400);
2427 seq_printf(seq, "%-127s\n", bf);
2433 static struct seq_operations fib_trie_seq_ops = {
2434 .start = fib_trie_seq_start,
2435 .next = fib_trie_seq_next,
2436 .stop = fib_trie_seq_stop,
2437 .show = fib_trie_seq_show,
2440 static int fib_trie_seq_open(struct inode *inode, struct file *file)
2442 struct seq_file *seq;
2445 rc = seq_open(file, &fib_trie_seq_ops);
2449 seq = file->private_data;
2456 static struct file_operations fib_trie_seq_fops = {
2457 .owner = THIS_MODULE,
2458 .open = fib_trie_seq_open,
2460 .llseek = seq_lseek,
2461 .release = seq_release_private,
2464 int __init fib_proc_init(void)
2466 if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
2471 void __init fib_proc_exit(void)
2473 proc_net_remove("fib_trie");
2476 #endif /* CONFIG_PROC_FS */