[NETFILTER]: xt_CONNMARK.c build fix
[linux-2.6] / net / ipv4 / fib_semantics.c
1 /*
2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
3  *              operating system.  INET is implemented using the  BSD Socket
4  *              interface as the means of communication with the user level.
5  *
6  *              IPv4 Forwarding Information Base: semantics.
7  *
8  * Version:     $Id: fib_semantics.c,v 1.19 2002/01/12 07:54:56 davem Exp $
9  *
10  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
11  *
12  *              This program is free software; you can redistribute it and/or
13  *              modify it under the terms of the GNU General Public License
14  *              as published by the Free Software Foundation; either version
15  *              2 of the License, or (at your option) any later version.
16  */
17
18 #include <asm/uaccess.h>
19 #include <asm/system.h>
20 #include <linux/bitops.h>
21 #include <linux/types.h>
22 #include <linux/kernel.h>
23 #include <linux/jiffies.h>
24 #include <linux/mm.h>
25 #include <linux/string.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/errno.h>
29 #include <linux/in.h>
30 #include <linux/inet.h>
31 #include <linux/inetdevice.h>
32 #include <linux/netdevice.h>
33 #include <linux/if_arp.h>
34 #include <linux/proc_fs.h>
35 #include <linux/skbuff.h>
36 #include <linux/init.h>
37
38 #include <net/arp.h>
39 #include <net/ip.h>
40 #include <net/protocol.h>
41 #include <net/route.h>
42 #include <net/tcp.h>
43 #include <net/sock.h>
44 #include <net/ip_fib.h>
45 #include <net/ip_mp_alg.h>
46 #include <net/netlink.h>
47 #include <net/nexthop.h>
48
49 #include "fib_lookup.h"
50
51 #define FSprintk(a...)
52
53 static DEFINE_SPINLOCK(fib_info_lock);
54 static struct hlist_head *fib_info_hash;
55 static struct hlist_head *fib_info_laddrhash;
56 static unsigned int fib_hash_size;
57 static unsigned int fib_info_cnt;
58
59 #define DEVINDEX_HASHBITS 8
60 #define DEVINDEX_HASHSIZE (1U << DEVINDEX_HASHBITS)
61 static struct hlist_head fib_info_devhash[DEVINDEX_HASHSIZE];
62
/*
 * Nexthop iteration helpers.
 *
 * for_nexthops(fi) / change_nexthops(fi) open a scope that declares
 * the loop counter "nhsel" and the cursor "nh" (const for the former,
 * writable for the latter) and start a loop over the nexthops of fi.
 * The scope must be closed with endfor_nexthops(fi).
 */
#ifdef CONFIG_IP_ROUTE_MULTIPATH

static DEFINE_SPINLOCK(fib_multipath_lock);

#define for_nexthops(fi) {                                              \
        int nhsel;                                                      \
        const struct fib_nh * nh;                                       \
        for (nhsel = 0, nh = (fi)->fib_nh;                              \
             nhsel < (fi)->fib_nhs;                                     \
             nh++, nhsel++)

#define change_nexthops(fi) {                                           \
        int nhsel;                                                      \
        struct fib_nh * nh;                                             \
        for (nhsel = 0, nh = (struct fib_nh*)((fi)->fib_nh);            \
             nhsel < (fi)->fib_nhs;                                     \
             nh++, nhsel++)

#else /* CONFIG_IP_ROUTE_MULTIPATH */

/*
 * Without multipath support only the first nexthop is visited; the
 * single-pass loop is kept so that both variants have the same shape
 * (hopefully gcc optimizes the dummy loop away).
 */

#define for_nexthops(fi) {                                              \
        int nhsel = 0;                                                  \
        const struct fib_nh * nh = (fi)->fib_nh;                        \
        for (nhsel = 0; nhsel < 1; nhsel++)

#define change_nexthops(fi) {                                           \
        int nhsel = 0;                                                  \
        struct fib_nh * nh = (struct fib_nh*)((fi)->fib_nh);            \
        for (nhsel = 0; nhsel < 1; nhsel++)

#endif /* CONFIG_IP_ROUTE_MULTIPATH */

/* Closes the scope opened by for_nexthops()/change_nexthops(). */
#define endfor_nexthops(fi) }
86
87
88 static const struct 
89 {
90         int     error;
91         u8      scope;
92 } fib_props[RTA_MAX + 1] = {
93         {
94                 .error  = 0,
95                 .scope  = RT_SCOPE_NOWHERE,
96         },      /* RTN_UNSPEC */
97         {
98                 .error  = 0,
99                 .scope  = RT_SCOPE_UNIVERSE,
100         },      /* RTN_UNICAST */
101         {
102                 .error  = 0,
103                 .scope  = RT_SCOPE_HOST,
104         },      /* RTN_LOCAL */
105         {
106                 .error  = 0,
107                 .scope  = RT_SCOPE_LINK,
108         },      /* RTN_BROADCAST */
109         {
110                 .error  = 0,
111                 .scope  = RT_SCOPE_LINK,
112         },      /* RTN_ANYCAST */
113         {
114                 .error  = 0,
115                 .scope  = RT_SCOPE_UNIVERSE,
116         },      /* RTN_MULTICAST */
117         {
118                 .error  = -EINVAL,
119                 .scope  = RT_SCOPE_UNIVERSE,
120         },      /* RTN_BLACKHOLE */
121         {
122                 .error  = -EHOSTUNREACH,
123                 .scope  = RT_SCOPE_UNIVERSE,
124         },      /* RTN_UNREACHABLE */
125         {
126                 .error  = -EACCES,
127                 .scope  = RT_SCOPE_UNIVERSE,
128         },      /* RTN_PROHIBIT */
129         {
130                 .error  = -EAGAIN,
131                 .scope  = RT_SCOPE_UNIVERSE,
132         },      /* RTN_THROW */
133         {
134                 .error  = -EINVAL,
135                 .scope  = RT_SCOPE_NOWHERE,
136         },      /* RTN_NAT */
137         {
138                 .error  = -EINVAL,
139                 .scope  = RT_SCOPE_NOWHERE,
140         },      /* RTN_XRESOLVE */
141 };
142
143
144 /* Release a nexthop info record */
145
146 void free_fib_info(struct fib_info *fi)
147 {
148         if (fi->fib_dead == 0) {
149                 printk("Freeing alive fib_info %p\n", fi);
150                 return;
151         }
152         change_nexthops(fi) {
153                 if (nh->nh_dev)
154                         dev_put(nh->nh_dev);
155                 nh->nh_dev = NULL;
156         } endfor_nexthops(fi);
157         fib_info_cnt--;
158         kfree(fi);
159 }
160
161 void fib_release_info(struct fib_info *fi)
162 {
163         spin_lock_bh(&fib_info_lock);
164         if (fi && --fi->fib_treeref == 0) {
165                 hlist_del(&fi->fib_hash);
166                 if (fi->fib_prefsrc)
167                         hlist_del(&fi->fib_lhash);
168                 change_nexthops(fi) {
169                         if (!nh->nh_dev)
170                                 continue;
171                         hlist_del(&nh->nh_hash);
172                 } endfor_nexthops(fi)
173                 fi->fib_dead = 1;
174                 fib_info_put(fi);
175         }
176         spin_unlock_bh(&fib_info_lock);
177 }
178
179 static __inline__ int nh_comp(const struct fib_info *fi, const struct fib_info *ofi)
180 {
181         const struct fib_nh *onh = ofi->fib_nh;
182
183         for_nexthops(fi) {
184                 if (nh->nh_oif != onh->nh_oif ||
185                     nh->nh_gw  != onh->nh_gw ||
186                     nh->nh_scope != onh->nh_scope ||
187 #ifdef CONFIG_IP_ROUTE_MULTIPATH
188                     nh->nh_weight != onh->nh_weight ||
189 #endif
190 #ifdef CONFIG_NET_CLS_ROUTE
191                     nh->nh_tclassid != onh->nh_tclassid ||
192 #endif
193                     ((nh->nh_flags^onh->nh_flags)&~RTNH_F_DEAD))
194                         return -1;
195                 onh++;
196         } endfor_nexthops(fi);
197         return 0;
198 }
199
200 static inline unsigned int fib_info_hashfn(const struct fib_info *fi)
201 {
202         unsigned int mask = (fib_hash_size - 1);
203         unsigned int val = fi->fib_nhs;
204
205         val ^= fi->fib_protocol;
206         val ^= fi->fib_prefsrc;
207         val ^= fi->fib_priority;
208
209         return (val ^ (val >> 7) ^ (val >> 12)) & mask;
210 }
211
212 static struct fib_info *fib_find_info(const struct fib_info *nfi)
213 {
214         struct hlist_head *head;
215         struct hlist_node *node;
216         struct fib_info *fi;
217         unsigned int hash;
218
219         hash = fib_info_hashfn(nfi);
220         head = &fib_info_hash[hash];
221
222         hlist_for_each_entry(fi, node, head, fib_hash) {
223                 if (fi->fib_nhs != nfi->fib_nhs)
224                         continue;
225                 if (nfi->fib_protocol == fi->fib_protocol &&
226                     nfi->fib_prefsrc == fi->fib_prefsrc &&
227                     nfi->fib_priority == fi->fib_priority &&
228                     memcmp(nfi->fib_metrics, fi->fib_metrics,
229                            sizeof(fi->fib_metrics)) == 0 &&
230                     ((nfi->fib_flags^fi->fib_flags)&~RTNH_F_DEAD) == 0 &&
231                     (nfi->fib_nhs == 0 || nh_comp(fi, nfi) == 0))
232                         return fi;
233         }
234
235         return NULL;
236 }
237
238 static inline unsigned int fib_devindex_hashfn(unsigned int val)
239 {
240         unsigned int mask = DEVINDEX_HASHSIZE - 1;
241
242         return (val ^
243                 (val >> DEVINDEX_HASHBITS) ^
244                 (val >> (DEVINDEX_HASHBITS * 2))) & mask;
245 }
246
247 /* Check, that the gateway is already configured.
248    Used only by redirect accept routine.
249  */
250
251 int ip_fib_check_default(u32 gw, struct net_device *dev)
252 {
253         struct hlist_head *head;
254         struct hlist_node *node;
255         struct fib_nh *nh;
256         unsigned int hash;
257
258         spin_lock(&fib_info_lock);
259
260         hash = fib_devindex_hashfn(dev->ifindex);
261         head = &fib_info_devhash[hash];
262         hlist_for_each_entry(nh, node, head, nh_hash) {
263                 if (nh->nh_dev == dev &&
264                     nh->nh_gw == gw &&
265                     !(nh->nh_flags&RTNH_F_DEAD)) {
266                         spin_unlock(&fib_info_lock);
267                         return 0;
268                 }
269         }
270
271         spin_unlock(&fib_info_lock);
272
273         return -1;
274 }
275
276 void rtmsg_fib(int event, u32 key, struct fib_alias *fa,
277                int dst_len, u32 tb_id, struct nl_info *info)
278 {
279         struct sk_buff *skb;
280         int payload = sizeof(struct rtmsg) + 256;
281         u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0;
282         int err = -ENOBUFS;
283
284         skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL);
285         if (skb == NULL)
286                 goto errout;
287
288         err = fib_dump_info(skb, info->pid, seq, event, tb_id,
289                             fa->fa_type, fa->fa_scope, key, dst_len,
290                             fa->fa_tos, fa->fa_info, 0);
291         if (err < 0) {
292                 kfree_skb(skb);
293                 goto errout;
294         }
295
296         err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE,
297                           info->nlh, GFP_KERNEL);
298 errout:
299         if (err < 0)
300                 rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err);
301 }
302
303 /* Return the first fib alias matching TOS with
304  * priority less than or equal to PRIO.
305  */
306 struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio)
307 {
308         if (fah) {
309                 struct fib_alias *fa;
310                 list_for_each_entry(fa, fah, fa_list) {
311                         if (fa->fa_tos > tos)
312                                 continue;
313                         if (fa->fa_info->fib_priority >= prio ||
314                             fa->fa_tos < tos)
315                                 return fa;
316                 }
317         }
318         return NULL;
319 }
320
321 int fib_detect_death(struct fib_info *fi, int order,
322                      struct fib_info **last_resort, int *last_idx, int *dflt)
323 {
324         struct neighbour *n;
325         int state = NUD_NONE;
326
327         n = neigh_lookup(&arp_tbl, &fi->fib_nh[0].nh_gw, fi->fib_dev);
328         if (n) {
329                 state = n->nud_state;
330                 neigh_release(n);
331         }
332         if (state==NUD_REACHABLE)
333                 return 0;
334         if ((state&NUD_VALID) && order != *dflt)
335                 return 0;
336         if ((state&NUD_VALID) ||
337             (*last_idx<0 && order > *dflt)) {
338                 *last_resort = fi;
339                 *last_idx = order;
340         }
341         return 1;
342 }
343
344 #ifdef CONFIG_IP_ROUTE_MULTIPATH
345
/*
 * Count the rtnexthop records in a multipath payload of "remaining"
 * bytes.  Returns 0 when bytes are left over after the last
 * well-formed record, i.e. the configuration is invalid.
 */
static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining)
{
        int nhs;

        for (nhs = 0; rtnh_ok(rtnh, remaining); nhs++)
                rtnh = rtnh_next(rtnh, &remaining);

        /* leftover implies invalid nexthop configuration, discard it */
        if (remaining > 0)
                return 0;

        return nhs;
}
358
359 static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh,
360                        int remaining, struct fib_config *cfg)
361 {
362         change_nexthops(fi) {
363                 int attrlen;
364
365                 if (!rtnh_ok(rtnh, remaining))
366                         return -EINVAL;
367
368                 nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags;
369                 nh->nh_oif = rtnh->rtnh_ifindex;
370                 nh->nh_weight = rtnh->rtnh_hops + 1;
371
372                 attrlen = rtnh_attrlen(rtnh);
373                 if (attrlen > 0) {
374                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
375
376                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
377                         nh->nh_gw = nla ? nla_get_u32(nla) : 0;
378 #ifdef CONFIG_NET_CLS_ROUTE
379                         nla = nla_find(attrs, attrlen, RTA_FLOW);
380                         nh->nh_tclassid = nla ? nla_get_u32(nla) : 0;
381 #endif
382                 }
383
384                 rtnh = rtnh_next(rtnh, &remaining);
385         } endfor_nexthops(fi);
386
387         return 0;
388 }
389
390 #endif
391
392 int fib_nh_match(struct fib_config *cfg, struct fib_info *fi)
393 {
394 #ifdef CONFIG_IP_ROUTE_MULTIPATH
395         struct rtnexthop *rtnh;
396         int remaining;
397 #endif
398
399         if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority)
400                 return 1;
401
402         if (cfg->fc_oif || cfg->fc_gw) {
403                 if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) &&
404                     (!cfg->fc_gw  || cfg->fc_gw == fi->fib_nh->nh_gw))
405                         return 0;
406                 return 1;
407         }
408
409 #ifdef CONFIG_IP_ROUTE_MULTIPATH
410         if (cfg->fc_mp == NULL)
411                 return 0;
412
413         rtnh = cfg->fc_mp;
414         remaining = cfg->fc_mp_len;
415         
416         for_nexthops(fi) {
417                 int attrlen;
418
419                 if (!rtnh_ok(rtnh, remaining))
420                         return -EINVAL;
421
422                 if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif)
423                         return 1;
424
425                 attrlen = rtnh_attrlen(rtnh);
426                 if (attrlen < 0) {
427                         struct nlattr *nla, *attrs = rtnh_attrs(rtnh);
428
429                         nla = nla_find(attrs, attrlen, RTA_GATEWAY);
430                         if (nla && nla_get_u32(nla) != nh->nh_gw)
431                                 return 1;
432 #ifdef CONFIG_NET_CLS_ROUTE
433                         nla = nla_find(attrs, attrlen, RTA_FLOW);
434                         if (nla && nla_get_u32(nla) != nh->nh_tclassid)
435                                 return 1;
436 #endif
437                 }
438
439                 rtnh = rtnh_next(rtnh, &remaining);
440         } endfor_nexthops(fi);
441 #endif
442         return 0;
443 }
444
445
446 /*
447    Picture
448    -------
449
450    The semantics of nexthops are very messy for historical reasons.
451    We have to take into account, that:
452    a) gateway can be actually local interface address,
453       so that gatewayed route is direct.
454    b) gateway must be on-link address, possibly
455       described not by an ifaddr, but also by a direct route.
456    c) If both gateway and interface are specified, they should not
457       contradict.
458    d) If we use tunnel routes, gateway could be not on-link.
459
460    Attempt to reconcile all of these (alas, self-contradictory) conditions
461    results in pretty ugly and hairy code with obscure logic.
462
463    I chose to generalize it instead, so that the size
464    of code does not increase practically, but it becomes
465    much more general.
466    Every prefix is assigned a "scope" value: "host" is local address,
467    "link" is direct route,
468    [ ... "site" ... "interior" ... ]
469    and "universe" is true gateway route with global meaning.
470
471    Every prefix refers to a set of "nexthop"s (gw, oif),
472    where gw must have narrower scope. This recursion stops
473    when gw has LOCAL scope or if "nexthop" is declared ONLINK,
474    which means that gw is forced to be on link.
475
476    Code is still hairy, but now it is apparently logically
477    consistent and very flexible. E.g. as a by-product it allows
478    independent exterior and interior routing processes to
479    co-exist in peace.
480
481    Normally it looks as following.
482
483    {universe prefix}  -> (gw, oif) [scope link]
484                           |
485                           |-> {link prefix} -> (gw, oif) [scope local]
486                                                 |
487                                                 |-> {local prefix} (terminal node)
488  */
489
490 static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi,
491                         struct fib_nh *nh)
492 {
493         int err;
494
495         if (nh->nh_gw) {
496                 struct fib_result res;
497
498 #ifdef CONFIG_IP_ROUTE_PERVASIVE
499                 if (nh->nh_flags&RTNH_F_PERVASIVE)
500                         return 0;
501 #endif
502                 if (nh->nh_flags&RTNH_F_ONLINK) {
503                         struct net_device *dev;
504
505                         if (cfg->fc_scope >= RT_SCOPE_LINK)
506                                 return -EINVAL;
507                         if (inet_addr_type(nh->nh_gw) != RTN_UNICAST)
508                                 return -EINVAL;
509                         if ((dev = __dev_get_by_index(nh->nh_oif)) == NULL)
510                                 return -ENODEV;
511                         if (!(dev->flags&IFF_UP))
512                                 return -ENETDOWN;
513                         nh->nh_dev = dev;
514                         dev_hold(dev);
515                         nh->nh_scope = RT_SCOPE_LINK;
516                         return 0;
517                 }
518                 {
519                         struct flowi fl = {
520                                 .nl_u = {
521                                         .ip4_u = {
522                                                 .daddr = nh->nh_gw,
523                                                 .scope = cfg->fc_scope + 1,
524                                         },
525                                 },
526                                 .oif = nh->nh_oif,
527                         };
528
529                         /* It is not necessary, but requires a bit of thinking */
530                         if (fl.fl4_scope < RT_SCOPE_LINK)
531                                 fl.fl4_scope = RT_SCOPE_LINK;
532                         if ((err = fib_lookup(&fl, &res)) != 0)
533                                 return err;
534                 }
535                 err = -EINVAL;
536                 if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
537                         goto out;
538                 nh->nh_scope = res.scope;
539                 nh->nh_oif = FIB_RES_OIF(res);
540                 if ((nh->nh_dev = FIB_RES_DEV(res)) == NULL)
541                         goto out;
542                 dev_hold(nh->nh_dev);
543                 err = -ENETDOWN;
544                 if (!(nh->nh_dev->flags & IFF_UP))
545                         goto out;
546                 err = 0;
547 out:
548                 fib_res_put(&res);
549                 return err;
550         } else {
551                 struct in_device *in_dev;
552
553                 if (nh->nh_flags&(RTNH_F_PERVASIVE|RTNH_F_ONLINK))
554                         return -EINVAL;
555
556                 in_dev = inetdev_by_index(nh->nh_oif);
557                 if (in_dev == NULL)
558                         return -ENODEV;
559                 if (!(in_dev->dev->flags&IFF_UP)) {
560                         in_dev_put(in_dev);
561                         return -ENETDOWN;
562                 }
563                 nh->nh_dev = in_dev->dev;
564                 dev_hold(nh->nh_dev);
565                 nh->nh_scope = RT_SCOPE_HOST;
566                 in_dev_put(in_dev);
567         }
568         return 0;
569 }
570
571 static inline unsigned int fib_laddr_hashfn(u32 val)
572 {
573         unsigned int mask = (fib_hash_size - 1);
574
575         return (val ^ (val >> 7) ^ (val >> 14)) & mask;
576 }
577
578 static struct hlist_head *fib_hash_alloc(int bytes)
579 {
580         if (bytes <= PAGE_SIZE)
581                 return kmalloc(bytes, GFP_KERNEL);
582         else
583                 return (struct hlist_head *)
584                         __get_free_pages(GFP_KERNEL, get_order(bytes));
585 }
586
587 static void fib_hash_free(struct hlist_head *hash, int bytes)
588 {
589         if (!hash)
590                 return;
591
592         if (bytes <= PAGE_SIZE)
593                 kfree(hash);
594         else
595                 free_pages((unsigned long) hash, get_order(bytes));
596 }
597
598 static void fib_hash_move(struct hlist_head *new_info_hash,
599                           struct hlist_head *new_laddrhash,
600                           unsigned int new_size)
601 {
602         struct hlist_head *old_info_hash, *old_laddrhash;
603         unsigned int old_size = fib_hash_size;
604         unsigned int i, bytes;
605
606         spin_lock_bh(&fib_info_lock);
607         old_info_hash = fib_info_hash;
608         old_laddrhash = fib_info_laddrhash;
609         fib_hash_size = new_size;
610
611         for (i = 0; i < old_size; i++) {
612                 struct hlist_head *head = &fib_info_hash[i];
613                 struct hlist_node *node, *n;
614                 struct fib_info *fi;
615
616                 hlist_for_each_entry_safe(fi, node, n, head, fib_hash) {
617                         struct hlist_head *dest;
618                         unsigned int new_hash;
619
620                         hlist_del(&fi->fib_hash);
621
622                         new_hash = fib_info_hashfn(fi);
623                         dest = &new_info_hash[new_hash];
624                         hlist_add_head(&fi->fib_hash, dest);
625                 }
626         }
627         fib_info_hash = new_info_hash;
628
629         for (i = 0; i < old_size; i++) {
630                 struct hlist_head *lhead = &fib_info_laddrhash[i];
631                 struct hlist_node *node, *n;
632                 struct fib_info *fi;
633
634                 hlist_for_each_entry_safe(fi, node, n, lhead, fib_lhash) {
635                         struct hlist_head *ldest;
636                         unsigned int new_hash;
637
638                         hlist_del(&fi->fib_lhash);
639
640                         new_hash = fib_laddr_hashfn(fi->fib_prefsrc);
641                         ldest = &new_laddrhash[new_hash];
642                         hlist_add_head(&fi->fib_lhash, ldest);
643                 }
644         }
645         fib_info_laddrhash = new_laddrhash;
646
647         spin_unlock_bh(&fib_info_lock);
648
649         bytes = old_size * sizeof(struct hlist_head *);
650         fib_hash_free(old_info_hash, bytes);
651         fib_hash_free(old_laddrhash, bytes);
652 }
653
654 struct fib_info *fib_create_info(struct fib_config *cfg)
655 {
656         int err;
657         struct fib_info *fi = NULL;
658         struct fib_info *ofi;
659         int nhs = 1;
660
661         /* Fast check to catch the most weird cases */
662         if (fib_props[cfg->fc_type].scope > cfg->fc_scope)
663                 goto err_inval;
664
665 #ifdef CONFIG_IP_ROUTE_MULTIPATH
666         if (cfg->fc_mp) {
667                 nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len);
668                 if (nhs == 0)
669                         goto err_inval;
670         }
671 #endif
672 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
673         if (cfg->fc_mp_alg) {
674                 if (cfg->fc_mp_alg < IP_MP_ALG_NONE ||
675                     cfg->fc_mp_alg > IP_MP_ALG_MAX)
676                         goto err_inval;
677         }
678 #endif
679
680         err = -ENOBUFS;
681         if (fib_info_cnt >= fib_hash_size) {
682                 unsigned int new_size = fib_hash_size << 1;
683                 struct hlist_head *new_info_hash;
684                 struct hlist_head *new_laddrhash;
685                 unsigned int bytes;
686
687                 if (!new_size)
688                         new_size = 1;
689                 bytes = new_size * sizeof(struct hlist_head *);
690                 new_info_hash = fib_hash_alloc(bytes);
691                 new_laddrhash = fib_hash_alloc(bytes);
692                 if (!new_info_hash || !new_laddrhash) {
693                         fib_hash_free(new_info_hash, bytes);
694                         fib_hash_free(new_laddrhash, bytes);
695                 } else {
696                         memset(new_info_hash, 0, bytes);
697                         memset(new_laddrhash, 0, bytes);
698
699                         fib_hash_move(new_info_hash, new_laddrhash, new_size);
700                 }
701
702                 if (!fib_hash_size)
703                         goto failure;
704         }
705
706         fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
707         if (fi == NULL)
708                 goto failure;
709         fib_info_cnt++;
710
711         fi->fib_protocol = cfg->fc_protocol;
712         fi->fib_flags = cfg->fc_flags;
713         fi->fib_priority = cfg->fc_priority;
714         fi->fib_prefsrc = cfg->fc_prefsrc;
715
716         fi->fib_nhs = nhs;
717         change_nexthops(fi) {
718                 nh->nh_parent = fi;
719         } endfor_nexthops(fi)
720
721         if (cfg->fc_mx) {
722                 struct nlattr *nla;
723                 int remaining;
724
725                 nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) {
726                         int type = nla->nla_type;
727
728                         if (type) {
729                                 if (type > RTAX_MAX)
730                                         goto err_inval;
731                                 fi->fib_metrics[type - 1] = nla_get_u32(nla);
732                         }
733                 }
734         }
735
736         if (cfg->fc_mp) {
737 #ifdef CONFIG_IP_ROUTE_MULTIPATH
738                 err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg);
739                 if (err != 0)
740                         goto failure;
741                 if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif)
742                         goto err_inval;
743                 if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw)
744                         goto err_inval;
745 #ifdef CONFIG_NET_CLS_ROUTE
746                 if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow)
747                         goto err_inval;
748 #endif
749 #else
750                 goto err_inval;
751 #endif
752         } else {
753                 struct fib_nh *nh = fi->fib_nh;
754
755                 nh->nh_oif = cfg->fc_oif;
756                 nh->nh_gw = cfg->fc_gw;
757                 nh->nh_flags = cfg->fc_flags;
758 #ifdef CONFIG_NET_CLS_ROUTE
759                 nh->nh_tclassid = cfg->fc_flow;
760 #endif
761 #ifdef CONFIG_IP_ROUTE_MULTIPATH
762                 nh->nh_weight = 1;
763 #endif
764         }
765
766 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
767         fi->fib_mp_alg = cfg->fc_mp_alg;
768 #endif
769
770         if (fib_props[cfg->fc_type].error) {
771                 if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp)
772                         goto err_inval;
773                 goto link_it;
774         }
775
776         if (cfg->fc_scope > RT_SCOPE_HOST)
777                 goto err_inval;
778
779         if (cfg->fc_scope == RT_SCOPE_HOST) {
780                 struct fib_nh *nh = fi->fib_nh;
781
782                 /* Local address is added. */
783                 if (nhs != 1 || nh->nh_gw)
784                         goto err_inval;
785                 nh->nh_scope = RT_SCOPE_NOWHERE;
786                 nh->nh_dev = dev_get_by_index(fi->fib_nh->nh_oif);
787                 err = -ENODEV;
788                 if (nh->nh_dev == NULL)
789                         goto failure;
790         } else {
791                 change_nexthops(fi) {
792                         if ((err = fib_check_nh(cfg, fi, nh)) != 0)
793                                 goto failure;
794                 } endfor_nexthops(fi)
795         }
796
797         if (fi->fib_prefsrc) {
798                 if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst ||
799                     fi->fib_prefsrc != cfg->fc_dst)
800                         if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL)
801                                 goto err_inval;
802         }
803
804 link_it:
805         if ((ofi = fib_find_info(fi)) != NULL) {
806                 fi->fib_dead = 1;
807                 free_fib_info(fi);
808                 ofi->fib_treeref++;
809                 return ofi;
810         }
811
812         fi->fib_treeref++;
813         atomic_inc(&fi->fib_clntref);
814         spin_lock_bh(&fib_info_lock);
815         hlist_add_head(&fi->fib_hash,
816                        &fib_info_hash[fib_info_hashfn(fi)]);
817         if (fi->fib_prefsrc) {
818                 struct hlist_head *head;
819
820                 head = &fib_info_laddrhash[fib_laddr_hashfn(fi->fib_prefsrc)];
821                 hlist_add_head(&fi->fib_lhash, head);
822         }
823         change_nexthops(fi) {
824                 struct hlist_head *head;
825                 unsigned int hash;
826
827                 if (!nh->nh_dev)
828                         continue;
829                 hash = fib_devindex_hashfn(nh->nh_dev->ifindex);
830                 head = &fib_info_devhash[hash];
831                 hlist_add_head(&nh->nh_hash, head);
832         } endfor_nexthops(fi)
833         spin_unlock_bh(&fib_info_lock);
834         return fi;
835
836 err_inval:
837         err = -EINVAL;
838
839 failure:
840         if (fi) {
841                 fi->fib_dead = 1;
842                 free_fib_info(fi);
843         }
844
845         return ERR_PTR(err);
846 }
847
848 /* Note! fib_semantic_match intentionally uses  RCU list functions. */
849 int fib_semantic_match(struct list_head *head, const struct flowi *flp,
850                        struct fib_result *res, __u32 zone, __u32 mask, 
851                         int prefixlen)
852 {
853         struct fib_alias *fa;
854         int nh_sel = 0;
855
856         list_for_each_entry_rcu(fa, head, fa_list) {
857                 int err;
858
859                 if (fa->fa_tos &&
860                     fa->fa_tos != flp->fl4_tos)
861                         continue;
862
863                 if (fa->fa_scope < flp->fl4_scope)
864                         continue;
865
866                 fa->fa_state |= FA_S_ACCESSED;
867
868                 err = fib_props[fa->fa_type].error;
869                 if (err == 0) {
870                         struct fib_info *fi = fa->fa_info;
871
872                         if (fi->fib_flags & RTNH_F_DEAD)
873                                 continue;
874
875                         switch (fa->fa_type) {
876                         case RTN_UNICAST:
877                         case RTN_LOCAL:
878                         case RTN_BROADCAST:
879                         case RTN_ANYCAST:
880                         case RTN_MULTICAST:
881                                 for_nexthops(fi) {
882                                         if (nh->nh_flags&RTNH_F_DEAD)
883                                                 continue;
884                                         if (!flp->oif || flp->oif == nh->nh_oif)
885                                                 break;
886                                 }
887 #ifdef CONFIG_IP_ROUTE_MULTIPATH
888                                 if (nhsel < fi->fib_nhs) {
889                                         nh_sel = nhsel;
890                                         goto out_fill_res;
891                                 }
892 #else
893                                 if (nhsel < 1) {
894                                         goto out_fill_res;
895                                 }
896 #endif
897                                 endfor_nexthops(fi);
898                                 continue;
899
900                         default:
901                                 printk(KERN_DEBUG "impossible 102\n");
902                                 return -EINVAL;
903                         };
904                 }
905                 return err;
906         }
907         return 1;
908
909 out_fill_res:
910         res->prefixlen = prefixlen;
911         res->nh_sel = nh_sel;
912         res->type = fa->fa_type;
913         res->scope = fa->fa_scope;
914         res->fi = fa->fa_info;
915 #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED
916         res->netmask = mask;
917         res->network = zone &
918                 (0xFFFFFFFF >> (32 - prefixlen));
919 #endif
920         atomic_inc(&res->fi->fib_clntref);
921         return 0;
922 }
923
924 /* Find appropriate source address to this destination */
925
926 u32 __fib_res_prefsrc(struct fib_result *res)
927 {
928         return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope);
929 }
930
931 int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
932                   u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos,
933                   struct fib_info *fi, unsigned int flags)
934 {
935         struct nlmsghdr *nlh;
936         struct rtmsg *rtm;
937
938         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags);
939         if (nlh == NULL)
940                 return -ENOBUFS;
941
942         rtm = nlmsg_data(nlh);
943         rtm->rtm_family = AF_INET;
944         rtm->rtm_dst_len = dst_len;
945         rtm->rtm_src_len = 0;
946         rtm->rtm_tos = tos;
947         rtm->rtm_table = tb_id;
948         NLA_PUT_U32(skb, RTA_TABLE, tb_id);
949         rtm->rtm_type = type;
950         rtm->rtm_flags = fi->fib_flags;
951         rtm->rtm_scope = scope;
952         rtm->rtm_protocol = fi->fib_protocol;
953
954         if (rtm->rtm_dst_len)
955                 NLA_PUT_U32(skb, RTA_DST, dst);
956
957         if (fi->fib_priority)
958                 NLA_PUT_U32(skb, RTA_PRIORITY, fi->fib_priority);
959
960         if (rtnetlink_put_metrics(skb, fi->fib_metrics) < 0)
961                 goto nla_put_failure;
962
963         if (fi->fib_prefsrc)
964                 NLA_PUT_U32(skb, RTA_PREFSRC, fi->fib_prefsrc);
965
966         if (fi->fib_nhs == 1) {
967                 if (fi->fib_nh->nh_gw)
968                         NLA_PUT_U32(skb, RTA_GATEWAY, fi->fib_nh->nh_gw);
969
970                 if (fi->fib_nh->nh_oif)
971                         NLA_PUT_U32(skb, RTA_OIF, fi->fib_nh->nh_oif);
972 #ifdef CONFIG_NET_CLS_ROUTE
973                 if (fi->fib_nh[0].nh_tclassid)
974                         NLA_PUT_U32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid);
975 #endif
976         }
977 #ifdef CONFIG_IP_ROUTE_MULTIPATH
978         if (fi->fib_nhs > 1) {
979                 struct rtnexthop *rtnh;
980                 struct nlattr *mp;
981
982                 mp = nla_nest_start(skb, RTA_MULTIPATH);
983                 if (mp == NULL)
984                         goto nla_put_failure;
985
986                 for_nexthops(fi) {
987                         rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh));
988                         if (rtnh == NULL)
989                                 goto nla_put_failure;
990
991                         rtnh->rtnh_flags = nh->nh_flags & 0xFF;
992                         rtnh->rtnh_hops = nh->nh_weight - 1;
993                         rtnh->rtnh_ifindex = nh->nh_oif;
994
995                         if (nh->nh_gw)
996                                 NLA_PUT_U32(skb, RTA_GATEWAY, nh->nh_gw);
997 #ifdef CONFIG_NET_CLS_ROUTE
998                         if (nh->nh_tclassid)
999                                 NLA_PUT_U32(skb, RTA_FLOW, nh->nh_tclassid);
1000 #endif
1001                         /* length of rtnetlink header + attributes */
1002                         rtnh->rtnh_len = nlmsg_get_pos(skb) - (void *) rtnh;
1003                 } endfor_nexthops(fi);
1004
1005                 nla_nest_end(skb, mp);
1006         }
1007 #endif
1008         return nlmsg_end(skb, nlh);
1009
1010 nla_put_failure:
1011         return nlmsg_cancel(skb, nlh);
1012 }
1013
1014 /*
1015    Update FIB if:
1016    - local address disappeared -> we must delete all the entries
1017      referring to it.
1018    - device went down -> we must shutdown all nexthops going via it.
1019  */
1020
1021 int fib_sync_down(u32 local, struct net_device *dev, int force)
1022 {
1023         int ret = 0;
1024         int scope = RT_SCOPE_NOWHERE;
1025         
1026         if (force)
1027                 scope = -1;
1028
1029         if (local && fib_info_laddrhash) {
1030                 unsigned int hash = fib_laddr_hashfn(local);
1031                 struct hlist_head *head = &fib_info_laddrhash[hash];
1032                 struct hlist_node *node;
1033                 struct fib_info *fi;
1034
1035                 hlist_for_each_entry(fi, node, head, fib_lhash) {
1036                         if (fi->fib_prefsrc == local) {
1037                                 fi->fib_flags |= RTNH_F_DEAD;
1038                                 ret++;
1039                         }
1040                 }
1041         }
1042
1043         if (dev) {
1044                 struct fib_info *prev_fi = NULL;
1045                 unsigned int hash = fib_devindex_hashfn(dev->ifindex);
1046                 struct hlist_head *head = &fib_info_devhash[hash];
1047                 struct hlist_node *node;
1048                 struct fib_nh *nh;
1049
1050                 hlist_for_each_entry(nh, node, head, nh_hash) {
1051                         struct fib_info *fi = nh->nh_parent;
1052                         int dead;
1053
1054                         BUG_ON(!fi->fib_nhs);
1055                         if (nh->nh_dev != dev || fi == prev_fi)
1056                                 continue;
1057                         prev_fi = fi;
1058                         dead = 0;
1059                         change_nexthops(fi) {
1060                                 if (nh->nh_flags&RTNH_F_DEAD)
1061                                         dead++;
1062                                 else if (nh->nh_dev == dev &&
1063                                          nh->nh_scope != scope) {
1064                                         nh->nh_flags |= RTNH_F_DEAD;
1065 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1066                                         spin_lock_bh(&fib_multipath_lock);
1067                                         fi->fib_power -= nh->nh_power;
1068                                         nh->nh_power = 0;
1069                                         spin_unlock_bh(&fib_multipath_lock);
1070 #endif
1071                                         dead++;
1072                                 }
1073 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1074                                 if (force > 1 && nh->nh_dev == dev) {
1075                                         dead = fi->fib_nhs;
1076                                         break;
1077                                 }
1078 #endif
1079                         } endfor_nexthops(fi)
1080                         if (dead == fi->fib_nhs) {
1081                                 fi->fib_flags |= RTNH_F_DEAD;
1082                                 ret++;
1083                         }
1084                 }
1085         }
1086
1087         return ret;
1088 }
1089
1090 #ifdef CONFIG_IP_ROUTE_MULTIPATH
1091
1092 /*
1093    Dead device goes up. We wake up dead nexthops.
1094    It takes sense only on multipath routes.
1095  */
1096
1097 int fib_sync_up(struct net_device *dev)
1098 {
1099         struct fib_info *prev_fi;
1100         unsigned int hash;
1101         struct hlist_head *head;
1102         struct hlist_node *node;
1103         struct fib_nh *nh;
1104         int ret;
1105
1106         if (!(dev->flags&IFF_UP))
1107                 return 0;
1108
1109         prev_fi = NULL;
1110         hash = fib_devindex_hashfn(dev->ifindex);
1111         head = &fib_info_devhash[hash];
1112         ret = 0;
1113
1114         hlist_for_each_entry(nh, node, head, nh_hash) {
1115                 struct fib_info *fi = nh->nh_parent;
1116                 int alive;
1117
1118                 BUG_ON(!fi->fib_nhs);
1119                 if (nh->nh_dev != dev || fi == prev_fi)
1120                         continue;
1121
1122                 prev_fi = fi;
1123                 alive = 0;
1124                 change_nexthops(fi) {
1125                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1126                                 alive++;
1127                                 continue;
1128                         }
1129                         if (nh->nh_dev == NULL || !(nh->nh_dev->flags&IFF_UP))
1130                                 continue;
1131                         if (nh->nh_dev != dev || !__in_dev_get_rtnl(dev))
1132                                 continue;
1133                         alive++;
1134                         spin_lock_bh(&fib_multipath_lock);
1135                         nh->nh_power = 0;
1136                         nh->nh_flags &= ~RTNH_F_DEAD;
1137                         spin_unlock_bh(&fib_multipath_lock);
1138                 } endfor_nexthops(fi)
1139
1140                 if (alive > 0) {
1141                         fi->fib_flags &= ~RTNH_F_DEAD;
1142                         ret++;
1143                 }
1144         }
1145
1146         return ret;
1147 }
1148
1149 /*
1150    The algorithm is suboptimal, but it provides really
1151    fair weighted route distribution.
1152  */
1153
1154 void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
1155 {
1156         struct fib_info *fi = res->fi;
1157         int w;
1158
1159         spin_lock_bh(&fib_multipath_lock);
1160         if (fi->fib_power <= 0) {
1161                 int power = 0;
1162                 change_nexthops(fi) {
1163                         if (!(nh->nh_flags&RTNH_F_DEAD)) {
1164                                 power += nh->nh_weight;
1165                                 nh->nh_power = nh->nh_weight;
1166                         }
1167                 } endfor_nexthops(fi);
1168                 fi->fib_power = power;
1169                 if (power <= 0) {
1170                         spin_unlock_bh(&fib_multipath_lock);
1171                         /* Race condition: route has just become dead. */
1172                         res->nh_sel = 0;
1173                         return;
1174                 }
1175         }
1176
1177
1178         /* w should be random number [0..fi->fib_power-1],
1179            it is pretty bad approximation.
1180          */
1181
1182         w = jiffies % fi->fib_power;
1183
1184         change_nexthops(fi) {
1185                 if (!(nh->nh_flags&RTNH_F_DEAD) && nh->nh_power) {
1186                         if ((w -= nh->nh_power) <= 0) {
1187                                 nh->nh_power--;
1188                                 fi->fib_power--;
1189                                 res->nh_sel = nhsel;
1190                                 spin_unlock_bh(&fib_multipath_lock);
1191                                 return;
1192                         }
1193                 }
1194         } endfor_nexthops(fi);
1195
1196         /* Race condition: route has just become dead. */
1197         res->nh_sel = 0;
1198         spin_unlock_bh(&fib_multipath_lock);
1199 }
1200 #endif