6  *      Kazunori MIYAZAWA @USAGI
 
   7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
 
   9  *      Kazunori MIYAZAWA @USAGI
 
  11  *              Split up af-specific portion
 
  12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
 
  16 #include <linux/config.h>
 
  17 #include <linux/slab.h>
 
  18 #include <linux/kmod.h>
 
  19 #include <linux/list.h>
 
  20 #include <linux/spinlock.h>
 
  21 #include <linux/workqueue.h>
 
  22 #include <linux/notifier.h>
 
  23 #include <linux/netdevice.h>
 
  24 #include <linux/netfilter.h>
 
  25 #include <linux/module.h>
 
  29 DEFINE_MUTEX(xfrm_cfg_mutex);
 
  30 EXPORT_SYMBOL(xfrm_cfg_mutex);
 
  /* Taken (read or write, usually the _bh variants) by the list walks and
   * link/unlink operations on xfrm_policy_list further down in this file. */
  32 static DEFINE_RWLOCK(xfrm_policy_lock);
 
  /* Heads of the singly linked policy lists, chained through ->next.
   * Slots below XFRM_POLICY_MAX are indexed by direction; per-socket
   * policies are linked at XFRM_POLICY_MAX + dir. */
  34 struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
 
  35 EXPORT_SYMBOL(xfrm_policy_list);
 
  /* Guards the per-family afinfo table below. */
  37 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
 
  /* Registered per-address-family operations, indexed by family. */
  38 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
 
  /* Slab cache for struct xfrm_dst; created in xfrm_policy_init() and handed
   * to each family's dst_ops when its afinfo is registered. */
  40 static kmem_cache_t *xfrm_dst_cache __read_mostly;
 
  /* Deferred destruction: xfrm_policy_kill() queues policies on
   * xfrm_policy_gc_list (under xfrm_policy_gc_lock) and schedules
   * xfrm_policy_gc_work, whose handler is xfrm_policy_gc_task(). */
  42 static struct work_struct xfrm_policy_gc_work;
 
  43 static struct list_head xfrm_policy_gc_list =
 
  44         LIST_HEAD_INIT(xfrm_policy_gc_list);
 
  45 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
 
  /* Forward declarations; both are defined near the end of the file. */
  47 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
 
  48 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
 
  /* Register a transform type for an address family.
   *
   * Stores `type` in the family's type map slot indexed by type->proto,
   * but only when that slot is currently NULL. The slot is updated under
   * typemap->lock, and the afinfo obtained at entry is released with
   * xfrm_policy_put_afinfo() afterwards.
   *
   * NOTE(review): this listing omits several lines of the function
   * (braces, error assignments, the return statements), so the exact
   * return values are not visible here.
   */
  50 int xfrm_register_type(struct xfrm_type *type, unsigned short family)
 
  52         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
  53         struct xfrm_type_map *typemap;
 
  56         if (unlikely(afinfo == NULL))
 
  58         typemap = afinfo->type_map;
 
  60         write_lock(&typemap->lock);
 
  61         if (likely(typemap->map[type->proto] == NULL))
 
  62                 typemap->map[type->proto] = type;
 
  65         write_unlock(&typemap->lock);
 
  66         xfrm_policy_put_afinfo(afinfo);
 
  69 EXPORT_SYMBOL(xfrm_register_type);
 
  /* Unregister a transform type for an address family.
   *
   * Under typemap->lock, compares the slot for type->proto against `type`
   * and clears the slot to NULL (presumably on the branch where they
   * match; the mismatch branch body is not part of this listing).
   * The afinfo obtained at entry is released before returning.
   *
   * NOTE(review): return statements and error codes are not visible in
   * this listing.
   */
  71 int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
 
  73         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
  74         struct xfrm_type_map *typemap;
 
  77         if (unlikely(afinfo == NULL))
 
  79         typemap = afinfo->type_map;
 
  81         write_lock(&typemap->lock);
 
  82         if (unlikely(typemap->map[type->proto] != type))
 
  85                 typemap->map[type->proto] = NULL;
 
  86         write_unlock(&typemap->lock);
 
  87         xfrm_policy_put_afinfo(afinfo);
 
  90 EXPORT_SYMBOL(xfrm_unregister_type);
 
  /* Look up the transform type registered for (proto, family).
   *
   * Reads the family's type map slot under the map's read lock and tries
   * to take a reference on the owning module with try_module_get(). When
   * no type is found and no module load has been attempted yet, the
   * afinfo is released, request_module("xfrm-type-<family>-<proto>") is
   * called and modload_attempted is set, so the request is made at most
   * once per call.
   *
   * NOTE(review): the retry jump, the handling of a failed
   * try_module_get() and the return statements are not visible in this
   * listing.
   */
  92 struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
 
  94         struct xfrm_policy_afinfo *afinfo;
 
  95         struct xfrm_type_map *typemap;
 
  96         struct xfrm_type *type;
 
  97         int modload_attempted = 0;
 
 100         afinfo = xfrm_policy_get_afinfo(family);
 
 101         if (unlikely(afinfo == NULL))
 
 103         typemap = afinfo->type_map;
 
 105         read_lock(&typemap->lock);
 
 106         type = typemap->map[proto];
 
 107         if (unlikely(type && !try_module_get(type->owner)))
 
 109         read_unlock(&typemap->lock);
 
 110         if (!type && !modload_attempted) {
 
 111                 xfrm_policy_put_afinfo(afinfo);
 
 112                 request_module("xfrm-type-%d-%d",
 
 113                                (int) family, (int) proto);
 
 114                 modload_attempted = 1;
 
 118         xfrm_policy_put_afinfo(afinfo);
 
 /* Family-specific route lookup.
  *
  * Returns -EAFNOSUPPORT when no afinfo is registered for `family`.
  * Otherwise calls afinfo->dst_lookup(dst, fl) if that hook is set and
  * releases the afinfo afterwards.
  *
  * NOTE(review): the declaration of `err`, the branch taken when the hook
  * is NULL, and the final return are not visible in this listing.
  */
 122 int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
 
 123                     unsigned short family)
 
 125         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
 128         if (unlikely(afinfo == NULL))
 
 129                 return -EAFNOSUPPORT;
 
 131         if (likely(afinfo->dst_lookup != NULL))
 
 132                 err = afinfo->dst_lookup(dst, fl);
 
 135         xfrm_policy_put_afinfo(afinfo);
 
 138 EXPORT_SYMBOL(xfrm_dst_lookup);
 
 /* Release a transform type: drops the module reference on type->owner
  * (the counterpart of the try_module_get() in xfrm_get_type()). */
 140 void xfrm_put_type(struct xfrm_type *type)
 
 142         module_put(type->owner);
 
 /* Convert a timeout in seconds to jiffies, clamped so the result never
  * exceeds MAX_SCHEDULE_TIMEOUT-1.
  *
  * NOTE(review): only the clamping branch is visible in this listing; the
  * unclamped branch presumably returns secs*HZ - confirm against the
  * full source.
  */
 145 static inline unsigned long make_jiffies(long secs)
 
 147         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
 
 148                 return MAX_SCHEDULE_TIMEOUT-1;
 
 /* Per-policy lifetime timer callback; `data` is the struct xfrm_policy
  * pointer installed by xfrm_policy_alloc().
  *
  * With xp->lock read-held it evaluates the four configured lifetimes
  * (hard/soft x add/use). Each remaining time is computed as
  * limit + reference time - now, where the "use" variants fall back to
  * add_time when use_time is still zero. Soft timeouts are replaced by
  * XFRM_KM_TIMEOUT on a branch whose condition is not visible here. The
  * timer is re-armed via mod_timer() when `next` was lowered from
  * LONG_MAX.
  *
  * Two notifications are visible: km_policy_expired(xp, dir, 0, 0) on the
  * path that stays under the lock, and km_policy_expired(xp, dir, 1, 0)
  * after the lock is dropped and xfrm_policy_delete() returned 0.
  *
  * NOTE(review): the conditions, gotos and labels connecting these pieces
  * are missing from this listing; do not infer the exact control flow
  * from it.
  */
 153 static void xfrm_policy_timer(unsigned long data)
 
 155         struct xfrm_policy *xp = (struct xfrm_policy*)data;
 
 156         unsigned long now = (unsigned long)xtime.tv_sec;
 
 157         long next = LONG_MAX;
 
 161         read_lock(&xp->lock);
 
 166         dir = xfrm_policy_id2dir(xp->index);
 
 168         if (xp->lft.hard_add_expires_seconds) {
 
 169                 long tmo = xp->lft.hard_add_expires_seconds +
 
 170                         xp->curlft.add_time - now;
 
 176         if (xp->lft.hard_use_expires_seconds) {
 
 177                 long tmo = xp->lft.hard_use_expires_seconds +
 
 178                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
 
 184         if (xp->lft.soft_add_expires_seconds) {
 
 185                 long tmo = xp->lft.soft_add_expires_seconds +
 
 186                         xp->curlft.add_time - now;
 
 189                         tmo = XFRM_KM_TIMEOUT;
 
 194         if (xp->lft.soft_use_expires_seconds) {
 
 195                 long tmo = xp->lft.soft_use_expires_seconds +
 
 196                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
 
 199                         tmo = XFRM_KM_TIMEOUT;
 
 206                 km_policy_expired(xp, dir, 0, 0);
 
 207         if (next != LONG_MAX &&
 
 208             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
 
 212         read_unlock(&xp->lock);
 
 217         read_unlock(&xp->lock);
 
 218         if (!xfrm_policy_delete(xp, dir))
 
 219                 km_policy_expired(xp, dir, 1, 0);
 
 224 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 
 /* Allocate and initialise a policy object.
  *
  * The object is kmalloc()ed with the caller's gfp flags and zeroed. Its
  * reference count starts at 1, its rwlock is initialised, and its timer
  * is set up to call xfrm_policy_timer() with the policy itself as
  * argument (the timer is not armed here).
  *
  * NOTE(review): the NULL check on the allocation and the return
  * statement are not visible in this listing.
  */
 228 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
 
 230         struct xfrm_policy *policy;
 
 232         policy = kmalloc(sizeof(struct xfrm_policy), gfp);
 
 235                 memset(policy, 0, sizeof(struct xfrm_policy));
 
 236                 atomic_set(&policy->refcnt, 1);
 
 237                 rwlock_init(&policy->lock);
 
 238                 init_timer(&policy->timer);
 
 239                 policy->timer.data = (unsigned long)policy;
 
 240                 policy->timer.function = xfrm_policy_timer;
 
 244 EXPORT_SYMBOL(xfrm_policy_alloc);
 
 246 /* Destroy xfrm_policy: descendant resources must be released by this moment. */
 
 /* Final teardown of a policy object.
  *
  * Asserts (BUG_ON) that the policy has been marked dead and that no
  * bundles are still attached, checks whether the timer was still pending
  * via del_timer(), and releases the security context with
  * security_xfrm_policy_free().
  *
  * NOTE(review): the statement executed when del_timer() succeeds and the
  * actual freeing of the memory are not visible in this listing.
  */
 248 void __xfrm_policy_destroy(struct xfrm_policy *policy)
 
 250         BUG_ON(!policy->dead);
 
 252         BUG_ON(policy->bundles);
 
 254         if (del_timer(&policy->timer))
 
 257         security_xfrm_policy_free(policy);
 
 260 EXPORT_SYMBOL(__xfrm_policy_destroy);
 
 /* Release one policy from the garbage-collection work item.
  *
  * Detaches every bundle from policy->bundles one at a time, drops the
  * reference that a still-pending timer was holding (del_timer() returned
  * non-zero), inspects whether references other than the caller's remain,
  * and finally drops its own reference with xfrm_pol_put().
  *
  * NOTE(review): what is done with each detached dst and with the
  * "refcnt > 1" case is not visible in this listing.
  */
 262 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
 
 264         struct dst_entry *dst;
 
 266         while ((dst = policy->bundles) != NULL) {
 
 267                 policy->bundles = dst->next;
 
 271         if (del_timer(&policy->timer))
 
 272                 atomic_dec(&policy->refcnt);
 
 274         if (atomic_read(&policy->refcnt) > 1)
 
 277         xfrm_pol_put(policy);
 
 /* Work handler for xfrm_policy_gc_work (`data` is unused in the visible
  * lines).
  *
  * Moves the whole pending list onto a private list head while holding
  * xfrm_policy_gc_lock, then, with the lock released, runs
  * xfrm_policy_gc_kill() on every queued policy. list_for_each_safe() is
  * used, so the current entry may go away during iteration.
  */
 280 static void xfrm_policy_gc_task(void *data)
 
 282         struct xfrm_policy *policy;
 
 283         struct list_head *entry, *tmp;
 
 284         struct list_head gc_list = LIST_HEAD_INIT(gc_list);
 
 286         spin_lock_bh(&xfrm_policy_gc_lock);
 
 287         list_splice_init(&xfrm_policy_gc_list, &gc_list);
 
 288         spin_unlock_bh(&xfrm_policy_gc_lock);
 
 290         list_for_each_safe(entry, tmp, &gc_list) {
 
 291                 policy = list_entry(entry, struct xfrm_policy, list);
 
 292                 xfrm_policy_gc_kill(policy);
 
 296 /* Rule must be locked. Release descendant resources, announce
 
 297  * entry dead. The rule must be unlinked from lists by this moment.
 
 /* Hand a policy over to the garbage collector.
  *
  * A section guarded by write_lock_bh(&policy->lock) runs first (its
  * contents are not in this listing; a local `dead` flag is evaluated
  * right after it). Unless the `dead` branch is taken, the policy is
  * added to xfrm_policy_gc_list under xfrm_policy_gc_lock and
  * xfrm_policy_gc_work is scheduled to release it asynchronously.
  *
  * NOTE(review): the body of the locked section and of the
  * `if (unlikely(dead))` branch are missing here.
  */
 300 static void xfrm_policy_kill(struct xfrm_policy *policy)
 
 304         write_lock_bh(&policy->lock);
 
 307         write_unlock_bh(&policy->lock);
 
 309         if (unlikely(dead)) {
 
 314         spin_lock(&xfrm_policy_gc_lock);
 
 315         list_add(&policy->list, &xfrm_policy_gc_list);
 
 316         spin_unlock(&xfrm_policy_gc_lock);
 
 318         schedule_work(&xfrm_policy_gc_work);
 
 321 /* Generate new index... KAME seems to generate them ordered by cost
 
 322  * of an absolute unpredictability of ordering of rules. This will not pass. */
 
 /* Produce a policy index for list `dir`.
  *
  * A candidate is formed by OR-ing `dir` into the function-static counter
  * idx_generator, and the policies already on xfrm_policy_list[dir] are
  * then scanned. xfrm_policy_timer() later recovers the direction from an
  * index with xfrm_policy_id2dir().
  *
  * NOTE(review): the counter update, the comparison done inside the scan
  * and the return are not visible in this listing; presumably the scan
  * rejects candidates that are already in use.
  */
 323 static u32 xfrm_gen_index(int dir)
 
 326         struct xfrm_policy *p;
 
 327         static u32 idx_generator;
 
 330                 idx = (idx_generator | dir);
 
 334                 for (p = xfrm_policy_list[dir]; p; p = p->next) {
 
 /* Insert `policy` into the list for direction `dir`.
  *
  * Phase 1 (xfrm_policy_lock write-held): walks the list looking for an
  * existing policy with a byte-identical selector and a matching security
  * context (it becomes `delpol`), while priorities are compared to find
  * the insertion position. One visible path drops the lock inside the
  * match branch (presumably the `excl` rejection - confirm). Afterwards
  * a reference is taken on the new policy, flow_cache_genid is bumped,
  * the index is inherited from `delpol` or freshly generated, add_time is
  * stamped, use_time is reset and the lifetime timer is armed one second
  * ahead; a further reference is taken when the timer was not already
  * pending.
  *
  * Phase 2: the replaced policy, if any, is passed to xfrm_policy_kill().
  *
  * Phase 3 (xfrm_policy_lock read-held): for every policy that follows
  * the new one in the list, its bundle chain is detached under the
  * policy's own lock and prepended to a local gc_list, which is processed
  * after the lock is released.
  *
  * NOTE(review): linking into the list, the `excl` handling, the release
  * of gc_list entries and the return values are not visible in this
  * listing.
  */
 343 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
 
 345         struct xfrm_policy *pol, **p;
 
 346         struct xfrm_policy *delpol = NULL;
 
 347         struct xfrm_policy **newpos = NULL;
 
 348         struct dst_entry *gc_list;
 
 350         write_lock_bh(&xfrm_policy_lock);
 
 351         for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
 
 352                 if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 &&
 
 353                     xfrm_sec_ctx_match(pol->security, policy->security)) {
 
 355                                 write_unlock_bh(&xfrm_policy_lock);
 
 360                         if (policy->priority > pol->priority)
 
 362                 } else if (policy->priority >= pol->priority) {
 
 374         xfrm_pol_hold(policy);
 
 377         atomic_inc(&flow_cache_genid);
 
 378         policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
 
 379         policy->curlft.add_time = (unsigned long)xtime.tv_sec;
 
 380         policy->curlft.use_time = 0;
 
 381         if (!mod_timer(&policy->timer, jiffies + HZ))
 
 382                 xfrm_pol_hold(policy);
 
 383         write_unlock_bh(&xfrm_policy_lock);
 
 386                 xfrm_policy_kill(delpol);
 
 388         read_lock_bh(&xfrm_policy_lock);
 
 390         for (policy = policy->next; policy; policy = policy->next) {
 
 391                 struct dst_entry *dst;
 
 393                 write_lock(&policy->lock);
 
 394                 dst = policy->bundles;
 
 396                         struct dst_entry *tail = dst;
 
 399                         tail->next = gc_list;
 
 402                         policy->bundles = NULL;
 
 404                 write_unlock(&policy->lock);
 
 406         read_unlock_bh(&xfrm_policy_lock);
 
 409                 struct dst_entry *dst = gc_list;
 
 417 EXPORT_SYMBOL(xfrm_policy_insert);
 
 /* Find a policy in list `dir` by selector and security context.
  *
  * Under xfrm_policy_lock (write-held) the list is searched for the first
  * policy whose selector is byte-identical to *sel and whose security
  * context matches `ctx`. After the lock is released, a visible path
  * bumps flow_cache_genid and hands the policy to xfrm_policy_kill().
  *
  * NOTE(review): what happens on a match (reference taking, unlinking
  * when `delete` is set), the condition guarding the kill path and the
  * return value are not visible in this listing.
  */
 419 struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel,
 
 420                                           struct xfrm_sec_ctx *ctx, int delete)
 
 422         struct xfrm_policy *pol, **p;
 
 424         write_lock_bh(&xfrm_policy_lock);
 
 425         for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
 
 426                 if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) &&
 
 427                     (xfrm_sec_ctx_match(ctx, pol->security))) {
 
 434         write_unlock_bh(&xfrm_policy_lock);
 
 437                 atomic_inc(&flow_cache_genid);
 
 438                 xfrm_policy_kill(pol);
 
 442 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
 
 /* Find a policy in list `dir` by its index.
  *
  * Same shape as xfrm_policy_bysel_ctx(), but the match criterion is
  * pol->index == id. After xfrm_policy_lock is released, a visible path
  * bumps flow_cache_genid and hands the policy to xfrm_policy_kill().
  *
  * NOTE(review): the match body, the condition guarding the kill path
  * (presumably tied to `delete`) and the return value are not visible in
  * this listing.
  */
 444 struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
 
 446         struct xfrm_policy *pol, **p;
 
 448         write_lock_bh(&xfrm_policy_lock);
 
 449         for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
 
 450                 if (pol->index == id) {
 
 457         write_unlock_bh(&xfrm_policy_lock);
 
 460                 atomic_inc(&flow_cache_genid);
 
 461                 xfrm_policy_kill(pol);
 
 465 EXPORT_SYMBOL(xfrm_policy_byid);
 
 /* Remove every policy from the first XFRM_POLICY_MAX lists (the slots
  * at XFRM_POLICY_MAX and above, used for per-socket policies, are not
  * visited).
  *
  * Each policy is popped from the list head while xfrm_policy_lock is
  * write-held; the lock is dropped around xfrm_policy_kill() and retaken
  * before the next pop. flow_cache_genid is bumped once at the end,
  * still under the lock.
  */
 467 void xfrm_policy_flush(void)
 
 469         struct xfrm_policy *xp;
 
 472         write_lock_bh(&xfrm_policy_lock);
 
 473         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
 
 474                 while ((xp = xfrm_policy_list[dir]) != NULL) {
 
 475                         xfrm_policy_list[dir] = xp->next;
 
 476                         write_unlock_bh(&xfrm_policy_lock);
 
 478                         xfrm_policy_kill(xp);
 
 480                         write_lock_bh(&xfrm_policy_lock);
 
 483         atomic_inc(&flow_cache_genid);
 
 484         write_unlock_bh(&xfrm_policy_lock);
 
 486 EXPORT_SYMBOL(xfrm_policy_flush);
 
 /* Invoke `func` on every policy in all 2*XFRM_POLICY_MAX lists.
  *
  * Runs entirely under xfrm_policy_lock (read-held) and makes two passes:
  * the first pass only iterates the lists (presumably to count entries),
  * the second calls func(xp, dir % XFRM_POLICY_MAX, --count, data), so
  * the callback receives the direction folded back into the base range
  * and a counter that decreases with every call.
  *
  * NOTE(review): the rest of the parameter list, the counting statement,
  * the handling of a non-zero `error` and the return are not visible in
  * this listing.
  */
 488 int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
 
 491         struct xfrm_policy *xp;
 
 496         read_lock_bh(&xfrm_policy_lock);
 
 497         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
 
 498                 for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
 
 507         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
 
 508                 for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
 
 509                         error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
 
 516         read_unlock_bh(&xfrm_policy_lock);
 
 519 EXPORT_SYMBOL(xfrm_policy_walk);
 
 521 /* Find policy to apply to this flow. */
 
 /* Resolver callback handed to flow_cache_lookup() by xfrm_lookup() and
  * __xfrm_policy_check().
  *
  * Walks xfrm_policy_list[dir] under xfrm_policy_lock (read-held),
  * skipping policies of a different family, testing the selector against
  * the flow with xfrm_selector_match() and consulting
  * security_xfrm_policy_lookup(). The resulting policy pointer (possibly
  * NULL) is stored in *objp; when it is non-NULL, *obj_refp is pointed at
  * the policy's reference counter.
  *
  * NOTE(review): the statements executed on a successful match (e.g.
  * taking a reference, leaving the loop) are not visible in this listing.
  */
 523 static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir,
 
 524                                void **objp, atomic_t **obj_refp)
 
 526         struct xfrm_policy *pol;
 
 528         read_lock_bh(&xfrm_policy_lock);
 
 529         for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
 
 530                 struct xfrm_selector *sel = &pol->selector;
 
 533                 if (pol->family != family)
 
 536                 match = xfrm_selector_match(sel, fl, family);
 
 539                         if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) {
 
 545         read_unlock_bh(&xfrm_policy_lock);
 
 546         if ((*objp = (void *) pol) != NULL)
 
 547                 *obj_refp = &pol->refcnt;
 
 /* Translate an XFRM_POLICY_* direction into the matching FLOW_DIR_*
  * value.
  *
  * The first test compares compile-time constants: when the three
  * XFRM_POLICY_* values equal their FLOW_DIR_* counterparts a shortcut
  * branch is taken; otherwise a switch over `dir` is used.
  *
  * NOTE(review): the return statements of both branches are not visible
  * in this listing.
  */
 550 static inline int policy_to_flow_dir(int dir)
 
 552         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
 
 553             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
 
 554             XFRM_POLICY_FWD == FLOW_DIR_FWD)
 
 560         case XFRM_POLICY_OUT:
 
 562         case XFRM_POLICY_FWD:
 
 /* Check the policy attached to a socket against a flow.
  *
  * Under xfrm_policy_lock (read-held), if sk->sk_policy[dir] is set, the
  * policy's selector is matched against the flow and
  * security_xfrm_policy_lookup() is consulted with the flow direction
  * derived from `dir`.
  *
  * NOTE(review): how the match and security results decide the returned
  * pointer (and whether a reference is taken) is not visible in this
  * listing.
  */
 567 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid)
 
 569         struct xfrm_policy *pol;
 
 571         read_lock_bh(&xfrm_policy_lock);
 
 572         if ((pol = sk->sk_policy[dir]) != NULL) {
 
 573                 int match = xfrm_selector_match(&pol->selector, fl,
 
 578                   err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir));
 
 585         read_unlock_bh(&xfrm_policy_lock);
 
 /* Push `pol` onto the head of xfrm_policy_list[dir]. No lock is taken
  * here; the visible callers hold xfrm_policy_lock for writing.
  *
  * NOTE(review): this listing may omit further statements of the body.
  */
 589 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
 
 591         pol->next = xfrm_policy_list[dir];
 
 592         xfrm_policy_list[dir] = pol;
 
 /* Walk xfrm_policy_list[dir] through a pointer-to-pointer cursor
  * (presumably to splice `pol` out of the list once found). No lock is
  * taken here; the visible callers hold xfrm_policy_lock for writing.
  *
  * NOTE(review): the remaining parameter(s), the loop body and the return
  * value are not visible in this listing. xfrm_policy_delete() uses the
  * returned pointer as its policy from then on.
  */
 596 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
 
 599         struct xfrm_policy **polp;
 
 601         for (polp = &xfrm_policy_list[dir];
 
 602              *polp != NULL; polp = &(*polp)->next) {
 
 /* Unlink a policy from list `dir` and schedule its destruction.
  *
  * The unlink is done under xfrm_policy_lock (write-held), and `pol` is
  * replaced by what __xfrm_policy_unlink() returned. On the visible
  * follow-up path flow_cache_genid is bumped only for the base lists
  * (dir < XFRM_POLICY_MAX, i.e. not for per-socket policies), and the
  * policy is passed to xfrm_policy_kill().
  *
  * NOTE(review): the guard around the follow-up path and the return
  * values are not visible in this listing; xfrm_policy_timer() treats a
  * zero return as success.
  */
 611 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
 
 613         write_lock_bh(&xfrm_policy_lock);
 
 614         pol = __xfrm_policy_unlink(pol, dir);
 
 615         write_unlock_bh(&xfrm_policy_lock);
 
 617                 if (dir < XFRM_POLICY_MAX)
 
 618                         atomic_inc(&flow_cache_genid);
 
 619                 xfrm_policy_kill(pol);
 
 624 EXPORT_SYMBOL(xfrm_policy_delete);
 
 /* Replace the per-socket policy for direction `dir`.
  *
  * Under xfrm_policy_lock (write-held) the socket's slot is switched to
  * `pol`. The new policy gets its add_time stamped, an index generated
  * for list XFRM_POLICY_MAX+dir, and is linked into that list; the
  * previous policy is unlinked from it. Once the lock is released the
  * previous policy is handed to xfrm_policy_kill().
  *
  * NOTE(review): the NULL guards around the new/old policy handling and
  * the return value are not visible in this listing.
  */
 626 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
 
 628         struct xfrm_policy *old_pol;
 
 630         write_lock_bh(&xfrm_policy_lock);
 
 631         old_pol = sk->sk_policy[dir];
 
 632         sk->sk_policy[dir] = pol;
 
 634                 pol->curlft.add_time = (unsigned long)xtime.tv_sec;
 
 635                 pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
 
 636                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
 
 639                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
 
 640         write_unlock_bh(&xfrm_policy_lock);
 
 643                 xfrm_policy_kill(old_pol);
 
 /* Duplicate a per-socket policy.
  *
  * Allocates a new policy with GFP_ATOMIC and copies the selector,
  * lifetime limits and counters, action, flags, index and the first
  * xfrm_nr templates from `old`. If cloning the security context fails,
  * NULL is returned. The copy is then linked into list
  * XFRM_POLICY_MAX+dir under xfrm_policy_lock (write-held).
  *
  * NOTE(review): the NULL check after allocation, the cleanup done before
  * the failure return, and the final return are not visible in this
  * listing.
  */
 648 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
 
 650         struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
 
 653                 newp->selector = old->selector;
 
 654                 if (security_xfrm_policy_clone(old, newp)) {
 
 656                         return NULL;  /* ENOMEM */
 
 658                 newp->lft = old->lft;
 
 659                 newp->curlft = old->curlft;
 
 660                 newp->action = old->action;
 
 661                 newp->flags = old->flags;
 
 662                 newp->xfrm_nr = old->xfrm_nr;
 
 663                 newp->index = old->index;
 
 664                 memcpy(newp->xfrm_vec, old->xfrm_vec,
 
 665                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
 
 666                 write_lock_bh(&xfrm_policy_lock);
 
 667                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
 
 668                 write_unlock_bh(&xfrm_policy_lock);
 
 /* Give a socket its own copies of the policies it currently points to.
  *
  * The two existing pointers are remembered, both slots are cleared, and
  * each non-NULL policy is re-created with clone_policy() and stored back
  * into its slot.
  *
  * NOTE(review): the statements executed when a clone fails and the
  * return values are not visible in this listing.
  */
 674 int __xfrm_sk_clone_policy(struct sock *sk)
 
 676         struct xfrm_policy *p0 = sk->sk_policy[0],
 
 677                            *p1 = sk->sk_policy[1];
 
 679         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
 
 680         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
 
 682         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
 
 687 /* Resolve list of templates for the flow, given policy. */
 
 /* Resolve each template of `policy` to a state for the given flow.
  *
  * For every template the remote/local addresses default to the flow's
  * destination/source and can be replaced by the template's own
  * id.daddr/saddr (the deciding condition is not visible here). The state
  * is looked up with xfrm_state_find(); a state in XFRM_STATE_VALID is
  * the success case, otherwise an error is derived, with
  * XFRM_STATE_ERROR treated specially. The trailing loop releases, in
  * reverse order, the states collected so far in xfrm[].
  *
  * NOTE(review): the return type line, how states are stored into xfrm[],
  * the optional-template handling and the return values are not visible
  * in this listing. xfrm_lookup() treats a negative result as an error
  * code.
  */
 690 xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
 
 691                   struct xfrm_state **xfrm,
 
 692                   unsigned short family)
 
 696         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
 
 697         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
 
 699         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
 
 700                 struct xfrm_state *x;
 
 701                 xfrm_address_t *remote = daddr;
 
 702                 xfrm_address_t *local  = saddr;
 
 703                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
 
 706                         remote = &tmpl->id.daddr;
 
 707                         local = &tmpl->saddr;
 
 710                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
 
 712                 if (x && x->km.state == XFRM_STATE_VALID) {
 
 719                         error = (x->km.state == XFRM_STATE_ERROR ?
 
 730         for (nx--; nx>=0; nx--)
 
 731                 xfrm_state_put(xfrm[nx]);
 
 735 /* Check that the bundle accepts the flow and its components are
 
 /* Ask the address family for an existing bundle usable for this flow.
  *
  * Returns ERR_PTR(-EINVAL) when no afinfo is registered for `family`;
  * otherwise the result of afinfo->find_bundle(fl, policy) is captured
  * and the afinfo is released.
  *
  * NOTE(review): the declaration of `x` and the final return are not
  * visible in this listing.
  */
 739 static struct dst_entry *
 
 740 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
 
 743         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
 744         if (unlikely(afinfo == NULL))
 
 745                 return ERR_PTR(-EINVAL);
 
 746         x = afinfo->find_bundle(fl, policy);
 
 747         xfrm_policy_put_afinfo(afinfo);
 
 751 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
 
 752  * all the metrics... Shortly, bundle a bundle.
 
 /* Build a bundle for the `nx` resolved states by delegating to the
  * address family's bundle_create hook; the afinfo is released
  * afterwards.
  *
  * NOTE(review): the return type line, the value returned when no afinfo
  * is registered, and the final return are not visible in this listing.
  */
 756 xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
 
 757                    struct flowi *fl, struct dst_entry **dst_p,
 
 758                    unsigned short family)
 
 761         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
 762         if (unlikely(afinfo == NULL))
 
 764         err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
 
 765         xfrm_policy_put_afinfo(afinfo);
 
 770 static int stale_bundle(struct dst_entry *dst);
 
 772 /* Main function: finds/creates a bundle for given flow.
 
 774  * At the moment we eat a raw IP route. Mostly to speed up lookups
 
 775  * on interfaces with disabled IPsec.
 
 /* Output-path entry point: find or build the bundle for a flow.
  *
  * Steps visible in this listing:
  *  - flow_cache_genid is sampled up front so a later change can be
  *    detected after sleeping;
  *  - the policy comes from the socket (when sk->sk_policy[1] is set) or
  *    from the flow cache, with a shortcut when the route is flagged
  *    DST_NOXFRM or no output policies exist;
  *  - the policy's use_time is stamped, then the action is examined;
  *    for XFRM_POLICY_ALLOW with no templates the policy reference is
  *    dropped and the flow passes untransformed;
  *  - otherwise an existing bundle is searched with xfrm_find_bundle();
  *    failing that, templates are resolved, and on -EAGAIN with `flags`
  *    set the task waits on km_waitq and resolves again;
  *  - a new bundle is created and, under policy->lock, attached to
  *    policy->bundles unless the policy died or the bundle is stale.
  *
  * NOTE(review): gotos, labels, error paths and return statements are
  * missing from this listing; the two trailing dst_release/xfrm_pol_put
  * pairs belong to exit paths whose labels are not shown.
  */
 777 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
 
 778                 struct sock *sk, int flags)
 
 780         struct xfrm_policy *policy;
 
 781         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
 
 782         struct dst_entry *dst, *dst_orig = *dst_p;
 
 787         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
 
 788         u32 sk_sid = security_sk_sid(sk, fl, dir);
 
 790         genid = atomic_read(&flow_cache_genid);
 
 792         if (sk && sk->sk_policy[1])
 
 793                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid);
 
 796                 /* To accelerate a bit...  */
 
 797                 if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
 
 800                 policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family,
 
 801                                            dir, xfrm_policy_lookup);
 
 807         family = dst_orig->ops->family;
 
 808         policy->curlft.use_time = (unsigned long)xtime.tv_sec;
 
 810         switch (policy->action) {
 
 811         case XFRM_POLICY_BLOCK:
 
 812                 /* Prohibit the flow */
 
 816         case XFRM_POLICY_ALLOW:
 
 817                 if (policy->xfrm_nr == 0) {
 
 818                         /* Flow passes not transformed. */
 
 819                         xfrm_pol_put(policy);
 
 823                 /* Try to find matching bundle.
 
 825                  * LATER: help from flow cache. It is optional, this
 
 826                  * is required only for output policy.
 
 828                 dst = xfrm_find_bundle(fl, policy, family);
 
 837                 nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
 
 839                 if (unlikely(nx<0)) {
 
 841                         if (err == -EAGAIN && flags) {
 
 842                                 DECLARE_WAITQUEUE(wait, current);
 
 844                                 add_wait_queue(&km_waitq, &wait);
 
 845                                 set_current_state(TASK_INTERRUPTIBLE);
 
 847                                 set_current_state(TASK_RUNNING);
 
 848                                 remove_wait_queue(&km_waitq, &wait);
 
 850                                 nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
 
 852                                 if (nx == -EAGAIN && signal_pending(current)) {
 
 857                                     genid != atomic_read(&flow_cache_genid)) {
 
 858                                         xfrm_pol_put(policy);
 
 867                         /* Flow passes not transformed. */
 
 868                         xfrm_pol_put(policy);
 
 873                 err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
 
 878                                 xfrm_state_put(xfrm[i]);
 
 882                 write_lock_bh(&policy->lock);
 
 883                 if (unlikely(policy->dead || stale_bundle(dst))) {
 
 884                         /* Wow! While we worked on resolving, this
 
 885                          * policy has gone. Retry. It is not paranoia,
 
 886                          * we just cannot enlist new bundle to dead object.
 
 887                          * We can't enlist stale bundles either.
 
 889                         write_unlock_bh(&policy->lock);
 
 896                 dst->next = policy->bundles;
 
 897                 policy->bundles = dst;
 
 899                 write_unlock_bh(&policy->lock);
 
 902         dst_release(dst_orig);
 
 903         xfrm_pol_put(policy);
 
 907         dst_release(dst_orig);
 
 908         xfrm_pol_put(policy);
 
 912 EXPORT_SYMBOL(xfrm_lookup);
 
 914 /* When skb is transformed back to its "native" form, we have to
 
 915  * check policy restrictions. At the moment we do this in a maximally
 
 916  * stupid way. Shame on me. :-) Of course, connected sockets must
 
 917  * have policy cached at them.
 
 /* Decide whether state `x` satisfies template `tmpl`.
  *
  * For states for which xfrm_state_kern() is true, the result is
  * tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family).
  * Otherwise all of the following must hold:
  *  - same protocol;
  *  - same SPI, or the template's SPI is 0;
  *  - same reqid, or the template's reqid is 0;
  *  - same mode;
  *  - the state's aalgo bit is set in tmpl->aalgos;
  *  - when the state's mode is non-zero, xfrm_state_addr_cmp() returns 0.
  *
  * NOTE(review): the return type line and braces are not part of this
  * listing.
  */
 921 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
 
 922               unsigned short family)
 
 924         if (xfrm_state_kern(x))
 
 925                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
 
 926         return  x->id.proto == tmpl->id.proto &&
 
 927                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
 
 928                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
 
 929                 x->props.mode == tmpl->mode &&
 
 930                 (tmpl->aalgos & (1<<x->props.aalgo)) &&
 
 931                 !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
 
 /* Search the security path, beginning at position `start`, for a state
  * that satisfies `tmpl` according to xfrm_state_ok(). Optional templates
  * and states with a non-zero mode get special handling on branches
  * whose bodies are not shown.
  *
  * NOTE(review): the return type line, the initialisation of `idx`, the
  * branch bodies and the return values are not visible in this listing.
  * __xfrm_policy_check() feeds the result back in as the next `start`.
  */
 935 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
 
 936                unsigned short family)
 
 940         if (tmpl->optional) {
 
 945         for (; idx < sp->len; idx++) {
 
 946                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
 
 948                 if (sp->xvec[idx]->props.mode)
 
 /* Fill in the flow description `fl` from a packet by delegating to the
  * address family's decode_session hook.
  *
  * Returns -EAFNOSUPPORT when no afinfo is registered for `family`; the
  * afinfo is released after the hook has run.
  *
  * NOTE(review): the return type line and the success return are not
  * visible in this listing; callers in this file treat a negative value
  * as failure.
  */
 955 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
 
 957         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
 
 959         if (unlikely(afinfo == NULL))
 
 960                 return -EAFNOSUPPORT;
 
 962         afinfo->decode_session(skb, fl);
 
 963         xfrm_policy_put_afinfo(afinfo);
 
 966 EXPORT_SYMBOL(xfrm_decode_session);
 
 /* Scan the security path from position `k` onwards for a state whose
  * props.mode is non-zero (judging by the function name, a tunnel-mode
  * state).
  *
  * NOTE(review): the return statements are not visible in this listing.
  */
 968 static inline int secpath_has_tunnel(struct sec_path *sp, int k)
 
 970         for (; k < sp->len; k++) {
 
 971                 if (sp->xvec[k]->props.mode)
 
 /* Input-path policy check for a received packet.
  *
  * Steps visible in this listing:
  *  - the flow is decoded from the skb (a negative result is a failure
  *    path) and passed through nf_nat_decode_session();
  *  - every state recorded in skb->sp is checked, last to first, against
  *    its own selector;
  *  - the policy is taken from the socket when sk->sk_policy[dir] is set,
  *    otherwise from the flow cache;
  *  - on one path (presumably "no policy found") the packet is accepted
  *    only if it carries no secpath or the secpath has no tunnel state;
  *  - otherwise use_time is stamped and, for XFRM_POLICY_ALLOW, the
  *    templates are matched last-to-first against the secpath with
  *    xfrm_policy_ok(); any tunnel state left beyond the matched
  *    position is then checked for.
  *
  * NOTE(review): the guards, reject paths and return statements are
  * missing from this listing.
  */
 978 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
 
 979                         unsigned short family)
 
 981         struct xfrm_policy *pol;
 
 983         u8 fl_dir = policy_to_flow_dir(dir);
 
 986         if (xfrm_decode_session(skb, &fl, family) < 0)
 
 988         nf_nat_decode_session(skb, &fl, family);
 
 990         sk_sid = security_sk_sid(sk, &fl, fl_dir);
 
 992         /* First, check used SA against their selectors. */
 
 996                 for (i=skb->sp->len-1; i>=0; i--) {
 
 997                         struct xfrm_state *x = skb->sp->xvec[i];
 
 998                         if (!xfrm_selector_match(&x->sel, &fl, family))
 
1004         if (sk && sk->sk_policy[dir])
 
1005                 pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid);
 
1008                 pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir,
 
1009                                         xfrm_policy_lookup);
 
1012                 return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
 
1014         pol->curlft.use_time = (unsigned long)xtime.tv_sec;
 
1016         if (pol->action == XFRM_POLICY_ALLOW) {
 
1017                 struct sec_path *sp;
 
1018                 static struct sec_path dummy;
 
1021                 if ((sp = skb->sp) == NULL)
 
1024                 /* For each tunnel xfrm, find the first matching tmpl.
 
1025                  * For each tmpl before that, find corresponding xfrm.
 
1026                  * Order is _important_. Later we will implement
 
1027                  * some barriers, but at the moment barriers
 
1028                  * are implied between each two transformations.
 
1030                 for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
 
1031                         k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
 
1036                 if (secpath_has_tunnel(sp, k))
 
1047 EXPORT_SYMBOL(__xfrm_policy_check);
 
/* Forwarding-path hook: decode the flow from the skb and run xfrm_lookup()
 * on skb->dst with no socket and flags 0. Returns non-zero when
 * xfrm_lookup() returned 0.
 *
 * NOTE(review): the declaration of `fl` and the statement executed when
 * decoding fails are not visible in this listing.
 */
1049 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
 
1053         if (xfrm_decode_session(skb, &fl, family) < 0)
 
1056         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
 
1058 EXPORT_SYMBOL(__xfrm_route_forward);
 
1060 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
 
1062         /* If it is marked obsolete, which is how we even get here,
 
1063          * then we have purged it from the policy bundle list and we
 
1064          * did that for a good reason.
 
/* Predicate: non-zero when the bundle no longer passes xfrm_bundle_ok().
 * The check is made without a flow (fl == NULL, family AF_UNSPEC), so the
 * selector comparison inside xfrm_bundle_ok() is skipped. */
1069 static int stale_bundle(struct dst_entry *dst)
 
1071         return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC);
 
/* Device-down handling for a bundle: walk the ->child chain for as long
 * as each child carries an xfrm state and is bound to `dev`, re-point
 * it at loopback_dev and take a reference on loopback_dev.
 *
 * NOTE(review): any statement releasing the reference on the old device
 * is not visible in this listing.
 */
1074 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
 
1076         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
 
1077                 dst->dev = &loopback_dev;
 
1078                 dev_hold(&loopback_dev);
 
1082 EXPORT_SYMBOL(xfrm_dst_ifdown);
 
1084 static void xfrm_link_failure(struct sk_buff *skb)
 
1086         /* Impossible. Such dst must be popped before reaches point of failure. */
 
1090 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
 
1093                 if (dst->obsolete) {
 
/* Walk the bundle list of every policy in all 2*XFRM_POLICY_MAX lists
 * and collect bundles on a private gc_list, which is consumed after the
 * locks are dropped. The callers pass the selection predicate `func`
 * (unused_bundle, stale_bundle or always_true).
 *
 * Locking: xfrm_policy_lock is read-held for the whole scan, and each
 * policy's own lock is write-held while its bundle list is edited.
 *
 * NOTE(review): the call to `func`, the unlinking of a selected bundle
 * and the release of the collected entries are not visible in this
 * listing.
 */
1101 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
 
1104         struct xfrm_policy *pol;
 
1105         struct dst_entry *dst, **dstp, *gc_list = NULL;
 
1107         read_lock_bh(&xfrm_policy_lock);
 
1108         for (i=0; i<2*XFRM_POLICY_MAX; i++) {
 
1109                 for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
 
1110                         write_lock(&pol->lock);
 
1111                         dstp = &pol->bundles;
 
1112                         while ((dst=*dstp) != NULL) {
 
1115                                         dst->next = gc_list;
 
1121                         write_unlock(&pol->lock);
 
1124         read_unlock_bh(&xfrm_policy_lock);
 
1128                 gc_list = dst->next;
 
/* Predicate for xfrm_prune_bundles(): non-zero when the dst's reference
 * count reads as zero. */
1133 static int unused_bundle(struct dst_entry *dst)
 
1135         return !atomic_read(&dst->__refcnt);
 
/* Default afinfo->garbage_collect hook (installed by
 * xfrm_policy_register_afinfo()): prunes bundles selected by
 * unused_bundle(). */
1138 static void __xfrm_garbage_collect(void)
 
1140         xfrm_prune_bundles(unused_bundle);
 
/* Prune every bundle for which stale_bundle() is true.
 *
 * NOTE(review): the return statement is not visible in this listing.
 */
1143 int xfrm_flush_bundles(void)
 
1145         xfrm_prune_bundles(stale_bundle);
 
1149 static int always_true(struct dst_entry *dst)
 
/* Prune bundles using the always_true predicate (judging by its name,
 * every bundle is selected; its body is not visible in this listing). */
1154 void xfrm_flush_all_bundles(void)
 
1156         xfrm_prune_bundles(always_true);
 
/* Initialise the cached MTU values and the MTU metric of each dst
 * reached from `dst` by following ->next.
 *
 * For each entry: the child's MTU is cached in child_mtu_cached and
 * converted with xfrm_state_mtu(); the route's MTU is cached in
 * route_mtu_cached; the smaller of the two results is stored in
 * dst->metrics[RTAX_MTU-1].
 *
 * NOTE(review): the opening of the do/while loop is not part of this
 * listing.
 */
1159 void xfrm_init_pmtu(struct dst_entry *dst)
 
1162                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
1163                 u32 pmtu, route_mtu_cached;
 
1165                 pmtu = dst_mtu(dst->child);
 
1166                 xdst->child_mtu_cached = pmtu;
 
1168                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
 
1170                 route_mtu_cached = dst_mtu(xdst->route);
 
1171                 xdst->route_mtu_cached = route_mtu_cached;
 
1173                 if (pmtu > route_mtu_cached)
 
1174                         pmtu = route_mtu_cached;
 
1176                 dst->metrics[RTAX_MTU-1] = pmtu;
 
1177         } while ((dst = dst->next));
 
1180 EXPORT_SYMBOL(xfrm_init_pmtu);
 
1182 /* Check that the bundle accepts the flow and its components are
 
/* Validate a bundle, optionally against a flow.
 *
 * Checks visible in this listing:
 *  - the path dst must pass dst_check() with the stored path_cookie, and
 *    a bound device must be running;
 *  - for each dst in the chain (the loop ends when dst->xfrm is NULL):
 *    the state's selector must match `fl` when a flow is supplied, the
 *    state must be XFRM_STATE_VALID, the route must pass dst_check(), and
 *    the cached child/route MTUs are refreshed when they changed.
 *
 * The trailing section recomputes dst->metrics[RTAX_MTU-1] from the
 * cached values (converted via xfrm_state_mtu() and capped at
 * route_mtu_cached), stepping through last->u.next.
 *
 * NOTE(review): the failure returns, the bookkeeping of `last`, the loop
 * headers and the final return are not visible in this listing.
 * stale_bundle() treats a zero result as "not ok".
 */
1186 int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family)
 
1188         struct dst_entry *dst = &first->u.dst;
 
1189         struct xfrm_dst *last;
 
1192         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
 
1193             (dst->dev && !netif_running(dst->dev)))
 
1199                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
 
1201                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
 
1203                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
 
1206                 mtu = dst_mtu(dst->child);
 
1207                 if (xdst->child_mtu_cached != mtu) {
 
1209                         xdst->child_mtu_cached = mtu;
 
1212                 if (!dst_check(xdst->route, xdst->route_cookie))
 
1214                 mtu = dst_mtu(xdst->route);
 
1215                 if (xdst->route_mtu_cached != mtu) {
 
1217                         xdst->route_mtu_cached = mtu;
 
1221         } while (dst->xfrm);
 
1226         mtu = last->child_mtu_cached;
 
1230                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
 
1231                 if (mtu > last->route_mtu_cached)
 
1232                         mtu = last->route_mtu_cached;
 
1233                 dst->metrics[RTAX_MTU-1] = mtu;
 
1238                 last = last->u.next;
 
1239                 last->child_mtu_cached = mtu;
 
1245 EXPORT_SYMBOL(xfrm_bundle_ok);
 
/* Register the per-family operations table.
 *
 * Returns -EAFNOSUPPORT when afinfo->family is out of range (>= NPROTO).
 * Under xfrm_policy_afinfo_lock (write-held), if no table is registered
 * yet for that family, any dst_ops hook that is still NULL
 * (kmem_cachep, check, negative_advice, link_failure) and the
 * garbage_collect hook are filled in with this file's defaults, and the
 * table is published in xfrm_policy_afinfo[].
 *
 * NOTE(review): the return values for a NULL `afinfo`, for an already
 * registered family and for success are not visible in this listing.
 */
1247 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
 
1250         if (unlikely(afinfo == NULL))
 
1252         if (unlikely(afinfo->family >= NPROTO))
 
1253                 return -EAFNOSUPPORT;
 
1254         write_lock(&xfrm_policy_afinfo_lock);
 
1255         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
 
1258                 struct dst_ops *dst_ops = afinfo->dst_ops;
 
1259                 if (likely(dst_ops->kmem_cachep == NULL))
 
1260                         dst_ops->kmem_cachep = xfrm_dst_cache;
 
1261                 if (likely(dst_ops->check == NULL))
 
1262                         dst_ops->check = xfrm_dst_check;
 
1263                 if (likely(dst_ops->negative_advice == NULL))
 
1264                         dst_ops->negative_advice = xfrm_negative_advice;
 
1265                 if (likely(dst_ops->link_failure == NULL))
 
1266                         dst_ops->link_failure = xfrm_link_failure;
 
1267                 if (likely(afinfo->garbage_collect == NULL))
 
1268                         afinfo->garbage_collect = __xfrm_garbage_collect;
 
1269                 xfrm_policy_afinfo[afinfo->family] = afinfo;
 
1271         write_unlock(&xfrm_policy_afinfo_lock);
 
1274 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
 
/* Unregister the per-family operations table.
 *
 * Returns -EAFNOSUPPORT when afinfo->family is out of range (>= NPROTO).
 * Under xfrm_policy_afinfo_lock (write-held), when the registered table
 * for that family is `afinfo` itself, the slot is cleared and the hooks
 * handled at registration (kmem_cachep, check, negative_advice,
 * link_failure, garbage_collect) are reset to NULL.
 *
 * NOTE(review): the hooks are reset unconditionally here, although
 * registration only fills those that were NULL. The return values for
 * the NULL, mismatch and success cases are not visible in this listing.
 */
1276 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
 
1279         if (unlikely(afinfo == NULL))
 
1281         if (unlikely(afinfo->family >= NPROTO))
 
1282                 return -EAFNOSUPPORT;
 
1283         write_lock(&xfrm_policy_afinfo_lock);
 
1284         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
 
1285                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
 
1288                         struct dst_ops *dst_ops = afinfo->dst_ops;
 
1289                         xfrm_policy_afinfo[afinfo->family] = NULL;
 
1290                         dst_ops->kmem_cachep = NULL;
 
1291                         dst_ops->check = NULL;
 
1292                         dst_ops->negative_advice = NULL;
 
1293                         dst_ops->link_failure = NULL;
 
1294                         afinfo->garbage_collect = NULL;
 
1297         write_unlock(&xfrm_policy_afinfo_lock);
 
1300 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
 
/* Look up the operations table for `family` and pin it.
 *
 * The table pointer is read under xfrm_policy_afinfo_lock (read-held);
 * when it is non-NULL, afinfo->lock is read-locked before the table lock
 * is released. That read lock stays held until the caller invokes
 * xfrm_policy_put_afinfo().
 *
 * NOTE(review): the returns (for an out-of-range family and for the
 * normal case) are not visible in this listing; callers test the result
 * against NULL.
 */
1302 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
 
1304         struct xfrm_policy_afinfo *afinfo;
 
1305         if (unlikely(family >= NPROTO))
 
1307         read_lock(&xfrm_policy_afinfo_lock);
 
1308         afinfo = xfrm_policy_afinfo[family];
 
1309         if (likely(afinfo != NULL))
 
1310                 read_lock(&afinfo->lock);
 
1311         read_unlock(&xfrm_policy_afinfo_lock);
 
/* Release a table obtained with xfrm_policy_get_afinfo(): drops the read
 * lock on afinfo->lock. A NULL argument is checked for first (the branch
 * body, presumably an early return, is not visible in this listing). */
1315 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
 
1317         if (unlikely(afinfo == NULL))
 
1319         read_unlock(&afinfo->lock);
 
/* Netdevice notifier callback: calls xfrm_flush_bundles() to drop stale
 * bundles.
 *
 * NOTE(review): the event(s) that trigger the flush and the return value
 * are not visible in this listing.
 */
1322 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
 
1326                 xfrm_flush_bundles();
 
1331 static struct notifier_block xfrm_dev_notifier = {
 
/* Boot-time initialisation of the policy layer: creates the
 * "xfrm_dst_cache" slab cache for struct xfrm_dst (panicking when that
 * fails), binds xfrm_policy_gc_work to xfrm_policy_gc_task(), and
 * registers the netdevice notifier.
 *
 * NOTE(review): the trailing kmem_cache_create() arguments are not
 * visible in this listing.
 */
1337 static void __init xfrm_policy_init(void)
 
1339         xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
 
1340                                            sizeof(struct xfrm_dst),
 
1341                                            0, SLAB_HWCACHE_ALIGN,
 
1343         if (!xfrm_dst_cache)
 
1344                 panic("XFRM: failed to allocate xfrm_dst_cache\n");
 
1346         INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
 
1347         register_netdevice_notifier(&xfrm_dev_notifier);
 
1350 void __init xfrm_init(void)