[IPSEC]: Move flow construction into xfrm_dst_lookup
[linux-2.6] / net / xfrm / xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <net/xfrm.h>
28 #include <net/ip.h>
29
30 #include "xfrm_hash.h"
31
/* NOTE(review): presumably exposed through sysctl (going by the name);
 * not referenced in the part of the file reviewed here - confirm. */
32 int sysctl_xfrm_larval_drop __read_mostly;
33
/* Exported mutex; no user is visible in the part of the file reviewed here. */
34 DEFINE_MUTEX(xfrm_cfg_mutex);
35 EXPORT_SYMBOL(xfrm_cfg_mutex);
36
/* Taken with the _bh variants around accesses to the policy hash tables,
 * the inexact lists and xfrm_policy_count (insert, lookup by selector/id,
 * flush, walk, resize, xfrm_spd_getinfo). */
37 static DEFINE_RWLOCK(xfrm_policy_lock);
38
/* Policy counters indexed by direction; xfrm_spd_getinfo() reads both the
 * [dir] and the [dir + XFRM_POLICY_MAX] halves of the array. */
39 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
40 EXPORT_SYMBOL(xfrm_policy_count);
41
/* Table of per-family afinfo pointers (NPROTO slots) and its lock; entries
 * are obtained/released through xfrm_policy_get_afinfo() and
 * xfrm_policy_put_afinfo(), declared below and defined elsewhere. */
42 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
43 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
44
/* Slab cache pointer; not used in the part of the file reviewed here. */
45 static struct kmem_cache *xfrm_dst_cache __read_mostly;
46
/* Deferred policy destruction: xfrm_policy_kill() queues dead policies on
 * xfrm_policy_gc_list (under xfrm_policy_gc_lock) and schedules
 * xfrm_policy_gc_work; xfrm_policy_gc_task() drains the list. */
47 static struct work_struct xfrm_policy_gc_work;
48 static HLIST_HEAD(xfrm_policy_gc_list);
49 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
50
/* Forward declarations; used by xfrm_dst_lookup() before their definition. */
51 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
52 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
53
54 static inline int
55 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
56 {
57         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
58                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
59                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
60                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
61                 (fl->proto == sel->proto || !sel->proto) &&
62                 (fl->oif == sel->ifindex || !sel->ifindex);
63 }
64
65 static inline int
66 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
67 {
68         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
69                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
70                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
71                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
72                 (fl->proto == sel->proto || !sel->proto) &&
73                 (fl->oif == sel->ifindex || !sel->ifindex);
74 }
75
76 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
77                     unsigned short family)
78 {
79         switch (family) {
80         case AF_INET:
81                 return __xfrm4_selector_match(sel, fl);
82         case AF_INET6:
83                 return __xfrm6_selector_match(sel, fl);
84         }
85         return 0;
86 }
87
88 struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos)
89 {
90         xfrm_address_t *saddr = &x->props.saddr;
91         xfrm_address_t *daddr = &x->id.daddr;
92         struct xfrm_policy_afinfo *afinfo;
93         struct dst_entry *dst;
94
95         if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
96                 saddr = x->coaddr;
97         if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
98                 daddr = x->coaddr;
99
100         afinfo = xfrm_policy_get_afinfo(x->props.family);
101         if (unlikely(afinfo == NULL))
102                 return ERR_PTR(-EAFNOSUPPORT);
103
104         dst = afinfo->dst_lookup(tos, saddr, daddr);
105         xfrm_policy_put_afinfo(afinfo);
106         return dst;
107 }
108 EXPORT_SYMBOL(xfrm_dst_lookup);
109
110 static inline unsigned long make_jiffies(long secs)
111 {
112         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
113                 return MAX_SCHEDULE_TIMEOUT-1;
114         else
115                 return secs*HZ;
116 }
117
118 static void xfrm_policy_timer(unsigned long data)
119 {
120         struct xfrm_policy *xp = (struct xfrm_policy*)data;
121         unsigned long now = get_seconds();
122         long next = LONG_MAX;
123         int warn = 0;
124         int dir;
125
126         read_lock(&xp->lock);
127
128         if (xp->dead)
129                 goto out;
130
131         dir = xfrm_policy_id2dir(xp->index);
132
133         if (xp->lft.hard_add_expires_seconds) {
134                 long tmo = xp->lft.hard_add_expires_seconds +
135                         xp->curlft.add_time - now;
136                 if (tmo <= 0)
137                         goto expired;
138                 if (tmo < next)
139                         next = tmo;
140         }
141         if (xp->lft.hard_use_expires_seconds) {
142                 long tmo = xp->lft.hard_use_expires_seconds +
143                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
144                 if (tmo <= 0)
145                         goto expired;
146                 if (tmo < next)
147                         next = tmo;
148         }
149         if (xp->lft.soft_add_expires_seconds) {
150                 long tmo = xp->lft.soft_add_expires_seconds +
151                         xp->curlft.add_time - now;
152                 if (tmo <= 0) {
153                         warn = 1;
154                         tmo = XFRM_KM_TIMEOUT;
155                 }
156                 if (tmo < next)
157                         next = tmo;
158         }
159         if (xp->lft.soft_use_expires_seconds) {
160                 long tmo = xp->lft.soft_use_expires_seconds +
161                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
162                 if (tmo <= 0) {
163                         warn = 1;
164                         tmo = XFRM_KM_TIMEOUT;
165                 }
166                 if (tmo < next)
167                         next = tmo;
168         }
169
170         if (warn)
171                 km_policy_expired(xp, dir, 0, 0);
172         if (next != LONG_MAX &&
173             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
174                 xfrm_pol_hold(xp);
175
176 out:
177         read_unlock(&xp->lock);
178         xfrm_pol_put(xp);
179         return;
180
181 expired:
182         read_unlock(&xp->lock);
183         if (!xfrm_policy_delete(xp, dir))
184                 km_policy_expired(xp, dir, 1, 0);
185         xfrm_pol_put(xp);
186 }
187
188
189 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
190  * SPD calls.
191  */
192
193 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
194 {
195         struct xfrm_policy *policy;
196
197         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
198
199         if (policy) {
200                 INIT_HLIST_NODE(&policy->bydst);
201                 INIT_HLIST_NODE(&policy->byidx);
202                 rwlock_init(&policy->lock);
203                 atomic_set(&policy->refcnt, 1);
204                 setup_timer(&policy->timer, xfrm_policy_timer,
205                                 (unsigned long)policy);
206         }
207         return policy;
208 }
209 EXPORT_SYMBOL(xfrm_policy_alloc);
210
211 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
212
213 void __xfrm_policy_destroy(struct xfrm_policy *policy)
214 {
215         BUG_ON(!policy->dead);
216
217         BUG_ON(policy->bundles);
218
219         if (del_timer(&policy->timer))
220                 BUG();
221
222         security_xfrm_policy_free(policy);
223         kfree(policy);
224 }
225 EXPORT_SYMBOL(__xfrm_policy_destroy);
226
227 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
228 {
229         struct dst_entry *dst;
230
231         while ((dst = policy->bundles) != NULL) {
232                 policy->bundles = dst->next;
233                 dst_free(dst);
234         }
235
236         if (del_timer(&policy->timer))
237                 atomic_dec(&policy->refcnt);
238
239         if (atomic_read(&policy->refcnt) > 1)
240                 flow_cache_flush();
241
242         xfrm_pol_put(policy);
243 }
244
245 static void xfrm_policy_gc_task(struct work_struct *work)
246 {
247         struct xfrm_policy *policy;
248         struct hlist_node *entry, *tmp;
249         struct hlist_head gc_list;
250
251         spin_lock_bh(&xfrm_policy_gc_lock);
252         gc_list.first = xfrm_policy_gc_list.first;
253         INIT_HLIST_HEAD(&xfrm_policy_gc_list);
254         spin_unlock_bh(&xfrm_policy_gc_lock);
255
256         hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
257                 xfrm_policy_gc_kill(policy);
258 }
259
260 /* Rule must be locked. Release descentant resources, announce
261  * entry dead. The rule must be unlinked from lists to the moment.
262  */
263
264 static void xfrm_policy_kill(struct xfrm_policy *policy)
265 {
266         int dead;
267
268         write_lock_bh(&policy->lock);
269         dead = policy->dead;
270         policy->dead = 1;
271         write_unlock_bh(&policy->lock);
272
273         if (unlikely(dead)) {
274                 WARN_ON(1);
275                 return;
276         }
277
278         spin_lock(&xfrm_policy_gc_lock);
279         hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
280         spin_unlock(&xfrm_policy_gc_lock);
281
282         schedule_work(&xfrm_policy_gc_work);
283 }
284
/* One hash table of policies: an array of hmask + 1 list heads (the resize
 * code allocates (hmask + 1) * sizeof(struct hlist_head) bytes for it). */
285 struct xfrm_policy_hash {
286         struct hlist_head       *table;
287         unsigned int            hmask;
288 };
289
/* Per-direction list used by policy_hash_bysel() when __sel_hash() returns
 * hmask + 1 instead of a bucket index. */
290 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
/* Per-direction hash tables keyed by destination/source address. */
291 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
/* Single hash table keyed by policy index, and its mask. */
292 static struct hlist_head *xfrm_policy_byidx __read_mostly;
293 static unsigned int xfrm_idx_hmask __read_mostly;
/* Upper bound on table size (in buckets) checked before scheduling a resize. */
294 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
295
296 static inline unsigned int idx_hash(u32 index)
297 {
298         return __idx_hash(index, xfrm_idx_hmask);
299 }
300
301 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
302 {
303         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
304         unsigned int hash = __sel_hash(sel, family, hmask);
305
306         return (hash == hmask + 1 ?
307                 &xfrm_policy_inexact[dir] :
308                 xfrm_policy_bydst[dir].table + hash);
309 }
310
311 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
312 {
313         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
314         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
315
316         return xfrm_policy_bydst[dir].table + hash;
317 }
318
319 static void xfrm_dst_hash_transfer(struct hlist_head *list,
320                                    struct hlist_head *ndsttable,
321                                    unsigned int nhashmask)
322 {
323         struct hlist_node *entry, *tmp;
324         struct xfrm_policy *pol;
325
326         hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
327                 unsigned int h;
328
329                 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
330                                 pol->family, nhashmask);
331                 hlist_add_head(&pol->bydst, ndsttable+h);
332         }
333 }
334
335 static void xfrm_idx_hash_transfer(struct hlist_head *list,
336                                    struct hlist_head *nidxtable,
337                                    unsigned int nhashmask)
338 {
339         struct hlist_node *entry, *tmp;
340         struct xfrm_policy *pol;
341
342         hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
343                 unsigned int h;
344
345                 h = __idx_hash(pol->index, nhashmask);
346                 hlist_add_head(&pol->byidx, nidxtable+h);
347         }
348 }
349
/*
 * Mask for a table twice the size of the one described by @old_hmask:
 * 2 * (old_hmask + 1) - 1, computed in unsigned int arithmetic.
 * That equals 2 * old_hmask + 1, i.e. the old mask shifted left with the
 * low bit set.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	return (old_hmask << 1) | 1;
}
354
355 static void xfrm_bydst_resize(int dir)
356 {
357         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
358         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
359         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
360         struct hlist_head *odst = xfrm_policy_bydst[dir].table;
361         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
362         int i;
363
364         if (!ndst)
365                 return;
366
367         write_lock_bh(&xfrm_policy_lock);
368
369         for (i = hmask; i >= 0; i--)
370                 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
371
372         xfrm_policy_bydst[dir].table = ndst;
373         xfrm_policy_bydst[dir].hmask = nhashmask;
374
375         write_unlock_bh(&xfrm_policy_lock);
376
377         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
378 }
379
380 static void xfrm_byidx_resize(int total)
381 {
382         unsigned int hmask = xfrm_idx_hmask;
383         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
384         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
385         struct hlist_head *oidx = xfrm_policy_byidx;
386         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
387         int i;
388
389         if (!nidx)
390                 return;
391
392         write_lock_bh(&xfrm_policy_lock);
393
394         for (i = hmask; i >= 0; i--)
395                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
396
397         xfrm_policy_byidx = nidx;
398         xfrm_idx_hmask = nhashmask;
399
400         write_unlock_bh(&xfrm_policy_lock);
401
402         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
403 }
404
405 static inline int xfrm_bydst_should_resize(int dir, int *total)
406 {
407         unsigned int cnt = xfrm_policy_count[dir];
408         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
409
410         if (total)
411                 *total += cnt;
412
413         if ((hmask + 1) < xfrm_policy_hashmax &&
414             cnt > hmask)
415                 return 1;
416
417         return 0;
418 }
419
420 static inline int xfrm_byidx_should_resize(int total)
421 {
422         unsigned int hmask = xfrm_idx_hmask;
423
424         if ((hmask + 1) < xfrm_policy_hashmax &&
425             total > hmask)
426                 return 1;
427
428         return 0;
429 }
430
431 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
432 {
433         read_lock_bh(&xfrm_policy_lock);
434         si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
435         si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
436         si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
437         si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
438         si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
439         si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
440         si->spdhcnt = xfrm_idx_hmask;
441         si->spdhmcnt = xfrm_policy_hashmax;
442         read_unlock_bh(&xfrm_policy_lock);
443 }
444 EXPORT_SYMBOL(xfrm_spd_getinfo);
445
446 static DEFINE_MUTEX(hash_resize_mutex);
447 static void xfrm_hash_resize(struct work_struct *__unused)
448 {
449         int dir, total;
450
451         mutex_lock(&hash_resize_mutex);
452
453         total = 0;
454         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
455                 if (xfrm_bydst_should_resize(dir, &total))
456                         xfrm_bydst_resize(dir);
457         }
458         if (xfrm_byidx_should_resize(total))
459                 xfrm_byidx_resize(total);
460
461         mutex_unlock(&hash_resize_mutex);
462 }
463
464 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
465
466 /* Generate new index... KAME seems to generate them ordered by cost
467  * of an absolute inpredictability of ordering of rules. This will not pass. */
468 static u32 xfrm_gen_index(u8 type, int dir)
469 {
470         static u32 idx_generator;
471
472         for (;;) {
473                 struct hlist_node *entry;
474                 struct hlist_head *list;
475                 struct xfrm_policy *p;
476                 u32 idx;
477                 int found;
478
479                 idx = (idx_generator | dir);
480                 idx_generator += 8;
481                 if (idx == 0)
482                         idx = 8;
483                 list = xfrm_policy_byidx + idx_hash(idx);
484                 found = 0;
485                 hlist_for_each_entry(p, entry, list, byidx) {
486                         if (p->index == idx) {
487                                 found = 1;
488                                 break;
489                         }
490                 }
491                 if (!found)
492                         return idx;
493         }
494 }
495
496 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
497 {
498         u32 *p1 = (u32 *) s1;
499         u32 *p2 = (u32 *) s2;
500         int len = sizeof(struct xfrm_selector) / sizeof(u32);
501         int i;
502
503         for (i = 0; i < len; i++) {
504                 if (p1[i] != p2[i])
505                         return 1;
506         }
507
508         return 0;
509 }
510
511 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
512 {
513         struct xfrm_policy *pol;
514         struct xfrm_policy *delpol;
515         struct hlist_head *chain;
516         struct hlist_node *entry, *newpos;
517         struct dst_entry *gc_list;
518
519         write_lock_bh(&xfrm_policy_lock);
520         chain = policy_hash_bysel(&policy->selector, policy->family, dir);
521         delpol = NULL;
522         newpos = NULL;
523         hlist_for_each_entry(pol, entry, chain, bydst) {
524                 if (pol->type == policy->type &&
525                     !selector_cmp(&pol->selector, &policy->selector) &&
526                     xfrm_sec_ctx_match(pol->security, policy->security) &&
527                     !WARN_ON(delpol)) {
528                         if (excl) {
529                                 write_unlock_bh(&xfrm_policy_lock);
530                                 return -EEXIST;
531                         }
532                         delpol = pol;
533                         if (policy->priority > pol->priority)
534                                 continue;
535                 } else if (policy->priority >= pol->priority) {
536                         newpos = &pol->bydst;
537                         continue;
538                 }
539                 if (delpol)
540                         break;
541         }
542         if (newpos)
543                 hlist_add_after(newpos, &policy->bydst);
544         else
545                 hlist_add_head(&policy->bydst, chain);
546         xfrm_pol_hold(policy);
547         xfrm_policy_count[dir]++;
548         atomic_inc(&flow_cache_genid);
549         if (delpol) {
550                 hlist_del(&delpol->bydst);
551                 hlist_del(&delpol->byidx);
552                 xfrm_policy_count[dir]--;
553         }
554         policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
555         hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
556         policy->curlft.add_time = get_seconds();
557         policy->curlft.use_time = 0;
558         if (!mod_timer(&policy->timer, jiffies + HZ))
559                 xfrm_pol_hold(policy);
560         write_unlock_bh(&xfrm_policy_lock);
561
562         if (delpol)
563                 xfrm_policy_kill(delpol);
564         else if (xfrm_bydst_should_resize(dir, NULL))
565                 schedule_work(&xfrm_hash_work);
566
567         read_lock_bh(&xfrm_policy_lock);
568         gc_list = NULL;
569         entry = &policy->bydst;
570         hlist_for_each_entry_continue(policy, entry, bydst) {
571                 struct dst_entry *dst;
572
573                 write_lock(&policy->lock);
574                 dst = policy->bundles;
575                 if (dst) {
576                         struct dst_entry *tail = dst;
577                         while (tail->next)
578                                 tail = tail->next;
579                         tail->next = gc_list;
580                         gc_list = dst;
581
582                         policy->bundles = NULL;
583                 }
584                 write_unlock(&policy->lock);
585         }
586         read_unlock_bh(&xfrm_policy_lock);
587
588         while (gc_list) {
589                 struct dst_entry *dst = gc_list;
590
591                 gc_list = dst->next;
592                 dst_free(dst);
593         }
594
595         return 0;
596 }
597 EXPORT_SYMBOL(xfrm_policy_insert);
598
599 struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
600                                           struct xfrm_selector *sel,
601                                           struct xfrm_sec_ctx *ctx, int delete,
602                                           int *err)
603 {
604         struct xfrm_policy *pol, *ret;
605         struct hlist_head *chain;
606         struct hlist_node *entry;
607
608         *err = 0;
609         write_lock_bh(&xfrm_policy_lock);
610         chain = policy_hash_bysel(sel, sel->family, dir);
611         ret = NULL;
612         hlist_for_each_entry(pol, entry, chain, bydst) {
613                 if (pol->type == type &&
614                     !selector_cmp(sel, &pol->selector) &&
615                     xfrm_sec_ctx_match(ctx, pol->security)) {
616                         xfrm_pol_hold(pol);
617                         if (delete) {
618                                 *err = security_xfrm_policy_delete(pol);
619                                 if (*err) {
620                                         write_unlock_bh(&xfrm_policy_lock);
621                                         return pol;
622                                 }
623                                 hlist_del(&pol->bydst);
624                                 hlist_del(&pol->byidx);
625                                 xfrm_policy_count[dir]--;
626                         }
627                         ret = pol;
628                         break;
629                 }
630         }
631         write_unlock_bh(&xfrm_policy_lock);
632
633         if (ret && delete) {
634                 atomic_inc(&flow_cache_genid);
635                 xfrm_policy_kill(ret);
636         }
637         return ret;
638 }
639 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
640
641 struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
642                                      int *err)
643 {
644         struct xfrm_policy *pol, *ret;
645         struct hlist_head *chain;
646         struct hlist_node *entry;
647
648         *err = -ENOENT;
649         if (xfrm_policy_id2dir(id) != dir)
650                 return NULL;
651
652         *err = 0;
653         write_lock_bh(&xfrm_policy_lock);
654         chain = xfrm_policy_byidx + idx_hash(id);
655         ret = NULL;
656         hlist_for_each_entry(pol, entry, chain, byidx) {
657                 if (pol->type == type && pol->index == id) {
658                         xfrm_pol_hold(pol);
659                         if (delete) {
660                                 *err = security_xfrm_policy_delete(pol);
661                                 if (*err) {
662                                         write_unlock_bh(&xfrm_policy_lock);
663                                         return pol;
664                                 }
665                                 hlist_del(&pol->bydst);
666                                 hlist_del(&pol->byidx);
667                                 xfrm_policy_count[dir]--;
668                         }
669                         ret = pol;
670                         break;
671                 }
672         }
673         write_unlock_bh(&xfrm_policy_lock);
674
675         if (ret && delete) {
676                 atomic_inc(&flow_cache_genid);
677                 xfrm_policy_kill(ret);
678         }
679         return ret;
680 }
681 EXPORT_SYMBOL(xfrm_policy_byid);
682
683 #ifdef CONFIG_SECURITY_NETWORK_XFRM
684 static inline int
685 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
686 {
687         int dir, err = 0;
688
689         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
690                 struct xfrm_policy *pol;
691                 struct hlist_node *entry;
692                 int i;
693
694                 hlist_for_each_entry(pol, entry,
695                                      &xfrm_policy_inexact[dir], bydst) {
696                         if (pol->type != type)
697                                 continue;
698                         err = security_xfrm_policy_delete(pol);
699                         if (err) {
700                                 xfrm_audit_policy_delete(pol, 0,
701                                                          audit_info->loginuid,
702                                                          audit_info->secid);
703                                 return err;
704                         }
705                 }
706                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
707                         hlist_for_each_entry(pol, entry,
708                                              xfrm_policy_bydst[dir].table + i,
709                                              bydst) {
710                                 if (pol->type != type)
711                                         continue;
712                                 err = security_xfrm_policy_delete(pol);
713                                 if (err) {
714                                         xfrm_audit_policy_delete(pol, 0,
715                                                         audit_info->loginuid,
716                                                         audit_info->secid);
717                                         return err;
718                                 }
719                         }
720                 }
721         }
722         return err;
723 }
724 #else
725 static inline int
726 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
727 {
728         return 0;
729 }
730 #endif
731
732 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
733 {
734         int dir, err = 0;
735
736         write_lock_bh(&xfrm_policy_lock);
737
738         err = xfrm_policy_flush_secctx_check(type, audit_info);
739         if (err)
740                 goto out;
741
742         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
743                 struct xfrm_policy *pol;
744                 struct hlist_node *entry;
745                 int i, killed;
746
747                 killed = 0;
748         again1:
749                 hlist_for_each_entry(pol, entry,
750                                      &xfrm_policy_inexact[dir], bydst) {
751                         if (pol->type != type)
752                                 continue;
753                         hlist_del(&pol->bydst);
754                         hlist_del(&pol->byidx);
755                         write_unlock_bh(&xfrm_policy_lock);
756
757                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
758                                                  audit_info->secid);
759
760                         xfrm_policy_kill(pol);
761                         killed++;
762
763                         write_lock_bh(&xfrm_policy_lock);
764                         goto again1;
765                 }
766
767                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
768         again2:
769                         hlist_for_each_entry(pol, entry,
770                                              xfrm_policy_bydst[dir].table + i,
771                                              bydst) {
772                                 if (pol->type != type)
773                                         continue;
774                                 hlist_del(&pol->bydst);
775                                 hlist_del(&pol->byidx);
776                                 write_unlock_bh(&xfrm_policy_lock);
777
778                                 xfrm_audit_policy_delete(pol, 1,
779                                                          audit_info->loginuid,
780                                                          audit_info->secid);
781                                 xfrm_policy_kill(pol);
782                                 killed++;
783
784                                 write_lock_bh(&xfrm_policy_lock);
785                                 goto again2;
786                         }
787                 }
788
789                 xfrm_policy_count[dir] -= killed;
790         }
791         atomic_inc(&flow_cache_genid);
792 out:
793         write_unlock_bh(&xfrm_policy_lock);
794         return err;
795 }
796 EXPORT_SYMBOL(xfrm_policy_flush);
797
798 int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
799                      void *data)
800 {
801         struct xfrm_policy *pol, *last = NULL;
802         struct hlist_node *entry;
803         int dir, last_dir = 0, count, error;
804
805         read_lock_bh(&xfrm_policy_lock);
806         count = 0;
807
808         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
809                 struct hlist_head *table = xfrm_policy_bydst[dir].table;
810                 int i;
811
812                 hlist_for_each_entry(pol, entry,
813                                      &xfrm_policy_inexact[dir], bydst) {
814                         if (pol->type != type)
815                                 continue;
816                         if (last) {
817                                 error = func(last, last_dir % XFRM_POLICY_MAX,
818                                              count, data);
819                                 if (error)
820                                         goto out;
821                         }
822                         last = pol;
823                         last_dir = dir;
824                         count++;
825                 }
826                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
827                         hlist_for_each_entry(pol, entry, table + i, bydst) {
828                                 if (pol->type != type)
829                                         continue;
830                                 if (last) {
831                                         error = func(last, last_dir % XFRM_POLICY_MAX,
832                                                      count, data);
833                                         if (error)
834                                                 goto out;
835                                 }
836                                 last = pol;
837                                 last_dir = dir;
838                                 count++;
839                         }
840                 }
841         }
842         if (count == 0) {
843                 error = -ENOENT;
844                 goto out;
845         }
846         error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
847 out:
848         read_unlock_bh(&xfrm_policy_lock);
849         return error;
850 }
851 EXPORT_SYMBOL(xfrm_policy_walk);
852
853 /*
854  * Find policy to apply to this flow.
855  *
856  * Returns 0 if policy found, else an -errno.
857  */
858 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
859                              u8 type, u16 family, int dir)
860 {
861         struct xfrm_selector *sel = &pol->selector;
862         int match, ret = -ESRCH;
863
864         if (pol->family != family ||
865             pol->type != type)
866                 return ret;
867
868         match = xfrm_selector_match(sel, fl, family);
869         if (match)
870                 ret = security_xfrm_policy_lookup(pol, fl->secid, dir);
871
872         return ret;
873 }
874
875 static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
876                                                      u16 family, u8 dir)
877 {
878         int err;
879         struct xfrm_policy *pol, *ret;
880         xfrm_address_t *daddr, *saddr;
881         struct hlist_node *entry;
882         struct hlist_head *chain;
883         u32 priority = ~0U;
884
885         daddr = xfrm_flowi_daddr(fl, family);
886         saddr = xfrm_flowi_saddr(fl, family);
887         if (unlikely(!daddr || !saddr))
888                 return NULL;
889
890         read_lock_bh(&xfrm_policy_lock);
891         chain = policy_hash_direct(daddr, saddr, family, dir);
892         ret = NULL;
893         hlist_for_each_entry(pol, entry, chain, bydst) {
894                 err = xfrm_policy_match(pol, fl, type, family, dir);
895                 if (err) {
896                         if (err == -ESRCH)
897                                 continue;
898                         else {
899                                 ret = ERR_PTR(err);
900                                 goto fail;
901                         }
902                 } else {
903                         ret = pol;
904                         priority = ret->priority;
905                         break;
906                 }
907         }
908         chain = &xfrm_policy_inexact[dir];
909         hlist_for_each_entry(pol, entry, chain, bydst) {
910                 err = xfrm_policy_match(pol, fl, type, family, dir);
911                 if (err) {
912                         if (err == -ESRCH)
913                                 continue;
914                         else {
915                                 ret = ERR_PTR(err);
916                                 goto fail;
917                         }
918                 } else if (pol->priority < priority) {
919                         ret = pol;
920                         break;
921                 }
922         }
923         if (ret)
924                 xfrm_pol_hold(ret);
925 fail:
926         read_unlock_bh(&xfrm_policy_lock);
927
928         return ret;
929 }
930
931 static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
932                                void **objp, atomic_t **obj_refp)
933 {
934         struct xfrm_policy *pol;
935         int err = 0;
936
937 #ifdef CONFIG_XFRM_SUB_POLICY
938         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
939         if (IS_ERR(pol)) {
940                 err = PTR_ERR(pol);
941                 pol = NULL;
942         }
943         if (pol || err)
944                 goto end;
945 #endif
946         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
947         if (IS_ERR(pol)) {
948                 err = PTR_ERR(pol);
949                 pol = NULL;
950         }
951 #ifdef CONFIG_XFRM_SUB_POLICY
952 end:
953 #endif
954         if ((*objp = (void *) pol) != NULL)
955                 *obj_refp = &pol->refcnt;
956         return err;
957 }
958
959 static inline int policy_to_flow_dir(int dir)
960 {
961         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
962             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
963             XFRM_POLICY_FWD == FLOW_DIR_FWD)
964                 return dir;
965         switch (dir) {
966         default:
967         case XFRM_POLICY_IN:
968                 return FLOW_DIR_IN;
969         case XFRM_POLICY_OUT:
970                 return FLOW_DIR_OUT;
971         case XFRM_POLICY_FWD:
972                 return FLOW_DIR_FWD;
973         }
974 }
975
976 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
977 {
978         struct xfrm_policy *pol;
979
980         read_lock_bh(&xfrm_policy_lock);
981         if ((pol = sk->sk_policy[dir]) != NULL) {
982                 int match = xfrm_selector_match(&pol->selector, fl,
983                                                 sk->sk_family);
984                 int err = 0;
985
986                 if (match) {
987                         err = security_xfrm_policy_lookup(pol, fl->secid,
988                                         policy_to_flow_dir(dir));
989                         if (!err)
990                                 xfrm_pol_hold(pol);
991                         else if (err == -ESRCH)
992                                 pol = NULL;
993                         else
994                                 pol = ERR_PTR(err);
995                 } else
996                         pol = NULL;
997         }
998         read_unlock_bh(&xfrm_policy_lock);
999         return pol;
1000 }
1001
1002 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1003 {
1004         struct hlist_head *chain = policy_hash_bysel(&pol->selector,
1005                                                      pol->family, dir);
1006
1007         hlist_add_head(&pol->bydst, chain);
1008         hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
1009         xfrm_policy_count[dir]++;
1010         xfrm_pol_hold(pol);
1011
1012         if (xfrm_bydst_should_resize(dir, NULL))
1013                 schedule_work(&xfrm_hash_work);
1014 }
1015
1016 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1017                                                 int dir)
1018 {
1019         if (hlist_unhashed(&pol->bydst))
1020                 return NULL;
1021
1022         hlist_del(&pol->bydst);
1023         hlist_del(&pol->byidx);
1024         xfrm_policy_count[dir]--;
1025
1026         return pol;
1027 }
1028
1029 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1030 {
1031         write_lock_bh(&xfrm_policy_lock);
1032         pol = __xfrm_policy_unlink(pol, dir);
1033         write_unlock_bh(&xfrm_policy_lock);
1034         if (pol) {
1035                 if (dir < XFRM_POLICY_MAX)
1036                         atomic_inc(&flow_cache_genid);
1037                 xfrm_policy_kill(pol);
1038                 return 0;
1039         }
1040         return -ENOENT;
1041 }
1042 EXPORT_SYMBOL(xfrm_policy_delete);
1043
1044 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1045 {
1046         struct xfrm_policy *old_pol;
1047
1048 #ifdef CONFIG_XFRM_SUB_POLICY
1049         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1050                 return -EINVAL;
1051 #endif
1052
1053         write_lock_bh(&xfrm_policy_lock);
1054         old_pol = sk->sk_policy[dir];
1055         sk->sk_policy[dir] = pol;
1056         if (pol) {
1057                 pol->curlft.add_time = get_seconds();
1058                 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
1059                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1060         }
1061         if (old_pol)
1062                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1063         write_unlock_bh(&xfrm_policy_lock);
1064
1065         if (old_pol) {
1066                 xfrm_policy_kill(old_pol);
1067         }
1068         return 0;
1069 }
1070
1071 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1072 {
1073         struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
1074
1075         if (newp) {
1076                 newp->selector = old->selector;
1077                 if (security_xfrm_policy_clone(old, newp)) {
1078                         kfree(newp);
1079                         return NULL;  /* ENOMEM */
1080                 }
1081                 newp->lft = old->lft;
1082                 newp->curlft = old->curlft;
1083                 newp->action = old->action;
1084                 newp->flags = old->flags;
1085                 newp->xfrm_nr = old->xfrm_nr;
1086                 newp->index = old->index;
1087                 newp->type = old->type;
1088                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1089                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1090                 write_lock_bh(&xfrm_policy_lock);
1091                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1092                 write_unlock_bh(&xfrm_policy_lock);
1093                 xfrm_pol_put(newp);
1094         }
1095         return newp;
1096 }
1097
1098 int __xfrm_sk_clone_policy(struct sock *sk)
1099 {
1100         struct xfrm_policy *p0 = sk->sk_policy[0],
1101                            *p1 = sk->sk_policy[1];
1102
1103         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1104         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1105                 return -ENOMEM;
1106         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1107                 return -ENOMEM;
1108         return 0;
1109 }
1110
1111 static int
1112 xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1113                unsigned short family)
1114 {
1115         int err;
1116         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1117
1118         if (unlikely(afinfo == NULL))
1119                 return -EINVAL;
1120         err = afinfo->get_saddr(local, remote);
1121         xfrm_policy_put_afinfo(afinfo);
1122         return err;
1123 }
1124
1125 /* Resolve list of templates for the flow, given policy. */
1126
1127 static int
1128 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1129                       struct xfrm_state **xfrm,
1130                       unsigned short family)
1131 {
1132         int nx;
1133         int i, error;
1134         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1135         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1136         xfrm_address_t tmp;
1137
1138         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1139                 struct xfrm_state *x;
1140                 xfrm_address_t *remote = daddr;
1141                 xfrm_address_t *local  = saddr;
1142                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1143
1144                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1145                     tmpl->mode == XFRM_MODE_BEET) {
1146                         remote = &tmpl->id.daddr;
1147                         local = &tmpl->saddr;
1148                         family = tmpl->encap_family;
1149                         if (xfrm_addr_any(local, family)) {
1150                                 error = xfrm_get_saddr(&tmp, remote, family);
1151                                 if (error)
1152                                         goto fail;
1153                                 local = &tmp;
1154                         }
1155                 }
1156
1157                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1158
1159                 if (x && x->km.state == XFRM_STATE_VALID) {
1160                         xfrm[nx++] = x;
1161                         daddr = remote;
1162                         saddr = local;
1163                         continue;
1164                 }
1165                 if (x) {
1166                         error = (x->km.state == XFRM_STATE_ERROR ?
1167                                  -EINVAL : -EAGAIN);
1168                         xfrm_state_put(x);
1169                 }
1170
1171                 if (!tmpl->optional)
1172                         goto fail;
1173         }
1174         return nx;
1175
1176 fail:
1177         for (nx--; nx>=0; nx--)
1178                 xfrm_state_put(xfrm[nx]);
1179         return error;
1180 }
1181
1182 static int
1183 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1184                   struct xfrm_state **xfrm,
1185                   unsigned short family)
1186 {
1187         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1188         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1189         int cnx = 0;
1190         int error;
1191         int ret;
1192         int i;
1193
1194         for (i = 0; i < npols; i++) {
1195                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1196                         error = -ENOBUFS;
1197                         goto fail;
1198                 }
1199
1200                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1201                 if (ret < 0) {
1202                         error = ret;
1203                         goto fail;
1204                 } else
1205                         cnx += ret;
1206         }
1207
1208         /* found states are sorted for outbound processing */
1209         if (npols > 1)
1210                 xfrm_state_sort(xfrm, tpp, cnx, family);
1211
1212         return cnx;
1213
1214  fail:
1215         for (cnx--; cnx>=0; cnx--)
1216                 xfrm_state_put(tpp[cnx]);
1217         return error;
1218
1219 }
1220
1221 /* Check that the bundle accepts the flow and its components are
1222  * still valid.
1223  */
1224
1225 static struct dst_entry *
1226 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1227 {
1228         struct dst_entry *x;
1229         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1230         if (unlikely(afinfo == NULL))
1231                 return ERR_PTR(-EINVAL);
1232         x = afinfo->find_bundle(fl, policy);
1233         xfrm_policy_put_afinfo(afinfo);
1234         return x;
1235 }
1236
1237 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1238  * all the metrics... Shortly, bundle a bundle.
1239  */
1240
1241 static int
1242 xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
1243                    struct flowi *fl, struct dst_entry **dst_p,
1244                    unsigned short family)
1245 {
1246         int err;
1247         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1248         if (unlikely(afinfo == NULL))
1249                 return -EINVAL;
1250         err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
1251         xfrm_policy_put_afinfo(afinfo);
1252         return err;
1253 }
1254
1255 static int inline
1256 xfrm_dst_alloc_copy(void **target, void *src, int size)
1257 {
1258         if (!*target) {
1259                 *target = kmalloc(size, GFP_ATOMIC);
1260                 if (!*target)
1261                         return -ENOMEM;
1262         }
1263         memcpy(*target, src, size);
1264         return 0;
1265 }
1266
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of @sel in the bundle's
 * "partner" slot (allocated on first use).  Without it this is a no-op
 * returning 0.
 */
static inline int
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(struct xfrm_selector));
#else
	return 0;
#endif
}
1278
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of the flow in the bundle's
 * "origin" slot (allocated on first use).  Without it this is a no-op
 * returning 0.
 */
static inline int
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(struct flowi));
#else
	return 0;
#endif
}
1289
1290 static int stale_bundle(struct dst_entry *dst);
1291
1292 /* Main function: finds/creates a bundle for given flow.
1293  *
1294  * At the moment we eat a raw IP route. Mostly to speed up lookups
1295  * on interfaces with disabled IPsec.
1296  */
1297 int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1298                   struct sock *sk, int flags)
1299 {
1300         struct xfrm_policy *policy;
1301         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1302         int npols;
1303         int pol_dead;
1304         int xfrm_nr;
1305         int pi;
1306         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1307         struct dst_entry *dst, *dst_orig = *dst_p;
1308         int nx = 0;
1309         int err;
1310         u32 genid;
1311         u16 family;
1312         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1313
1314 restart:
1315         genid = atomic_read(&flow_cache_genid);
1316         policy = NULL;
1317         for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1318                 pols[pi] = NULL;
1319         npols = 0;
1320         pol_dead = 0;
1321         xfrm_nr = 0;
1322
1323         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1324                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1325                 err = PTR_ERR(policy);
1326                 if (IS_ERR(policy))
1327                         goto dropdst;
1328         }
1329
1330         if (!policy) {
1331                 /* To accelerate a bit...  */
1332                 if ((dst_orig->flags & DST_NOXFRM) ||
1333                     !xfrm_policy_count[XFRM_POLICY_OUT])
1334                         return 0;
1335
1336                 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1337                                            dir, xfrm_policy_lookup);
1338                 err = PTR_ERR(policy);
1339                 if (IS_ERR(policy))
1340                         goto dropdst;
1341         }
1342
1343         if (!policy)
1344                 return 0;
1345
1346         family = dst_orig->ops->family;
1347         policy->curlft.use_time = get_seconds();
1348         pols[0] = policy;
1349         npols ++;
1350         xfrm_nr += pols[0]->xfrm_nr;
1351
1352         switch (policy->action) {
1353         default:
1354         case XFRM_POLICY_BLOCK:
1355                 /* Prohibit the flow */
1356                 err = -EPERM;
1357                 goto error;
1358
1359         case XFRM_POLICY_ALLOW:
1360 #ifndef CONFIG_XFRM_SUB_POLICY
1361                 if (policy->xfrm_nr == 0) {
1362                         /* Flow passes not transformed. */
1363                         xfrm_pol_put(policy);
1364                         return 0;
1365                 }
1366 #endif
1367
1368                 /* Try to find matching bundle.
1369                  *
1370                  * LATER: help from flow cache. It is optional, this
1371                  * is required only for output policy.
1372                  */
1373                 dst = xfrm_find_bundle(fl, policy, family);
1374                 if (IS_ERR(dst)) {
1375                         err = PTR_ERR(dst);
1376                         goto error;
1377                 }
1378
1379                 if (dst)
1380                         break;
1381
1382 #ifdef CONFIG_XFRM_SUB_POLICY
1383                 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1384                         pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1385                                                             fl, family,
1386                                                             XFRM_POLICY_OUT);
1387                         if (pols[1]) {
1388                                 if (IS_ERR(pols[1])) {
1389                                         err = PTR_ERR(pols[1]);
1390                                         goto error;
1391                                 }
1392                                 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1393                                         err = -EPERM;
1394                                         goto error;
1395                                 }
1396                                 npols ++;
1397                                 xfrm_nr += pols[1]->xfrm_nr;
1398                         }
1399                 }
1400
1401                 /*
1402                  * Because neither flowi nor bundle information knows about
1403                  * transformation template size. On more than one policy usage
1404                  * we can realize whether all of them is bypass or not after
1405                  * they are searched. See above not-transformed bypass
1406                  * is surrounded by non-sub policy configuration, too.
1407                  */
1408                 if (xfrm_nr == 0) {
1409                         /* Flow passes not transformed. */
1410                         xfrm_pols_put(pols, npols);
1411                         return 0;
1412                 }
1413
1414 #endif
1415                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1416
1417                 if (unlikely(nx<0)) {
1418                         err = nx;
1419                         if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1420                                 /* EREMOTE tells the caller to generate
1421                                  * a one-shot blackhole route.
1422                                  */
1423                                 xfrm_pol_put(policy);
1424                                 return -EREMOTE;
1425                         }
1426                         if (err == -EAGAIN && flags) {
1427                                 DECLARE_WAITQUEUE(wait, current);
1428
1429                                 add_wait_queue(&km_waitq, &wait);
1430                                 set_current_state(TASK_INTERRUPTIBLE);
1431                                 schedule();
1432                                 set_current_state(TASK_RUNNING);
1433                                 remove_wait_queue(&km_waitq, &wait);
1434
1435                                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1436
1437                                 if (nx == -EAGAIN && signal_pending(current)) {
1438                                         err = -ERESTART;
1439                                         goto error;
1440                                 }
1441                                 if (nx == -EAGAIN ||
1442                                     genid != atomic_read(&flow_cache_genid)) {
1443                                         xfrm_pols_put(pols, npols);
1444                                         goto restart;
1445                                 }
1446                                 err = nx;
1447                         }
1448                         if (err < 0)
1449                                 goto error;
1450                 }
1451                 if (nx == 0) {
1452                         /* Flow passes not transformed. */
1453                         xfrm_pols_put(pols, npols);
1454                         return 0;
1455                 }
1456
1457                 dst = dst_orig;
1458                 err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
1459
1460                 if (unlikely(err)) {
1461                         int i;
1462                         for (i=0; i<nx; i++)
1463                                 xfrm_state_put(xfrm[i]);
1464                         goto error;
1465                 }
1466
1467                 for (pi = 0; pi < npols; pi++) {
1468                         read_lock_bh(&pols[pi]->lock);
1469                         pol_dead |= pols[pi]->dead;
1470                         read_unlock_bh(&pols[pi]->lock);
1471                 }
1472
1473                 write_lock_bh(&policy->lock);
1474                 if (unlikely(pol_dead || stale_bundle(dst))) {
1475                         /* Wow! While we worked on resolving, this
1476                          * policy has gone. Retry. It is not paranoia,
1477                          * we just cannot enlist new bundle to dead object.
1478                          * We can't enlist stable bundles either.
1479                          */
1480                         write_unlock_bh(&policy->lock);
1481                         if (dst)
1482                                 dst_free(dst);
1483
1484                         err = -EHOSTUNREACH;
1485                         goto error;
1486                 }
1487
1488                 if (npols > 1)
1489                         err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1490                 else
1491                         err = xfrm_dst_update_origin(dst, fl);
1492                 if (unlikely(err)) {
1493                         write_unlock_bh(&policy->lock);
1494                         if (dst)
1495                                 dst_free(dst);
1496                         goto error;
1497                 }
1498
1499                 dst->next = policy->bundles;
1500                 policy->bundles = dst;
1501                 dst_hold(dst);
1502                 write_unlock_bh(&policy->lock);
1503         }
1504         *dst_p = dst;
1505         dst_release(dst_orig);
1506         xfrm_pols_put(pols, npols);
1507         return 0;
1508
1509 error:
1510         xfrm_pols_put(pols, npols);
1511 dropdst:
1512         dst_release(dst_orig);
1513         *dst_p = NULL;
1514         return err;
1515 }
1516 EXPORT_SYMBOL(__xfrm_lookup);
1517
1518 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1519                 struct sock *sk, int flags)
1520 {
1521         int err = __xfrm_lookup(dst_p, fl, sk, flags);
1522
1523         if (err == -EREMOTE) {
1524                 dst_release(*dst_p);
1525                 *dst_p = NULL;
1526                 err = -EAGAIN;
1527         }
1528
1529         return err;
1530 }
1531 EXPORT_SYMBOL(xfrm_lookup);
1532
1533 static inline int
1534 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1535 {
1536         struct xfrm_state *x;
1537
1538         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1539                 return 0;
1540         x = skb->sp->xvec[idx];
1541         if (!x->type->reject)
1542                 return 0;
1543         return x->type->reject(x, skb, fl);
1544 }
1545
1546 /* When skb is transformed back to its "native" form, we have to
1547  * check policy restrictions. At the moment we make this in maximally
1548  * stupid way. Shame on me. :-) Of course, connected sockets must
1549  * have policy cached at them.
1550  */
1551
1552 static inline int
1553 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1554               unsigned short family)
1555 {
1556         if (xfrm_state_kern(x))
1557                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1558         return  x->id.proto == tmpl->id.proto &&
1559                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1560                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1561                 x->props.mode == tmpl->mode &&
1562                 ((tmpl->aalgos & (1<<x->props.aalgo)) ||
1563                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1564                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1565                   xfrm_state_addr_cmp(tmpl, x, family));
1566 }
1567
1568 /*
1569  * 0 or more than 0 is returned when validation is succeeded (either bypass
1570  * because of optional transport mode, or next index of the mathced secpath
1571  * state with the template.
1572  * -1 is returned when no matching template is found.
1573  * Otherwise "-2 - errored_index" is returned.
1574  */
1575 static inline int
1576 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1577                unsigned short family)
1578 {
1579         int idx = start;
1580
1581         if (tmpl->optional) {
1582                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1583                         return start;
1584         } else
1585                 start = -1;
1586         for (; idx < sp->len; idx++) {
1587                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1588                         return ++idx;
1589                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1590                         if (start == -1)
1591                                 start = -2-idx;
1592                         break;
1593                 }
1594         }
1595         return start;
1596 }
1597
1598 int
1599 xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
1600 {
1601         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1602         int err;
1603
1604         if (unlikely(afinfo == NULL))
1605                 return -EAFNOSUPPORT;
1606
1607         afinfo->decode_session(skb, fl);
1608         err = security_xfrm_decode_session(skb, &fl->secid);
1609         xfrm_policy_put_afinfo(afinfo);
1610         return err;
1611 }
1612 EXPORT_SYMBOL(xfrm_decode_session);
1613
1614 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1615 {
1616         for (; k < sp->len; k++) {
1617                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1618                         *idxp = k;
1619                         return 1;
1620                 }
1621         }
1622
1623         return 0;
1624 }
1625
1626 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1627                         unsigned short family)
1628 {
1629         struct xfrm_policy *pol;
1630         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1631         int npols = 0;
1632         int xfrm_nr;
1633         int pi;
1634         struct flowi fl;
1635         u8 fl_dir = policy_to_flow_dir(dir);
1636         int xerr_idx = -1;
1637
1638         if (xfrm_decode_session(skb, &fl, family) < 0)
1639                 return 0;
1640         nf_nat_decode_session(skb, &fl, family);
1641
1642         /* First, check used SA against their selectors. */
1643         if (skb->sp) {
1644                 int i;
1645
1646                 for (i=skb->sp->len-1; i>=0; i--) {
1647                         struct xfrm_state *x = skb->sp->xvec[i];
1648                         if (!xfrm_selector_match(&x->sel, &fl, family))
1649                                 return 0;
1650                 }
1651         }
1652
1653         pol = NULL;
1654         if (sk && sk->sk_policy[dir]) {
1655                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1656                 if (IS_ERR(pol))
1657                         return 0;
1658         }
1659
1660         if (!pol)
1661                 pol = flow_cache_lookup(&fl, family, fl_dir,
1662                                         xfrm_policy_lookup);
1663
1664         if (IS_ERR(pol))
1665                 return 0;
1666
1667         if (!pol) {
1668                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1669                         xfrm_secpath_reject(xerr_idx, skb, &fl);
1670                         return 0;
1671                 }
1672                 return 1;
1673         }
1674
1675         pol->curlft.use_time = get_seconds();
1676
1677         pols[0] = pol;
1678         npols ++;
1679 #ifdef CONFIG_XFRM_SUB_POLICY
1680         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1681                 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1682                                                     &fl, family,
1683                                                     XFRM_POLICY_IN);
1684                 if (pols[1]) {
1685                         if (IS_ERR(pols[1]))
1686                                 return 0;
1687                         pols[1]->curlft.use_time = get_seconds();
1688                         npols ++;
1689                 }
1690         }
1691 #endif
1692
1693         if (pol->action == XFRM_POLICY_ALLOW) {
1694                 struct sec_path *sp;
1695                 static struct sec_path dummy;
1696                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1697                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1698                 struct xfrm_tmpl **tpp = tp;
1699                 int ti = 0;
1700                 int i, k;
1701
1702                 if ((sp = skb->sp) == NULL)
1703                         sp = &dummy;
1704
1705                 for (pi = 0; pi < npols; pi++) {
1706                         if (pols[pi] != pol &&
1707                             pols[pi]->action != XFRM_POLICY_ALLOW)
1708                                 goto reject;
1709                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH)
1710                                 goto reject_error;
1711                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
1712                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1713                 }
1714                 xfrm_nr = ti;
1715                 if (npols > 1) {
1716                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
1717                         tpp = stp;
1718                 }
1719
1720                 /* For each tunnel xfrm, find the first matching tmpl.
1721                  * For each tmpl before that, find corresponding xfrm.
1722                  * Order is _important_. Later we will implement
1723                  * some barriers, but at the moment barriers
1724                  * are implied between each two transformations.
1725                  */
1726                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
1727                         k = xfrm_policy_ok(tpp[i], sp, k, family);
1728                         if (k < 0) {
1729                                 if (k < -1)
1730                                         /* "-2 - errored_index" returned */
1731                                         xerr_idx = -(2+k);
1732                                 goto reject;
1733                         }
1734                 }
1735
1736                 if (secpath_has_nontransport(sp, k, &xerr_idx))
1737                         goto reject;
1738
1739                 xfrm_pols_put(pols, npols);
1740                 return 1;
1741         }
1742
1743 reject:
1744         xfrm_secpath_reject(xerr_idx, skb, &fl);
1745 reject_error:
1746         xfrm_pols_put(pols, npols);
1747         return 0;
1748 }
1749 EXPORT_SYMBOL(__xfrm_policy_check);
1750
1751 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1752 {
1753         struct flowi fl;
1754
1755         if (xfrm_decode_session(skb, &fl, family) < 0)
1756                 return 0;
1757
1758         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
1759 }
1760 EXPORT_SYMBOL(__xfrm_route_forward);
1761
1762 /* Optimize later using cookies and generation ids. */
1763
1764 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
1765 {
1766         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
1767          * to "-1" to force all XFRM destinations to get validated by
1768          * dst_ops->check on every use.  We do this because when a
1769          * normal route referenced by an XFRM dst is obsoleted we do
1770          * not go looking around for all parent referencing XFRM dsts
1771          * so that we can invalidate them.  It is just too much work.
1772          * Instead we make the checks here on every use.  For example:
1773          *
1774          *      XFRM dst A --> IPv4 dst X
1775          *
1776          * X is the "xdst->route" of A (X is also the "dst->path" of A
1777          * in this example).  If X is marked obsolete, "A" will not
1778          * notice.  That's what we are validating here via the
1779          * stale_bundle() check.
1780          *
1781          * When a policy's bundle is pruned, we dst_free() the XFRM
1782          * dst which causes it's ->obsolete field to be set to a
1783          * positive non-zero integer.  If an XFRM dst has been pruned
1784          * like this, we want to force a new route lookup.
1785          */
1786         if (dst->obsolete < 0 && !stale_bundle(dst))
1787                 return dst;
1788
1789         return NULL;
1790 }
1791
1792 static int stale_bundle(struct dst_entry *dst)
1793 {
1794         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
1795 }
1796
1797 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
1798 {
1799         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
1800                 dst->dev = init_net.loopback_dev;
1801                 dev_hold(dst->dev);
1802                 dev_put(dev);
1803         }
1804 }
1805 EXPORT_SYMBOL(xfrm_dst_ifdown);
1806
/*
 * dst_ops->link_failure handler for XFRM destinations.  Intentionally
 * empty: this should be impossible, because such a dst must be popped
 * before the packet reaches the point of failure.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
}
1812
1813 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
1814 {
1815         if (dst) {
1816                 if (dst->obsolete) {
1817                         dst_release(dst);
1818                         dst = NULL;
1819                 }
1820         }
1821         return dst;
1822 }
1823
1824 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
1825 {
1826         struct dst_entry *dst, **dstp;
1827
1828         write_lock(&pol->lock);
1829         dstp = &pol->bundles;
1830         while ((dst=*dstp) != NULL) {
1831                 if (func(dst)) {
1832                         *dstp = dst->next;
1833                         dst->next = *gc_list_p;
1834                         *gc_list_p = dst;
1835                 } else {
1836                         dstp = &dst->next;
1837                 }
1838         }
1839         write_unlock(&pol->lock);
1840 }
1841
1842 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
1843 {
1844         struct dst_entry *gc_list = NULL;
1845         int dir;
1846
1847         read_lock_bh(&xfrm_policy_lock);
1848         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
1849                 struct xfrm_policy *pol;
1850                 struct hlist_node *entry;
1851                 struct hlist_head *table;
1852                 int i;
1853
1854                 hlist_for_each_entry(pol, entry,
1855                                      &xfrm_policy_inexact[dir], bydst)
1856                         prune_one_bundle(pol, func, &gc_list);
1857
1858                 table = xfrm_policy_bydst[dir].table;
1859                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
1860                         hlist_for_each_entry(pol, entry, table + i, bydst)
1861                                 prune_one_bundle(pol, func, &gc_list);
1862                 }
1863         }
1864         read_unlock_bh(&xfrm_policy_lock);
1865
1866         while (gc_list) {
1867                 struct dst_entry *dst = gc_list;
1868                 gc_list = dst->next;
1869                 dst_free(dst);
1870         }
1871 }
1872
1873 static int unused_bundle(struct dst_entry *dst)
1874 {
1875         return !atomic_read(&dst->__refcnt);
1876 }
1877
1878 static void __xfrm_garbage_collect(void)
1879 {
1880         xfrm_prune_bundles(unused_bundle);
1881 }
1882
1883 static int xfrm_flush_bundles(void)
1884 {
1885         xfrm_prune_bundles(stale_bundle);
1886         return 0;
1887 }
1888
1889 void xfrm_init_pmtu(struct dst_entry *dst)
1890 {
1891         do {
1892                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1893                 u32 pmtu, route_mtu_cached;
1894
1895                 pmtu = dst_mtu(dst->child);
1896                 xdst->child_mtu_cached = pmtu;
1897
1898                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
1899
1900                 route_mtu_cached = dst_mtu(xdst->route);
1901                 xdst->route_mtu_cached = route_mtu_cached;
1902
1903                 if (pmtu > route_mtu_cached)
1904                         pmtu = route_mtu_cached;
1905
1906                 dst->metrics[RTAX_MTU-1] = pmtu;
1907         } while ((dst = dst->next));
1908 }
1909
1910 EXPORT_SYMBOL(xfrm_init_pmtu);
1911
1912 /* Check that the bundle accepts the flow and its components are
1913  * still valid.
1914  */
1915
1916 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
1917                 struct flowi *fl, int family, int strict)
1918 {
1919         struct dst_entry *dst = &first->u.dst;
1920         struct xfrm_dst *last;
1921         u32 mtu;
1922
1923         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
1924             (dst->dev && !netif_running(dst->dev)))
1925                 return 0;
1926 #ifdef CONFIG_XFRM_SUB_POLICY
1927         if (fl) {
1928                 if (first->origin && !flow_cache_uli_match(first->origin, fl))
1929                         return 0;
1930                 if (first->partner &&
1931                     !xfrm_selector_match(first->partner, fl, family))
1932                         return 0;
1933         }
1934 #endif
1935
1936         last = NULL;
1937
1938         do {
1939                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
1940
1941                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
1942                         return 0;
1943                 if (fl && pol &&
1944                     !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
1945                         return 0;
1946                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
1947                         return 0;
1948                 if (xdst->genid != dst->xfrm->genid)
1949                         return 0;
1950
1951                 if (strict && fl &&
1952                     !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
1953                     !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
1954                         return 0;
1955
1956                 mtu = dst_mtu(dst->child);
1957                 if (xdst->child_mtu_cached != mtu) {
1958                         last = xdst;
1959                         xdst->child_mtu_cached = mtu;
1960                 }
1961
1962                 if (!dst_check(xdst->route, xdst->route_cookie))
1963                         return 0;
1964                 mtu = dst_mtu(xdst->route);
1965                 if (xdst->route_mtu_cached != mtu) {
1966                         last = xdst;
1967                         xdst->route_mtu_cached = mtu;
1968                 }
1969
1970                 dst = dst->child;
1971         } while (dst->xfrm);
1972
1973         if (likely(!last))
1974                 return 1;
1975
1976         mtu = last->child_mtu_cached;
1977         for (;;) {
1978                 dst = &last->u.dst;
1979
1980                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
1981                 if (mtu > last->route_mtu_cached)
1982                         mtu = last->route_mtu_cached;
1983                 dst->metrics[RTAX_MTU-1] = mtu;
1984
1985                 if (last == first)
1986                         break;
1987
1988                 last = (struct xfrm_dst *)last->u.dst.next;
1989                 last->child_mtu_cached = mtu;
1990         }
1991
1992         return 1;
1993 }
1994
1995 EXPORT_SYMBOL(xfrm_bundle_ok);
1996
1997 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
1998 {
1999         int err = 0;
2000         if (unlikely(afinfo == NULL))
2001                 return -EINVAL;
2002         if (unlikely(afinfo->family >= NPROTO))
2003                 return -EAFNOSUPPORT;
2004         write_lock_bh(&xfrm_policy_afinfo_lock);
2005         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2006                 err = -ENOBUFS;
2007         else {
2008                 struct dst_ops *dst_ops = afinfo->dst_ops;
2009                 if (likely(dst_ops->kmem_cachep == NULL))
2010                         dst_ops->kmem_cachep = xfrm_dst_cache;
2011                 if (likely(dst_ops->check == NULL))
2012                         dst_ops->check = xfrm_dst_check;
2013                 if (likely(dst_ops->negative_advice == NULL))
2014                         dst_ops->negative_advice = xfrm_negative_advice;
2015                 if (likely(dst_ops->link_failure == NULL))
2016                         dst_ops->link_failure = xfrm_link_failure;
2017                 if (likely(afinfo->garbage_collect == NULL))
2018                         afinfo->garbage_collect = __xfrm_garbage_collect;
2019                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2020         }
2021         write_unlock_bh(&xfrm_policy_afinfo_lock);
2022         return err;
2023 }
2024 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2025
2026 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2027 {
2028         int err = 0;
2029         if (unlikely(afinfo == NULL))
2030                 return -EINVAL;
2031         if (unlikely(afinfo->family >= NPROTO))
2032                 return -EAFNOSUPPORT;
2033         write_lock_bh(&xfrm_policy_afinfo_lock);
2034         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2035                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2036                         err = -EINVAL;
2037                 else {
2038                         struct dst_ops *dst_ops = afinfo->dst_ops;
2039                         xfrm_policy_afinfo[afinfo->family] = NULL;
2040                         dst_ops->kmem_cachep = NULL;
2041                         dst_ops->check = NULL;
2042                         dst_ops->negative_advice = NULL;
2043                         dst_ops->link_failure = NULL;
2044                         afinfo->garbage_collect = NULL;
2045                 }
2046         }
2047         write_unlock_bh(&xfrm_policy_afinfo_lock);
2048         return err;
2049 }
2050 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2051
2052 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2053 {
2054         struct xfrm_policy_afinfo *afinfo;
2055         if (unlikely(family >= NPROTO))
2056                 return NULL;
2057         read_lock(&xfrm_policy_afinfo_lock);
2058         afinfo = xfrm_policy_afinfo[family];
2059         if (unlikely(!afinfo))
2060                 read_unlock(&xfrm_policy_afinfo_lock);
2061         return afinfo;
2062 }
2063
2064 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2065 {
2066         read_unlock(&xfrm_policy_afinfo_lock);
2067 }
2068
2069 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2070 {
2071         struct net_device *dev = ptr;
2072
2073         if (dev->nd_net != &init_net)
2074                 return NOTIFY_DONE;
2075
2076         switch (event) {
2077         case NETDEV_DOWN:
2078                 xfrm_flush_bundles();
2079         }
2080         return NOTIFY_DONE;
2081 }
2082
2083 static struct notifier_block xfrm_dev_notifier = {
2084         xfrm_dev_event,
2085         NULL,
2086         0
2087 };
2088
2089 static void __init xfrm_policy_init(void)
2090 {
2091         unsigned int hmask, sz;
2092         int dir;
2093
2094         xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2095                                            sizeof(struct xfrm_dst),
2096                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2097                                            NULL);
2098
2099         hmask = 8 - 1;
2100         sz = (hmask+1) * sizeof(struct hlist_head);
2101
2102         xfrm_policy_byidx = xfrm_hash_alloc(sz);
2103         xfrm_idx_hmask = hmask;
2104         if (!xfrm_policy_byidx)
2105                 panic("XFRM: failed to allocate byidx hash\n");
2106
2107         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2108                 struct xfrm_policy_hash *htab;
2109
2110                 INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2111
2112                 htab = &xfrm_policy_bydst[dir];
2113                 htab->table = xfrm_hash_alloc(sz);
2114                 htab->hmask = hmask;
2115                 if (!htab->table)
2116                         panic("XFRM: failed to allocate bydst hash\n");
2117         }
2118
2119         INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2120         register_netdevice_notifier(&xfrm_dev_notifier);
2121 }
2122
2123 void __init xfrm_init(void)
2124 {
2125         xfrm_state_init();
2126         xfrm_policy_init();
2127         xfrm_input_init();
2128 }
2129
2130 #ifdef CONFIG_AUDITSYSCALL
2131 static inline void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2132                                                 struct audit_buffer *audit_buf)
2133 {
2134         if (xp->security)
2135                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2136                                  xp->security->ctx_alg, xp->security->ctx_doi,
2137                                  xp->security->ctx_str);
2138
2139         switch(xp->selector.family) {
2140         case AF_INET:
2141                 audit_log_format(audit_buf, " src=%u.%u.%u.%u dst=%u.%u.%u.%u",
2142                                  NIPQUAD(xp->selector.saddr.a4),
2143                                  NIPQUAD(xp->selector.daddr.a4));
2144                 break;
2145         case AF_INET6:
2146                 {
2147                         struct in6_addr saddr6, daddr6;
2148
2149                         memcpy(&saddr6, xp->selector.saddr.a6,
2150                                 sizeof(struct in6_addr));
2151                         memcpy(&daddr6, xp->selector.daddr.a6,
2152                                 sizeof(struct in6_addr));
2153                         audit_log_format(audit_buf,
2154                                 " src=" NIP6_FMT " dst=" NIP6_FMT,
2155                                 NIP6(saddr6), NIP6(daddr6));
2156                 }
2157                 break;
2158         }
2159 }
2160
2161 void
2162 xfrm_audit_policy_add(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
2163 {
2164         struct audit_buffer *audit_buf;
2165         extern int audit_enabled;
2166
2167         if (audit_enabled == 0)
2168                 return;
2169         audit_buf = xfrm_audit_start(auid, sid);
2170         if (audit_buf == NULL)
2171                 return;
2172         audit_log_format(audit_buf, " op=SPD-add res=%u", result);
2173         xfrm_audit_common_policyinfo(xp, audit_buf);
2174         audit_log_end(audit_buf);
2175 }
2176 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2177
2178 void
2179 xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, u32 auid, u32 sid)
2180 {
2181         struct audit_buffer *audit_buf;
2182         extern int audit_enabled;
2183
2184         if (audit_enabled == 0)
2185                 return;
2186         audit_buf = xfrm_audit_start(auid, sid);
2187         if (audit_buf == NULL)
2188                 return;
2189         audit_log_format(audit_buf, " op=SPD-delete res=%u", result);
2190         xfrm_audit_common_policyinfo(xp, audit_buf);
2191         audit_log_end(audit_buf);
2192 }
2193 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2194 #endif
2195
2196 #ifdef CONFIG_XFRM_MIGRATE
2197 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2198                                        struct xfrm_selector *sel_tgt)
2199 {
2200         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2201                 if (sel_tgt->family == sel_cmp->family &&
2202                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2203                                   sel_cmp->family) == 0 &&
2204                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2205                                   sel_cmp->family) == 0 &&
2206                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2207                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2208                         return 1;
2209                 }
2210         } else {
2211                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2212                         return 1;
2213                 }
2214         }
2215         return 0;
2216 }
2217
2218 static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2219                                                      u8 dir, u8 type)
2220 {
2221         struct xfrm_policy *pol, *ret = NULL;
2222         struct hlist_node *entry;
2223         struct hlist_head *chain;
2224         u32 priority = ~0U;
2225
2226         read_lock_bh(&xfrm_policy_lock);
2227         chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2228         hlist_for_each_entry(pol, entry, chain, bydst) {
2229                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2230                     pol->type == type) {
2231                         ret = pol;
2232                         priority = ret->priority;
2233                         break;
2234                 }
2235         }
2236         chain = &xfrm_policy_inexact[dir];
2237         hlist_for_each_entry(pol, entry, chain, bydst) {
2238                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2239                     pol->type == type &&
2240                     pol->priority < priority) {
2241                         ret = pol;
2242                         break;
2243                 }
2244         }
2245
2246         if (ret)
2247                 xfrm_pol_hold(ret);
2248
2249         read_unlock_bh(&xfrm_policy_lock);
2250
2251         return ret;
2252 }
2253
2254 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2255 {
2256         int match = 0;
2257
2258         if (t->mode == m->mode && t->id.proto == m->proto &&
2259             (m->reqid == 0 || t->reqid == m->reqid)) {
2260                 switch (t->mode) {
2261                 case XFRM_MODE_TUNNEL:
2262                 case XFRM_MODE_BEET:
2263                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2264                                           m->old_family) == 0 &&
2265                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2266                                           m->old_family) == 0) {
2267                                 match = 1;
2268                         }
2269                         break;
2270                 case XFRM_MODE_TRANSPORT:
2271                         /* in case of transport mode, template does not store
2272                            any IP addresses, hence we just compare mode and
2273                            protocol */
2274                         match = 1;
2275                         break;
2276                 default:
2277                         break;
2278                 }
2279         }
2280         return match;
2281 }
2282
2283 /* update endpoint address(es) of template(s) */
2284 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2285                                struct xfrm_migrate *m, int num_migrate)
2286 {
2287         struct xfrm_migrate *mp;
2288         struct dst_entry *dst;
2289         int i, j, n = 0;
2290
2291         write_lock_bh(&pol->lock);
2292         if (unlikely(pol->dead)) {
2293                 /* target policy has been deleted */
2294                 write_unlock_bh(&pol->lock);
2295                 return -ENOENT;
2296         }
2297
2298         for (i = 0; i < pol->xfrm_nr; i++) {
2299                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2300                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2301                                 continue;
2302                         n++;
2303                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2304                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2305                                 continue;
2306                         /* update endpoints */
2307                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2308                                sizeof(pol->xfrm_vec[i].id.daddr));
2309                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2310                                sizeof(pol->xfrm_vec[i].saddr));
2311                         pol->xfrm_vec[i].encap_family = mp->new_family;
2312                         /* flush bundles */
2313                         while ((dst = pol->bundles) != NULL) {
2314                                 pol->bundles = dst->next;
2315                                 dst_free(dst);
2316                         }
2317                 }
2318         }
2319
2320         write_unlock_bh(&pol->lock);
2321
2322         if (!n)
2323                 return -ENODATA;
2324
2325         return 0;
2326 }
2327
2328 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2329 {
2330         int i, j;
2331
2332         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2333                 return -EINVAL;
2334
2335         for (i = 0; i < num_migrate; i++) {
2336                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2337                                    m[i].old_family) == 0) &&
2338                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2339                                    m[i].old_family) == 0))
2340                         return -EINVAL;
2341                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2342                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2343                         return -EINVAL;
2344
2345                 /* check if there is any duplicated entry */
2346                 for (j = i + 1; j < num_migrate; j++) {
2347                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2348                                     sizeof(m[i].old_daddr)) &&
2349                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2350                                     sizeof(m[i].old_saddr)) &&
2351                             m[i].proto == m[j].proto &&
2352                             m[i].mode == m[j].mode &&
2353                             m[i].reqid == m[j].reqid &&
2354                             m[i].old_family == m[j].old_family)
2355                                 return -EINVAL;
2356                 }
2357         }
2358
2359         return 0;
2360 }
2361
2362 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2363                  struct xfrm_migrate *m, int num_migrate)
2364 {
2365         int i, err, nx_cur = 0, nx_new = 0;
2366         struct xfrm_policy *pol = NULL;
2367         struct xfrm_state *x, *xc;
2368         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2369         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2370         struct xfrm_migrate *mp;
2371
2372         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2373                 goto out;
2374
2375         /* Stage 1 - find policy */
2376         if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2377                 err = -ENOENT;
2378                 goto out;
2379         }
2380
2381         /* Stage 2 - find and update state(s) */
2382         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2383                 if ((x = xfrm_migrate_state_find(mp))) {
2384                         x_cur[nx_cur] = x;
2385                         nx_cur++;
2386                         if ((xc = xfrm_state_migrate(x, mp))) {
2387                                 x_new[nx_new] = xc;
2388                                 nx_new++;
2389                         } else {
2390                                 err = -ENODATA;
2391                                 goto restore_state;
2392                         }
2393                 }
2394         }
2395
2396         /* Stage 3 - update policy */
2397         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2398                 goto restore_state;
2399
2400         /* Stage 4 - delete old state(s) */
2401         if (nx_cur) {
2402                 xfrm_states_put(x_cur, nx_cur);
2403                 xfrm_states_delete(x_cur, nx_cur);
2404         }
2405
2406         /* Stage 5 - announce */
2407         km_migrate(sel, dir, type, m, num_migrate);
2408
2409         xfrm_pol_put(pol);
2410
2411         return 0;
2412 out:
2413         return err;
2414
2415 restore_state:
2416         if (pol)
2417                 xfrm_pol_put(pol);
2418         if (nx_cur)
2419                 xfrm_states_put(x_cur, nx_cur);
2420         if (nx_new)
2421                 xfrm_states_delete(x_new, nx_new);
2422
2423         return err;
2424 }
2425 EXPORT_SYMBOL(xfrm_migrate);
2426 #endif