[XFRM] xfrm_policy: kill some bloat
[linux-2.6] / net / xfrm / xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/xfrm.h>
30 #include <net/ip.h>
31 #ifdef CONFIG_XFRM_STATISTICS
32 #include <net/snmp.h>
33 #endif
34
35 #include "xfrm_hash.h"
36
/* Global tunable; it is not read anywhere in the visible part of this file.
 * NOTE(review): the name suggests it controls dropping packets while an SA
 * is still larval -- confirm at the use site. */
37 int sysctl_xfrm_larval_drop __read_mostly;
38
/* XFRM MIB counters; only built when CONFIG_XFRM_STATISTICS is set. */
39 #ifdef CONFIG_XFRM_STATISTICS
40 DEFINE_SNMP_STAT(struct linux_xfrm_mib, xfrm_statistics) __read_mostly;
41 EXPORT_SYMBOL(xfrm_statistics);
42 #endif
43
/* Exported mutex; it is not taken in the visible part of this file. */
44 DEFINE_MUTEX(xfrm_cfg_mutex);
45 EXPORT_SYMBOL(xfrm_cfg_mutex);
46
/* Guards the policy hash tables (bydst, byidx, inexact) and
 * xfrm_policy_count: writers are insert/delete/flush/resize, readers are
 * the walk, lookup and getinfo paths below. */
47 static DEFINE_RWLOCK(xfrm_policy_lock);
48
/* Number of linked policies per direction slot.  xfrm_spd_getinfo() reads
 * slots [dir] and [dir + XFRM_POLICY_MAX], hence the doubled size. */
49 unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2];
50 EXPORT_SYMBOL(xfrm_policy_count);
51
/* Per-protocol-family policy operations and their lock.
 * NOTE(review): presumably accessed through xfrm_policy_get_afinfo() /
 * xfrm_policy_put_afinfo(), whose bodies are not visible here. */
52 static DEFINE_RWLOCK(xfrm_policy_afinfo_lock);
53 static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
54
/* Slab cache pointer; its users are outside the visible part of the file. */
55 static struct kmem_cache *xfrm_dst_cache __read_mostly;
56
/* Deferred policy destruction: xfrm_policy_kill() queues dead policies on
 * xfrm_policy_gc_list (linked through their bydst node) under
 * xfrm_policy_gc_lock and schedules xfrm_policy_gc_work;
 * xfrm_policy_gc_task() drains the list. */
57 static struct work_struct xfrm_policy_gc_work;
58 static HLIST_HEAD(xfrm_policy_gc_list);
59 static DEFINE_SPINLOCK(xfrm_policy_gc_lock);
60
/* Forward declarations for helpers defined later in the file. */
61 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
62 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
63 static void xfrm_init_pmtu(struct dst_entry *dst);
64
65 static inline int
66 __xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
67 {
68         return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
69                 addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
70                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
71                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
72                 (fl->proto == sel->proto || !sel->proto) &&
73                 (fl->oif == sel->ifindex || !sel->ifindex);
74 }
75
76 static inline int
77 __xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
78 {
79         return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
80                 addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
81                 !((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
82                 !((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
83                 (fl->proto == sel->proto || !sel->proto) &&
84                 (fl->oif == sel->ifindex || !sel->ifindex);
85 }
86
87 int xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
88                     unsigned short family)
89 {
90         switch (family) {
91         case AF_INET:
92                 return __xfrm4_selector_match(sel, fl);
93         case AF_INET6:
94                 return __xfrm6_selector_match(sel, fl);
95         }
96         return 0;
97 }
98
99 static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
100                                                 int family)
101 {
102         xfrm_address_t *saddr = &x->props.saddr;
103         xfrm_address_t *daddr = &x->id.daddr;
104         struct xfrm_policy_afinfo *afinfo;
105         struct dst_entry *dst;
106
107         if (x->type->flags & XFRM_TYPE_LOCAL_COADDR)
108                 saddr = x->coaddr;
109         if (x->type->flags & XFRM_TYPE_REMOTE_COADDR)
110                 daddr = x->coaddr;
111
112         afinfo = xfrm_policy_get_afinfo(family);
113         if (unlikely(afinfo == NULL))
114                 return ERR_PTR(-EAFNOSUPPORT);
115
116         dst = afinfo->dst_lookup(tos, saddr, daddr);
117         xfrm_policy_put_afinfo(afinfo);
118         return dst;
119 }
120
121 static inline unsigned long make_jiffies(long secs)
122 {
123         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
124                 return MAX_SCHEDULE_TIMEOUT-1;
125         else
126                 return secs*HZ;
127 }
128
/*
 * Per-policy lifetime timer callback.  @data is the policy pointer that
 * xfrm_policy_alloc() passed to setup_timer().
 *
 * With the policy read-locked it evaluates the four configured lifetimes:
 *   - a hard (add/use) lifetime that has run out deletes the policy and,
 *     if xfrm_policy_delete() returned 0, reports a hard expiry;
 *   - a soft (add/use) lifetime that has run out reports a soft expiry and
 *     asks to be re-run after XFRM_KM_TIMEOUT;
 *   - otherwise the timer is re-armed for the nearest remaining deadline.
 * Every exit path ends with xfrm_pol_put(), and a new reference is taken
 * when mod_timer() returns 0 -- the same pairing used where the timer is
 * first armed in xfrm_policy_insert().
 */
129 static void xfrm_policy_timer(unsigned long data)
130 {
131         struct xfrm_policy *xp = (struct xfrm_policy*)data;
132         unsigned long now = get_seconds();
            /* LONG_MAX means "no deadline found, do not re-arm". */
133         long next = LONG_MAX;
134         int warn = 0;
135         int dir;
136
137         read_lock(&xp->lock);
138
            /* A dead policy is only released, never re-armed. */
139         if (xp->dead)
140                 goto out;
141
            /* dir is set before any jump to "expired", which needs it. */
142         dir = xfrm_policy_id2dir(xp->index);
143
            /* Hard limit counted from the time the policy was added. */
144         if (xp->lft.hard_add_expires_seconds) {
145                 long tmo = xp->lft.hard_add_expires_seconds +
146                         xp->curlft.add_time - now;
147                 if (tmo <= 0)
148                         goto expired;
149                 if (tmo < next)
150                         next = tmo;
151         }
            /* Hard limit counted from use_time, or from add_time while
             * use_time is still 0. */
152         if (xp->lft.hard_use_expires_seconds) {
153                 long tmo = xp->lft.hard_use_expires_seconds +
154                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
155                 if (tmo <= 0)
156                         goto expired;
157                 if (tmo < next)
158                         next = tmo;
159         }
            /* Soft limits only warn; once passed, the timer is retried
             * after XFRM_KM_TIMEOUT instead of expiring the policy. */
160         if (xp->lft.soft_add_expires_seconds) {
161                 long tmo = xp->lft.soft_add_expires_seconds +
162                         xp->curlft.add_time - now;
163                 if (tmo <= 0) {
164                         warn = 1;
165                         tmo = XFRM_KM_TIMEOUT;
166                 }
167                 if (tmo < next)
168                         next = tmo;
169         }
170         if (xp->lft.soft_use_expires_seconds) {
171                 long tmo = xp->lft.soft_use_expires_seconds +
172                         (xp->curlft.use_time ? : xp->curlft.add_time) - now;
173                 if (tmo <= 0) {
174                         warn = 1;
175                         tmo = XFRM_KM_TIMEOUT;
176                 }
177                 if (tmo < next)
178                         next = tmo;
179         }
180
            /* Third argument 0 = soft expiry (the hard path below passes 1). */
181         if (warn)
182                 km_policy_expired(xp, dir, 0, 0);
            /* Re-arm for the nearest deadline; hold a reference when
             * mod_timer() returns 0. */
183         if (next != LONG_MAX &&
184             !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
185                 xfrm_pol_hold(xp);
186
187 out:
188         read_unlock(&xp->lock);
189         xfrm_pol_put(xp);
190         return;
191
192 expired:
            /* The lock is dropped before xfrm_policy_delete() is called. */
193         read_unlock(&xp->lock);
194         if (!xfrm_policy_delete(xp, dir))
195                 km_policy_expired(xp, dir, 1, 0);
196         xfrm_pol_put(xp);
197 }
198
199
200 /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
201  * SPD calls.
202  */
203
204 struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp)
205 {
206         struct xfrm_policy *policy;
207
208         policy = kzalloc(sizeof(struct xfrm_policy), gfp);
209
210         if (policy) {
211                 INIT_HLIST_NODE(&policy->bydst);
212                 INIT_HLIST_NODE(&policy->byidx);
213                 rwlock_init(&policy->lock);
214                 atomic_set(&policy->refcnt, 1);
215                 setup_timer(&policy->timer, xfrm_policy_timer,
216                                 (unsigned long)policy);
217         }
218         return policy;
219 }
220 EXPORT_SYMBOL(xfrm_policy_alloc);
221
222 /* Destroy xfrm_policy: descendant resources must be released to this moment. */
223
224 void xfrm_policy_destroy(struct xfrm_policy *policy)
225 {
226         BUG_ON(!policy->dead);
227
228         BUG_ON(policy->bundles);
229
230         if (del_timer(&policy->timer))
231                 BUG();
232
233         security_xfrm_policy_free(policy);
234         kfree(policy);
235 }
236 EXPORT_SYMBOL(xfrm_policy_destroy);
237
238 static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
239 {
240         struct dst_entry *dst;
241
242         while ((dst = policy->bundles) != NULL) {
243                 policy->bundles = dst->next;
244                 dst_free(dst);
245         }
246
247         if (del_timer(&policy->timer))
248                 atomic_dec(&policy->refcnt);
249
250         if (atomic_read(&policy->refcnt) > 1)
251                 flow_cache_flush();
252
253         xfrm_pol_put(policy);
254 }
255
256 static void xfrm_policy_gc_task(struct work_struct *work)
257 {
258         struct xfrm_policy *policy;
259         struct hlist_node *entry, *tmp;
260         struct hlist_head gc_list;
261
262         spin_lock_bh(&xfrm_policy_gc_lock);
263         gc_list.first = xfrm_policy_gc_list.first;
264         INIT_HLIST_HEAD(&xfrm_policy_gc_list);
265         spin_unlock_bh(&xfrm_policy_gc_lock);
266
267         hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst)
268                 xfrm_policy_gc_kill(policy);
269 }
270
271 /* Rule must be locked. Release descentant resources, announce
272  * entry dead. The rule must be unlinked from lists to the moment.
273  */
274
275 static void xfrm_policy_kill(struct xfrm_policy *policy)
276 {
277         int dead;
278
279         write_lock_bh(&policy->lock);
280         dead = policy->dead;
281         policy->dead = 1;
282         write_unlock_bh(&policy->lock);
283
284         if (unlikely(dead)) {
285                 WARN_ON(1);
286                 return;
287         }
288
289         spin_lock(&xfrm_policy_gc_lock);
290         hlist_add_head(&policy->bydst, &xfrm_policy_gc_list);
291         spin_unlock(&xfrm_policy_gc_lock);
292
293         schedule_work(&xfrm_policy_gc_work);
294 }
295
/* One hash table of policy chains.  The table holds hmask + 1 buckets
 * (see the size computations in xfrm_bydst_resize()). */
296 struct xfrm_policy_hash {
297         struct hlist_head       *table;
298         unsigned int            hmask;
299 };
300
/* Per-direction chain for policies that policy_hash_bysel() cannot place in
 * the bydst table (__sel_hash() returning hmask + 1). */
301 static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2];
/* Per-direction address-hashed tables, grown by xfrm_bydst_resize(). */
302 static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly;
/* Table of policies hashed by index, with xfrm_idx_hmask + 1 buckets;
 * grown by xfrm_byidx_resize(). */
303 static struct hlist_head *xfrm_policy_byidx __read_mostly;
304 static unsigned int xfrm_idx_hmask __read_mostly;
/* Upper bound on the bucket count checked by the *_should_resize()
 * helpers before a table is allowed to grow. */
305 static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;
306
307 static inline unsigned int idx_hash(u32 index)
308 {
309         return __idx_hash(index, xfrm_idx_hmask);
310 }
311
312 static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir)
313 {
314         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
315         unsigned int hash = __sel_hash(sel, family, hmask);
316
317         return (hash == hmask + 1 ?
318                 &xfrm_policy_inexact[dir] :
319                 xfrm_policy_bydst[dir].table + hash);
320 }
321
322 static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir)
323 {
324         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
325         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
326
327         return xfrm_policy_bydst[dir].table + hash;
328 }
329
330 static void xfrm_dst_hash_transfer(struct hlist_head *list,
331                                    struct hlist_head *ndsttable,
332                                    unsigned int nhashmask)
333 {
334         struct hlist_node *entry, *tmp;
335         struct xfrm_policy *pol;
336
337         hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) {
338                 unsigned int h;
339
340                 h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
341                                 pol->family, nhashmask);
342                 hlist_add_head(&pol->bydst, ndsttable+h);
343         }
344 }
345
346 static void xfrm_idx_hash_transfer(struct hlist_head *list,
347                                    struct hlist_head *nidxtable,
348                                    unsigned int nhashmask)
349 {
350         struct hlist_node *entry, *tmp;
351         struct xfrm_policy *pol;
352
353         hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) {
354                 unsigned int h;
355
356                 h = __idx_hash(pol->index, nhashmask);
357                 hlist_add_head(&pol->byidx, nidxtable+h);
358         }
359 }
360
/*
 * Mask for a table twice the size of the one described by @old_hmask
 * (e.g. 7 -> 15).  The arithmetic is done in unsigned int.
 */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int doubled_size = (old_hmask + 1) << 1;

	return doubled_size - 1;
}
365
366 static void xfrm_bydst_resize(int dir)
367 {
368         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
369         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
370         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
371         struct hlist_head *odst = xfrm_policy_bydst[dir].table;
372         struct hlist_head *ndst = xfrm_hash_alloc(nsize);
373         int i;
374
375         if (!ndst)
376                 return;
377
378         write_lock_bh(&xfrm_policy_lock);
379
380         for (i = hmask; i >= 0; i--)
381                 xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);
382
383         xfrm_policy_bydst[dir].table = ndst;
384         xfrm_policy_bydst[dir].hmask = nhashmask;
385
386         write_unlock_bh(&xfrm_policy_lock);
387
388         xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
389 }
390
391 static void xfrm_byidx_resize(int total)
392 {
393         unsigned int hmask = xfrm_idx_hmask;
394         unsigned int nhashmask = xfrm_new_hash_mask(hmask);
395         unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
396         struct hlist_head *oidx = xfrm_policy_byidx;
397         struct hlist_head *nidx = xfrm_hash_alloc(nsize);
398         int i;
399
400         if (!nidx)
401                 return;
402
403         write_lock_bh(&xfrm_policy_lock);
404
405         for (i = hmask; i >= 0; i--)
406                 xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);
407
408         xfrm_policy_byidx = nidx;
409         xfrm_idx_hmask = nhashmask;
410
411         write_unlock_bh(&xfrm_policy_lock);
412
413         xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
414 }
415
416 static inline int xfrm_bydst_should_resize(int dir, int *total)
417 {
418         unsigned int cnt = xfrm_policy_count[dir];
419         unsigned int hmask = xfrm_policy_bydst[dir].hmask;
420
421         if (total)
422                 *total += cnt;
423
424         if ((hmask + 1) < xfrm_policy_hashmax &&
425             cnt > hmask)
426                 return 1;
427
428         return 0;
429 }
430
431 static inline int xfrm_byidx_should_resize(int total)
432 {
433         unsigned int hmask = xfrm_idx_hmask;
434
435         if ((hmask + 1) < xfrm_policy_hashmax &&
436             total > hmask)
437                 return 1;
438
439         return 0;
440 }
441
442 void xfrm_spd_getinfo(struct xfrmk_spdinfo *si)
443 {
444         read_lock_bh(&xfrm_policy_lock);
445         si->incnt = xfrm_policy_count[XFRM_POLICY_IN];
446         si->outcnt = xfrm_policy_count[XFRM_POLICY_OUT];
447         si->fwdcnt = xfrm_policy_count[XFRM_POLICY_FWD];
448         si->inscnt = xfrm_policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
449         si->outscnt = xfrm_policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
450         si->fwdscnt = xfrm_policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
451         si->spdhcnt = xfrm_idx_hmask;
452         si->spdhmcnt = xfrm_policy_hashmax;
453         read_unlock_bh(&xfrm_policy_lock);
454 }
455 EXPORT_SYMBOL(xfrm_spd_getinfo);
456
457 static DEFINE_MUTEX(hash_resize_mutex);
458 static void xfrm_hash_resize(struct work_struct *__unused)
459 {
460         int dir, total;
461
462         mutex_lock(&hash_resize_mutex);
463
464         total = 0;
465         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
466                 if (xfrm_bydst_should_resize(dir, &total))
467                         xfrm_bydst_resize(dir);
468         }
469         if (xfrm_byidx_should_resize(total))
470                 xfrm_byidx_resize(total);
471
472         mutex_unlock(&hash_resize_mutex);
473 }
474
475 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
476
477 /* Generate new index... KAME seems to generate them ordered by cost
478  * of an absolute inpredictability of ordering of rules. This will not pass. */
479 static u32 xfrm_gen_index(u8 type, int dir)
480 {
481         static u32 idx_generator;
482
483         for (;;) {
484                 struct hlist_node *entry;
485                 struct hlist_head *list;
486                 struct xfrm_policy *p;
487                 u32 idx;
488                 int found;
489
490                 idx = (idx_generator | dir);
491                 idx_generator += 8;
492                 if (idx == 0)
493                         idx = 8;
494                 list = xfrm_policy_byidx + idx_hash(idx);
495                 found = 0;
496                 hlist_for_each_entry(p, entry, list, byidx) {
497                         if (p->index == idx) {
498                                 found = 1;
499                                 break;
500                         }
501                 }
502                 if (!found)
503                         return idx;
504         }
505 }
506
507 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
508 {
509         u32 *p1 = (u32 *) s1;
510         u32 *p2 = (u32 *) s2;
511         int len = sizeof(struct xfrm_selector) / sizeof(u32);
512         int i;
513
514         for (i = 0; i < len; i++) {
515                 if (p1[i] != p2[i])
516                         return 1;
517         }
518
519         return 0;
520 }
521
/*
 * Link @policy into the database for direction @dir.
 *
 * An existing policy with the same type, selector and security context is
 * replaced (and killed), unless @excl is non-zero, in which case nothing
 * is changed and -EEXIST is returned.  On success the policy is linked
 * into its bydst chain and the byidx table, a reference is taken, its
 * timer is armed and 0 is returned.
 */
522 int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
523 {
524         struct xfrm_policy *pol;
525         struct xfrm_policy *delpol;
526         struct hlist_head *chain;
527         struct hlist_node *entry, *newpos;
528         struct dst_entry *gc_list;
529
530         write_lock_bh(&xfrm_policy_lock);
531         chain = policy_hash_bysel(&policy->selector, policy->family, dir);
532         delpol = NULL;
533         newpos = NULL;
            /*
             * One pass finds both the policy to replace (delpol) and the
             * insertion point (newpos): the last entry whose priority
             * value is <= the new policy's, so the chain stays ordered by
             * priority.  WARN_ON(delpol) fires if a second duplicate is
             * met; that entry is then handled by the else-branch.
             */
534         hlist_for_each_entry(pol, entry, chain, bydst) {
535                 if (pol->type == policy->type &&
536                     !selector_cmp(&pol->selector, &policy->selector) &&
537                     xfrm_sec_ctx_match(pol->security, policy->security) &&
538                     !WARN_ON(delpol)) {
539                         if (excl) {
540                                 write_unlock_bh(&xfrm_policy_lock);
541                                 return -EEXIST;
542                         }
543                         delpol = pol;
544                         if (policy->priority > pol->priority)
545                                 continue;
546                 } else if (policy->priority >= pol->priority) {
547                         newpos = &pol->bydst;
548                         continue;
549                 }
                    /* Past the insertion point: stop once the duplicate
                     * has been found, otherwise keep looking for it. */
550                 if (delpol)
551                         break;
552         }
553         if (newpos)
554                 hlist_add_after(newpos, &policy->bydst);
555         else
556                 hlist_add_head(&policy->bydst, chain);
            /* Reference taken for the policy now being linked. */
557         xfrm_pol_hold(policy);
558         xfrm_policy_count[dir]++;
559         atomic_inc(&flow_cache_genid);
560         if (delpol) {
561                 hlist_del(&delpol->bydst);
562                 hlist_del(&delpol->byidx);
563                 xfrm_policy_count[dir]--;
564         }
            /* A replacement inherits the index of the policy it replaces. */
565         policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir);
566         hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index));
567         policy->curlft.add_time = get_seconds();
568         policy->curlft.use_time = 0;
            /* First lifetime check after HZ jiffies; a reference is held
             * when mod_timer() returns 0. */
569         if (!mod_timer(&policy->timer, jiffies + HZ))
570                 xfrm_pol_hold(policy);
571         write_unlock_bh(&xfrm_policy_lock);
572
573         if (delpol)
574                 xfrm_policy_kill(delpol);
575         else if (xfrm_bydst_should_resize(dir, NULL))
576                 schedule_work(&xfrm_hash_work);
577
            /*
             * Detach the bundles of every policy that follows the new one
             * on its chain.  Note that the 'policy' parameter is reused as
             * the loop cursor from here on.  The detached bundles are
             * collected on gc_list and freed after the lock is dropped.
             */
578         read_lock_bh(&xfrm_policy_lock);
579         gc_list = NULL;
580         entry = &policy->bydst;
581         hlist_for_each_entry_continue(policy, entry, bydst) {
582                 struct dst_entry *dst;
583
584                 write_lock(&policy->lock);
585                 dst = policy->bundles;
586                 if (dst) {
587                         struct dst_entry *tail = dst;
588                         while (tail->next)
589                                 tail = tail->next;
590                         tail->next = gc_list;
591                         gc_list = dst;
592
593                         policy->bundles = NULL;
594                 }
595                 write_unlock(&policy->lock);
596         }
597         read_unlock_bh(&xfrm_policy_lock);
598
599         while (gc_list) {
600                 struct dst_entry *dst = gc_list;
601
602                 gc_list = dst->next;
603                 dst_free(dst);
604         }
605
606         return 0;
607 }
608 EXPORT_SYMBOL(xfrm_policy_insert);
609
610 struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir,
611                                           struct xfrm_selector *sel,
612                                           struct xfrm_sec_ctx *ctx, int delete,
613                                           int *err)
614 {
615         struct xfrm_policy *pol, *ret;
616         struct hlist_head *chain;
617         struct hlist_node *entry;
618
619         *err = 0;
620         write_lock_bh(&xfrm_policy_lock);
621         chain = policy_hash_bysel(sel, sel->family, dir);
622         ret = NULL;
623         hlist_for_each_entry(pol, entry, chain, bydst) {
624                 if (pol->type == type &&
625                     !selector_cmp(sel, &pol->selector) &&
626                     xfrm_sec_ctx_match(ctx, pol->security)) {
627                         xfrm_pol_hold(pol);
628                         if (delete) {
629                                 *err = security_xfrm_policy_delete(pol);
630                                 if (*err) {
631                                         write_unlock_bh(&xfrm_policy_lock);
632                                         return pol;
633                                 }
634                                 hlist_del(&pol->bydst);
635                                 hlist_del(&pol->byidx);
636                                 xfrm_policy_count[dir]--;
637                         }
638                         ret = pol;
639                         break;
640                 }
641         }
642         write_unlock_bh(&xfrm_policy_lock);
643
644         if (ret && delete) {
645                 atomic_inc(&flow_cache_genid);
646                 xfrm_policy_kill(ret);
647         }
648         return ret;
649 }
650 EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
651
652 struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete,
653                                      int *err)
654 {
655         struct xfrm_policy *pol, *ret;
656         struct hlist_head *chain;
657         struct hlist_node *entry;
658
659         *err = -ENOENT;
660         if (xfrm_policy_id2dir(id) != dir)
661                 return NULL;
662
663         *err = 0;
664         write_lock_bh(&xfrm_policy_lock);
665         chain = xfrm_policy_byidx + idx_hash(id);
666         ret = NULL;
667         hlist_for_each_entry(pol, entry, chain, byidx) {
668                 if (pol->type == type && pol->index == id) {
669                         xfrm_pol_hold(pol);
670                         if (delete) {
671                                 *err = security_xfrm_policy_delete(pol);
672                                 if (*err) {
673                                         write_unlock_bh(&xfrm_policy_lock);
674                                         return pol;
675                                 }
676                                 hlist_del(&pol->bydst);
677                                 hlist_del(&pol->byidx);
678                                 xfrm_policy_count[dir]--;
679                         }
680                         ret = pol;
681                         break;
682                 }
683         }
684         write_unlock_bh(&xfrm_policy_lock);
685
686         if (ret && delete) {
687                 atomic_inc(&flow_cache_genid);
688                 xfrm_policy_kill(ret);
689         }
690         return ret;
691 }
692 EXPORT_SYMBOL(xfrm_policy_byid);
693
694 #ifdef CONFIG_SECURITY_NETWORK_XFRM
695 static inline int
696 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
697 {
698         int dir, err = 0;
699
700         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
701                 struct xfrm_policy *pol;
702                 struct hlist_node *entry;
703                 int i;
704
705                 hlist_for_each_entry(pol, entry,
706                                      &xfrm_policy_inexact[dir], bydst) {
707                         if (pol->type != type)
708                                 continue;
709                         err = security_xfrm_policy_delete(pol);
710                         if (err) {
711                                 xfrm_audit_policy_delete(pol, 0,
712                                                          audit_info->loginuid,
713                                                          audit_info->secid);
714                                 return err;
715                         }
716                 }
717                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
718                         hlist_for_each_entry(pol, entry,
719                                              xfrm_policy_bydst[dir].table + i,
720                                              bydst) {
721                                 if (pol->type != type)
722                                         continue;
723                                 err = security_xfrm_policy_delete(pol);
724                                 if (err) {
725                                         xfrm_audit_policy_delete(pol, 0,
726                                                         audit_info->loginuid,
727                                                         audit_info->secid);
728                                         return err;
729                                 }
730                         }
731                 }
732         }
733         return err;
734 }
735 #else
736 static inline int
737 xfrm_policy_flush_secctx_check(u8 type, struct xfrm_audit *audit_info)
738 {
739         return 0;
740 }
741 #endif
742
/*
 * Remove every policy of @type from directions 0..XFRM_POLICY_MAX-1.
 *
 * xfrm_policy_flush_secctx_check() runs first; if it fails, nothing is
 * removed and its error is returned.  Otherwise each matching policy is
 * unlinked, audited and killed, the per-direction counts are adjusted and
 * the flow-cache generation is bumped.  Returns 0 on success.
 */
743 int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info)
744 {
745         int dir, err = 0;
746
747         write_lock_bh(&xfrm_policy_lock);
748
749         err = xfrm_policy_flush_secctx_check(type, audit_info);
750         if (err)
751                 goto out;
752
753         for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
754                 struct xfrm_policy *pol;
755                 struct hlist_node *entry;
756                 int i, killed;
757
758                 killed = 0;
            /*
             * xfrm_policy_lock is dropped around the audit and kill calls,
             * so after each removal the scan of the chain restarts from
             * its head ("goto again") instead of continuing from the
             * unlinked node.
             */
759         again1:
760                 hlist_for_each_entry(pol, entry,
761                                      &xfrm_policy_inexact[dir], bydst) {
762                         if (pol->type != type)
763                                 continue;
764                         hlist_del(&pol->bydst);
765                         hlist_del(&pol->byidx);
766                         write_unlock_bh(&xfrm_policy_lock);
767
768                         xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
769                                                  audit_info->secid);
770
771                         xfrm_policy_kill(pol);
772                         killed++;
773
774                         write_lock_bh(&xfrm_policy_lock);
775                         goto again1;
776                 }
777
                    /* Same procedure for every bucket of the bydst table;
                     * hmask and table are re-read after each restart. */
778                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
779         again2:
780                         hlist_for_each_entry(pol, entry,
781                                              xfrm_policy_bydst[dir].table + i,
782                                              bydst) {
783                                 if (pol->type != type)
784                                         continue;
785                                 hlist_del(&pol->bydst);
786                                 hlist_del(&pol->byidx);
787                                 write_unlock_bh(&xfrm_policy_lock);
788
789                                 xfrm_audit_policy_delete(pol, 1,
790                                                          audit_info->loginuid,
791                                                          audit_info->secid);
792                                 xfrm_policy_kill(pol);
793                                 killed++;
794
795                                 write_lock_bh(&xfrm_policy_lock);
796                                 goto again2;
797                         }
798                 }
799
                    /* The count is adjusted once per direction, under the
                     * re-taken lock. */
800                 xfrm_policy_count[dir] -= killed;
801         }
802         atomic_inc(&flow_cache_genid);
803 out:
804         write_unlock_bh(&xfrm_policy_lock);
805         return err;
806 }
807 EXPORT_SYMBOL(xfrm_policy_flush);
808
809 int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*),
810                      void *data)
811 {
812         struct xfrm_policy *pol, *last = NULL;
813         struct hlist_node *entry;
814         int dir, last_dir = 0, count, error;
815
816         read_lock_bh(&xfrm_policy_lock);
817         count = 0;
818
819         for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
820                 struct hlist_head *table = xfrm_policy_bydst[dir].table;
821                 int i;
822
823                 hlist_for_each_entry(pol, entry,
824                                      &xfrm_policy_inexact[dir], bydst) {
825                         if (pol->type != type)
826                                 continue;
827                         if (last) {
828                                 error = func(last, last_dir % XFRM_POLICY_MAX,
829                                              count, data);
830                                 if (error)
831                                         goto out;
832                         }
833                         last = pol;
834                         last_dir = dir;
835                         count++;
836                 }
837                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
838                         hlist_for_each_entry(pol, entry, table + i, bydst) {
839                                 if (pol->type != type)
840                                         continue;
841                                 if (last) {
842                                         error = func(last, last_dir % XFRM_POLICY_MAX,
843                                                      count, data);
844                                         if (error)
845                                                 goto out;
846                                 }
847                                 last = pol;
848                                 last_dir = dir;
849                                 count++;
850                         }
851                 }
852         }
853         if (count == 0) {
854                 error = -ENOENT;
855                 goto out;
856         }
857         error = func(last, last_dir % XFRM_POLICY_MAX, 0, data);
858 out:
859         read_unlock_bh(&xfrm_policy_lock);
860         return error;
861 }
862 EXPORT_SYMBOL(xfrm_policy_walk);
863
864 /*
865  * Find policy to apply to this flow.
866  *
867  * Returns 0 if policy found, else an -errno.
868  */
869 static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl,
870                              u8 type, u16 family, int dir)
871 {
872         struct xfrm_selector *sel = &pol->selector;
873         int match, ret = -ESRCH;
874
875         if (pol->family != family ||
876             pol->type != type)
877                 return ret;
878
879         match = xfrm_selector_match(sel, fl, family);
880         if (match)
881                 ret = security_xfrm_policy_lookup(pol, fl->secid, dir);
882
883         return ret;
884 }
885
886 static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl,
887                                                      u16 family, u8 dir)
888 {
889         int err;
890         struct xfrm_policy *pol, *ret;
891         xfrm_address_t *daddr, *saddr;
892         struct hlist_node *entry;
893         struct hlist_head *chain;
894         u32 priority = ~0U;
895
896         daddr = xfrm_flowi_daddr(fl, family);
897         saddr = xfrm_flowi_saddr(fl, family);
898         if (unlikely(!daddr || !saddr))
899                 return NULL;
900
901         read_lock_bh(&xfrm_policy_lock);
902         chain = policy_hash_direct(daddr, saddr, family, dir);
903         ret = NULL;
904         hlist_for_each_entry(pol, entry, chain, bydst) {
905                 err = xfrm_policy_match(pol, fl, type, family, dir);
906                 if (err) {
907                         if (err == -ESRCH)
908                                 continue;
909                         else {
910                                 ret = ERR_PTR(err);
911                                 goto fail;
912                         }
913                 } else {
914                         ret = pol;
915                         priority = ret->priority;
916                         break;
917                 }
918         }
919         chain = &xfrm_policy_inexact[dir];
920         hlist_for_each_entry(pol, entry, chain, bydst) {
921                 err = xfrm_policy_match(pol, fl, type, family, dir);
922                 if (err) {
923                         if (err == -ESRCH)
924                                 continue;
925                         else {
926                                 ret = ERR_PTR(err);
927                                 goto fail;
928                         }
929                 } else if (pol->priority < priority) {
930                         ret = pol;
931                         break;
932                 }
933         }
934         if (ret)
935                 xfrm_pol_hold(ret);
936 fail:
937         read_unlock_bh(&xfrm_policy_lock);
938
939         return ret;
940 }
941
942 static int xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
943                                void **objp, atomic_t **obj_refp)
944 {
945         struct xfrm_policy *pol;
946         int err = 0;
947
948 #ifdef CONFIG_XFRM_SUB_POLICY
949         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir);
950         if (IS_ERR(pol)) {
951                 err = PTR_ERR(pol);
952                 pol = NULL;
953         }
954         if (pol || err)
955                 goto end;
956 #endif
957         pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir);
958         if (IS_ERR(pol)) {
959                 err = PTR_ERR(pol);
960                 pol = NULL;
961         }
962 #ifdef CONFIG_XFRM_SUB_POLICY
963 end:
964 #endif
965         if ((*objp = (void *) pol) != NULL)
966                 *obj_refp = &pol->refcnt;
967         return err;
968 }
969
970 static inline int policy_to_flow_dir(int dir)
971 {
972         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
973             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
974             XFRM_POLICY_FWD == FLOW_DIR_FWD)
975                 return dir;
976         switch (dir) {
977         default:
978         case XFRM_POLICY_IN:
979                 return FLOW_DIR_IN;
980         case XFRM_POLICY_OUT:
981                 return FLOW_DIR_OUT;
982         case XFRM_POLICY_FWD:
983                 return FLOW_DIR_FWD;
984         }
985 }
986
987 static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
988 {
989         struct xfrm_policy *pol;
990
991         read_lock_bh(&xfrm_policy_lock);
992         if ((pol = sk->sk_policy[dir]) != NULL) {
993                 int match = xfrm_selector_match(&pol->selector, fl,
994                                                 sk->sk_family);
995                 int err = 0;
996
997                 if (match) {
998                         err = security_xfrm_policy_lookup(pol, fl->secid,
999                                         policy_to_flow_dir(dir));
1000                         if (!err)
1001                                 xfrm_pol_hold(pol);
1002                         else if (err == -ESRCH)
1003                                 pol = NULL;
1004                         else
1005                                 pol = ERR_PTR(err);
1006                 } else
1007                         pol = NULL;
1008         }
1009         read_unlock_bh(&xfrm_policy_lock);
1010         return pol;
1011 }
1012
1013 static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
1014 {
1015         struct hlist_head *chain = policy_hash_bysel(&pol->selector,
1016                                                      pol->family, dir);
1017
1018         hlist_add_head(&pol->bydst, chain);
1019         hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index));
1020         xfrm_policy_count[dir]++;
1021         xfrm_pol_hold(pol);
1022
1023         if (xfrm_bydst_should_resize(dir, NULL))
1024                 schedule_work(&xfrm_hash_work);
1025 }
1026
1027 static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
1028                                                 int dir)
1029 {
1030         if (hlist_unhashed(&pol->bydst))
1031                 return NULL;
1032
1033         hlist_del(&pol->bydst);
1034         hlist_del(&pol->byidx);
1035         xfrm_policy_count[dir]--;
1036
1037         return pol;
1038 }
1039
1040 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1041 {
1042         write_lock_bh(&xfrm_policy_lock);
1043         pol = __xfrm_policy_unlink(pol, dir);
1044         write_unlock_bh(&xfrm_policy_lock);
1045         if (pol) {
1046                 if (dir < XFRM_POLICY_MAX)
1047                         atomic_inc(&flow_cache_genid);
1048                 xfrm_policy_kill(pol);
1049                 return 0;
1050         }
1051         return -ENOENT;
1052 }
1053 EXPORT_SYMBOL(xfrm_policy_delete);
1054
1055 int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
1056 {
1057         struct xfrm_policy *old_pol;
1058
1059 #ifdef CONFIG_XFRM_SUB_POLICY
1060         if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
1061                 return -EINVAL;
1062 #endif
1063
1064         write_lock_bh(&xfrm_policy_lock);
1065         old_pol = sk->sk_policy[dir];
1066         sk->sk_policy[dir] = pol;
1067         if (pol) {
1068                 pol->curlft.add_time = get_seconds();
1069                 pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir);
1070                 __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
1071         }
1072         if (old_pol)
1073                 __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
1074         write_unlock_bh(&xfrm_policy_lock);
1075
1076         if (old_pol) {
1077                 xfrm_policy_kill(old_pol);
1078         }
1079         return 0;
1080 }
1081
1082 static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
1083 {
1084         struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
1085
1086         if (newp) {
1087                 newp->selector = old->selector;
1088                 if (security_xfrm_policy_clone(old, newp)) {
1089                         kfree(newp);
1090                         return NULL;  /* ENOMEM */
1091                 }
1092                 newp->lft = old->lft;
1093                 newp->curlft = old->curlft;
1094                 newp->action = old->action;
1095                 newp->flags = old->flags;
1096                 newp->xfrm_nr = old->xfrm_nr;
1097                 newp->index = old->index;
1098                 newp->type = old->type;
1099                 memcpy(newp->xfrm_vec, old->xfrm_vec,
1100                        newp->xfrm_nr*sizeof(struct xfrm_tmpl));
1101                 write_lock_bh(&xfrm_policy_lock);
1102                 __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
1103                 write_unlock_bh(&xfrm_policy_lock);
1104                 xfrm_pol_put(newp);
1105         }
1106         return newp;
1107 }
1108
1109 int __xfrm_sk_clone_policy(struct sock *sk)
1110 {
1111         struct xfrm_policy *p0 = sk->sk_policy[0],
1112                            *p1 = sk->sk_policy[1];
1113
1114         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1115         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1116                 return -ENOMEM;
1117         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1118                 return -ENOMEM;
1119         return 0;
1120 }
1121
1122 static int
1123 xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote,
1124                unsigned short family)
1125 {
1126         int err;
1127         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1128
1129         if (unlikely(afinfo == NULL))
1130                 return -EINVAL;
1131         err = afinfo->get_saddr(local, remote);
1132         xfrm_policy_put_afinfo(afinfo);
1133         return err;
1134 }
1135
1136 /* Resolve list of templates for the flow, given policy. */
1137
1138 static int
1139 xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl,
1140                       struct xfrm_state **xfrm,
1141                       unsigned short family)
1142 {
1143         int nx;
1144         int i, error;
1145         xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
1146         xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
1147         xfrm_address_t tmp;
1148
1149         for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
1150                 struct xfrm_state *x;
1151                 xfrm_address_t *remote = daddr;
1152                 xfrm_address_t *local  = saddr;
1153                 struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
1154
1155                 if (tmpl->mode == XFRM_MODE_TUNNEL ||
1156                     tmpl->mode == XFRM_MODE_BEET) {
1157                         remote = &tmpl->id.daddr;
1158                         local = &tmpl->saddr;
1159                         family = tmpl->encap_family;
1160                         if (xfrm_addr_any(local, family)) {
1161                                 error = xfrm_get_saddr(&tmp, remote, family);
1162                                 if (error)
1163                                         goto fail;
1164                                 local = &tmp;
1165                         }
1166                 }
1167
1168                 x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
1169
1170                 if (x && x->km.state == XFRM_STATE_VALID) {
1171                         xfrm[nx++] = x;
1172                         daddr = remote;
1173                         saddr = local;
1174                         continue;
1175                 }
1176                 if (x) {
1177                         error = (x->km.state == XFRM_STATE_ERROR ?
1178                                  -EINVAL : -EAGAIN);
1179                         xfrm_state_put(x);
1180                 }
1181
1182                 if (!tmpl->optional)
1183                         goto fail;
1184         }
1185         return nx;
1186
1187 fail:
1188         for (nx--; nx>=0; nx--)
1189                 xfrm_state_put(xfrm[nx]);
1190         return error;
1191 }
1192
1193 static int
1194 xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl,
1195                   struct xfrm_state **xfrm,
1196                   unsigned short family)
1197 {
1198         struct xfrm_state *tp[XFRM_MAX_DEPTH];
1199         struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
1200         int cnx = 0;
1201         int error;
1202         int ret;
1203         int i;
1204
1205         for (i = 0; i < npols; i++) {
1206                 if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
1207                         error = -ENOBUFS;
1208                         goto fail;
1209                 }
1210
1211                 ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
1212                 if (ret < 0) {
1213                         error = ret;
1214                         goto fail;
1215                 } else
1216                         cnx += ret;
1217         }
1218
1219         /* found states are sorted for outbound processing */
1220         if (npols > 1)
1221                 xfrm_state_sort(xfrm, tpp, cnx, family);
1222
1223         return cnx;
1224
1225  fail:
1226         for (cnx--; cnx>=0; cnx--)
1227                 xfrm_state_put(tpp[cnx]);
1228         return error;
1229
1230 }
1231
1232 /* Check that the bundle accepts the flow and its components are
1233  * still valid.
1234  */
1235
1236 static struct dst_entry *
1237 xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
1238 {
1239         struct dst_entry *x;
1240         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1241         if (unlikely(afinfo == NULL))
1242                 return ERR_PTR(-EINVAL);
1243         x = afinfo->find_bundle(fl, policy);
1244         xfrm_policy_put_afinfo(afinfo);
1245         return x;
1246 }
1247
1248 static inline int xfrm_get_tos(struct flowi *fl, int family)
1249 {
1250         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1251         int tos;
1252
1253         if (!afinfo)
1254                 return -EINVAL;
1255
1256         tos = afinfo->get_tos(fl);
1257
1258         xfrm_policy_put_afinfo(afinfo);
1259
1260         return tos;
1261 }
1262
1263 static inline struct xfrm_dst *xfrm_alloc_dst(int family)
1264 {
1265         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1266         struct xfrm_dst *xdst;
1267
1268         if (!afinfo)
1269                 return ERR_PTR(-EINVAL);
1270
1271         xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS);
1272
1273         xfrm_policy_put_afinfo(afinfo);
1274
1275         return xdst;
1276 }
1277
1278 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1279                                  int nfheader_len)
1280 {
1281         struct xfrm_policy_afinfo *afinfo =
1282                 xfrm_policy_get_afinfo(dst->ops->family);
1283         int err;
1284
1285         if (!afinfo)
1286                 return -EINVAL;
1287
1288         err = afinfo->init_path(path, dst, nfheader_len);
1289
1290         xfrm_policy_put_afinfo(afinfo);
1291
1292         return err;
1293 }
1294
1295 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev)
1296 {
1297         struct xfrm_policy_afinfo *afinfo =
1298                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1299         int err;
1300
1301         if (!afinfo)
1302                 return -EINVAL;
1303
1304         err = afinfo->fill_dst(xdst, dev);
1305
1306         xfrm_policy_put_afinfo(afinfo);
1307
1308         return err;
1309 }
1310
1311 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1312  * all the metrics... Shortly, bundle a bundle.
1313  */
1314
1315 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1316                                             struct xfrm_state **xfrm, int nx,
1317                                             struct flowi *fl,
1318                                             struct dst_entry *dst)
1319 {
1320         unsigned long now = jiffies;
1321         struct net_device *dev;
1322         struct dst_entry *dst_prev = NULL;
1323         struct dst_entry *dst0 = NULL;
1324         int i = 0;
1325         int err;
1326         int header_len = 0;
1327         int nfheader_len = 0;
1328         int trailer_len = 0;
1329         int tos;
1330         int family = policy->selector.family;
1331
1332         tos = xfrm_get_tos(fl, family);
1333         err = tos;
1334         if (tos < 0)
1335                 goto put_states;
1336
1337         dst_hold(dst);
1338
1339         for (; i < nx; i++) {
1340                 struct xfrm_dst *xdst = xfrm_alloc_dst(family);
1341                 struct dst_entry *dst1 = &xdst->u.dst;
1342
1343                 err = PTR_ERR(xdst);
1344                 if (IS_ERR(xdst)) {
1345                         dst_release(dst);
1346                         goto put_states;
1347                 }
1348
1349                 if (!dst_prev)
1350                         dst0 = dst1;
1351                 else {
1352                         dst_prev->child = dst_clone(dst1);
1353                         dst1->flags |= DST_NOHASH;
1354                 }
1355
1356                 xdst->route = dst;
1357                 memcpy(&dst1->metrics, &dst->metrics, sizeof(dst->metrics));
1358
1359                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1360                         family = xfrm[i]->props.family;
1361                         dst = xfrm_dst_lookup(xfrm[i], tos, family);
1362                         err = PTR_ERR(dst);
1363                         if (IS_ERR(dst))
1364                                 goto put_states;
1365                 } else
1366                         dst_hold(dst);
1367
1368                 dst1->xfrm = xfrm[i];
1369                 xdst->genid = xfrm[i]->genid;
1370
1371                 dst1->obsolete = -1;
1372                 dst1->flags |= DST_HOST;
1373                 dst1->lastuse = now;
1374
1375                 dst1->input = dst_discard;
1376                 dst1->output = xfrm[i]->outer_mode->afinfo->output;
1377
1378                 dst1->next = dst_prev;
1379                 dst_prev = dst1;
1380
1381                 header_len += xfrm[i]->props.header_len;
1382                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1383                         nfheader_len += xfrm[i]->props.header_len;
1384                 trailer_len += xfrm[i]->props.trailer_len;
1385         }
1386
1387         dst_prev->child = dst;
1388         dst0->path = dst;
1389
1390         err = -ENODEV;
1391         dev = dst->dev;
1392         if (!dev)
1393                 goto free_dst;
1394
1395         /* Copy neighbout for reachability confirmation */
1396         dst0->neighbour = neigh_clone(dst->neighbour);
1397
1398         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1399         xfrm_init_pmtu(dst_prev);
1400
1401         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1402                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1403
1404                 err = xfrm_fill_dst(xdst, dev);
1405                 if (err)
1406                         goto free_dst;
1407
1408                 dst_prev->header_len = header_len;
1409                 dst_prev->trailer_len = trailer_len;
1410                 header_len -= xdst->u.dst.xfrm->props.header_len;
1411                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1412         }
1413
1414 out:
1415         return dst0;
1416
1417 put_states:
1418         for (; i < nx; i++)
1419                 xfrm_state_put(xfrm[i]);
1420 free_dst:
1421         if (dst0)
1422                 dst_free(dst0);
1423         dst0 = ERR_PTR(err);
1424         goto out;
1425 }
1426
1427 static int inline
1428 xfrm_dst_alloc_copy(void **target, void *src, int size)
1429 {
1430         if (!*target) {
1431                 *target = kmalloc(size, GFP_ATOMIC);
1432                 if (!*target)
1433                         return -ENOMEM;
1434         }
1435         memcpy(*target, src, size);
1436         return 0;
1437 }
1438
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of the given selector in
 * the bundle's 'partner' buffer (allocated on first use).  Without it
 * this does nothing and returns 0.
 */
static inline int
xfrm_dst_update_parent(struct dst_entry *dst, struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->partner);

	return xfrm_dst_alloc_copy(slot, sel, sizeof(*sel));
#else
	return 0;
#endif
}
1450
/*
 * With CONFIG_XFRM_SUB_POLICY, store a copy of the flow in the
 * bundle's 'origin' buffer (allocated on first use).  Without it this
 * does nothing and returns 0.
 */
static inline int
xfrm_dst_update_origin(struct dst_entry *dst, struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	void **slot = (void **)&(xdst->origin);

	return xfrm_dst_alloc_copy(slot, fl, sizeof(*fl));
#else
	return 0;
#endif
}
1461
1462 static int stale_bundle(struct dst_entry *dst);
1463
1464 /* Main function: finds/creates a bundle for given flow.
1465  *
1466  * At the moment we eat a raw IP route. Mostly to speed up lookups
1467  * on interfaces with disabled IPsec.
1468  */
1469 int __xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1470                   struct sock *sk, int flags)
1471 {
1472         struct xfrm_policy *policy;
1473         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1474         int npols;
1475         int pol_dead;
1476         int xfrm_nr;
1477         int pi;
1478         struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
1479         struct dst_entry *dst, *dst_orig = *dst_p;
1480         int nx = 0;
1481         int err;
1482         u32 genid;
1483         u16 family;
1484         u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
1485
1486 restart:
1487         genid = atomic_read(&flow_cache_genid);
1488         policy = NULL;
1489         for (pi = 0; pi < ARRAY_SIZE(pols); pi++)
1490                 pols[pi] = NULL;
1491         npols = 0;
1492         pol_dead = 0;
1493         xfrm_nr = 0;
1494
1495         if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
1496                 policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
1497                 err = PTR_ERR(policy);
1498                 if (IS_ERR(policy)) {
1499                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1500                         goto dropdst;
1501                 }
1502         }
1503
1504         if (!policy) {
1505                 /* To accelerate a bit...  */
1506                 if ((dst_orig->flags & DST_NOXFRM) ||
1507                     !xfrm_policy_count[XFRM_POLICY_OUT])
1508                         goto nopol;
1509
1510                 policy = flow_cache_lookup(fl, dst_orig->ops->family,
1511                                            dir, xfrm_policy_lookup);
1512                 err = PTR_ERR(policy);
1513                 if (IS_ERR(policy)) {
1514                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1515                         goto dropdst;
1516                 }
1517         }
1518
1519         if (!policy)
1520                 goto nopol;
1521
1522         family = dst_orig->ops->family;
1523         pols[0] = policy;
1524         npols ++;
1525         xfrm_nr += pols[0]->xfrm_nr;
1526
1527         err = -ENOENT;
1528         if ((flags & XFRM_LOOKUP_ICMP) && !(policy->flags & XFRM_POLICY_ICMP))
1529                 goto error;
1530
1531         policy->curlft.use_time = get_seconds();
1532
1533         switch (policy->action) {
1534         default:
1535         case XFRM_POLICY_BLOCK:
1536                 /* Prohibit the flow */
1537                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1538                 err = -EPERM;
1539                 goto error;
1540
1541         case XFRM_POLICY_ALLOW:
1542 #ifndef CONFIG_XFRM_SUB_POLICY
1543                 if (policy->xfrm_nr == 0) {
1544                         /* Flow passes not transformed. */
1545                         xfrm_pol_put(policy);
1546                         return 0;
1547                 }
1548 #endif
1549
1550                 /* Try to find matching bundle.
1551                  *
1552                  * LATER: help from flow cache. It is optional, this
1553                  * is required only for output policy.
1554                  */
1555                 dst = xfrm_find_bundle(fl, policy, family);
1556                 if (IS_ERR(dst)) {
1557                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1558                         err = PTR_ERR(dst);
1559                         goto error;
1560                 }
1561
1562                 if (dst)
1563                         break;
1564
1565 #ifdef CONFIG_XFRM_SUB_POLICY
1566                 if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1567                         pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1568                                                             fl, family,
1569                                                             XFRM_POLICY_OUT);
1570                         if (pols[1]) {
1571                                 if (IS_ERR(pols[1])) {
1572                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLERROR);
1573                                         err = PTR_ERR(pols[1]);
1574                                         goto error;
1575                                 }
1576                                 if (pols[1]->action == XFRM_POLICY_BLOCK) {
1577                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLBLOCK);
1578                                         err = -EPERM;
1579                                         goto error;
1580                                 }
1581                                 npols ++;
1582                                 xfrm_nr += pols[1]->xfrm_nr;
1583                         }
1584                 }
1585
1586                 /*
1587                  * Because neither flowi nor bundle information knows about
1588                  * transformation template size. On more than one policy usage
1589                  * we can realize whether all of them is bypass or not after
1590                  * they are searched. See above not-transformed bypass
1591                  * is surrounded by non-sub policy configuration, too.
1592                  */
1593                 if (xfrm_nr == 0) {
1594                         /* Flow passes not transformed. */
1595                         xfrm_pols_put(pols, npols);
1596                         return 0;
1597                 }
1598
1599 #endif
1600                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1601
1602                 if (unlikely(nx<0)) {
1603                         err = nx;
1604                         if (err == -EAGAIN && sysctl_xfrm_larval_drop) {
1605                                 /* EREMOTE tells the caller to generate
1606                                  * a one-shot blackhole route.
1607                                  */
1608                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1609                                 xfrm_pol_put(policy);
1610                                 return -EREMOTE;
1611                         }
1612                         if (err == -EAGAIN && (flags & XFRM_LOOKUP_WAIT)) {
1613                                 DECLARE_WAITQUEUE(wait, current);
1614
1615                                 add_wait_queue(&km_waitq, &wait);
1616                                 set_current_state(TASK_INTERRUPTIBLE);
1617                                 schedule();
1618                                 set_current_state(TASK_RUNNING);
1619                                 remove_wait_queue(&km_waitq, &wait);
1620
1621                                 nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family);
1622
1623                                 if (nx == -EAGAIN && signal_pending(current)) {
1624                                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1625                                         err = -ERESTART;
1626                                         goto error;
1627                                 }
1628                                 if (nx == -EAGAIN ||
1629                                     genid != atomic_read(&flow_cache_genid)) {
1630                                         xfrm_pols_put(pols, npols);
1631                                         goto restart;
1632                                 }
1633                                 err = nx;
1634                         }
1635                         if (err < 0) {
1636                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTNOSTATES);
1637                                 goto error;
1638                         }
1639                 }
1640                 if (nx == 0) {
1641                         /* Flow passes not transformed. */
1642                         xfrm_pols_put(pols, npols);
1643                         return 0;
1644                 }
1645
1646                 dst = xfrm_bundle_create(policy, xfrm, nx, fl, dst_orig);
1647                 err = PTR_ERR(dst);
1648                 if (IS_ERR(dst)) {
1649                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLEGENERROR);
1650                         goto error;
1651                 }
1652
1653                 for (pi = 0; pi < npols; pi++) {
1654                         read_lock_bh(&pols[pi]->lock);
1655                         pol_dead |= pols[pi]->dead;
1656                         read_unlock_bh(&pols[pi]->lock);
1657                 }
1658
1659                 write_lock_bh(&policy->lock);
1660                 if (unlikely(pol_dead || stale_bundle(dst))) {
1661                         /* Wow! While we worked on resolving, this
1662                          * policy has gone. Retry. It is not paranoia,
1663                          * we just cannot enlist new bundle to dead object.
1664                          * We can't enlist stable bundles either.
1665                          */
1666                         write_unlock_bh(&policy->lock);
1667                         if (dst)
1668                                 dst_free(dst);
1669
1670                         if (pol_dead)
1671                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTPOLDEAD);
1672                         else
1673                                 XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1674                         err = -EHOSTUNREACH;
1675                         goto error;
1676                 }
1677
1678                 if (npols > 1)
1679                         err = xfrm_dst_update_parent(dst, &pols[1]->selector);
1680                 else
1681                         err = xfrm_dst_update_origin(dst, fl);
1682                 if (unlikely(err)) {
1683                         write_unlock_bh(&policy->lock);
1684                         if (dst)
1685                                 dst_free(dst);
1686                         XFRM_INC_STATS(LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
1687                         goto error;
1688                 }
1689
1690                 dst->next = policy->bundles;
1691                 policy->bundles = dst;
1692                 dst_hold(dst);
1693                 write_unlock_bh(&policy->lock);
1694         }
1695         *dst_p = dst;
1696         dst_release(dst_orig);
1697         xfrm_pols_put(pols, npols);
1698         return 0;
1699
1700 error:
1701         xfrm_pols_put(pols, npols);
1702 dropdst:
1703         dst_release(dst_orig);
1704         *dst_p = NULL;
1705         return err;
1706
1707 nopol:
1708         err = -ENOENT;
1709         if (flags & XFRM_LOOKUP_ICMP)
1710                 goto dropdst;
1711         return 0;
1712 }
1713 EXPORT_SYMBOL(__xfrm_lookup);
1714
1715 int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
1716                 struct sock *sk, int flags)
1717 {
1718         int err = __xfrm_lookup(dst_p, fl, sk, flags);
1719
1720         if (err == -EREMOTE) {
1721                 dst_release(*dst_p);
1722                 *dst_p = NULL;
1723                 err = -EAGAIN;
1724         }
1725
1726         return err;
1727 }
1728 EXPORT_SYMBOL(xfrm_lookup);
1729
1730 static inline int
1731 xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl)
1732 {
1733         struct xfrm_state *x;
1734
1735         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
1736                 return 0;
1737         x = skb->sp->xvec[idx];
1738         if (!x->type->reject)
1739                 return 0;
1740         return x->type->reject(x, skb, fl);
1741 }
1742
1743 /* When skb is transformed back to its "native" form, we have to
1744  * check policy restrictions. At the moment we make this in maximally
1745  * stupid way. Shame on me. :-) Of course, connected sockets must
1746  * have policy cached at them.
1747  */
1748
1749 static inline int
1750 xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x,
1751               unsigned short family)
1752 {
1753         if (xfrm_state_kern(x))
1754                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
1755         return  x->id.proto == tmpl->id.proto &&
1756                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
1757                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
1758                 x->props.mode == tmpl->mode &&
1759                 ((tmpl->aalgos & (1<<x->props.aalgo)) ||
1760                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
1761                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
1762                   xfrm_state_addr_cmp(tmpl, x, family));
1763 }
1764
1765 /*
1766  * 0 or more than 0 is returned when validation is succeeded (either bypass
1767  * because of optional transport mode, or next index of the mathced secpath
1768  * state with the template.
1769  * -1 is returned when no matching template is found.
1770  * Otherwise "-2 - errored_index" is returned.
1771  */
1772 static inline int
1773 xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
1774                unsigned short family)
1775 {
1776         int idx = start;
1777
1778         if (tmpl->optional) {
1779                 if (tmpl->mode == XFRM_MODE_TRANSPORT)
1780                         return start;
1781         } else
1782                 start = -1;
1783         for (; idx < sp->len; idx++) {
1784                 if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
1785                         return ++idx;
1786                 if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
1787                         if (start == -1)
1788                                 start = -2-idx;
1789                         break;
1790                 }
1791         }
1792         return start;
1793 }
1794
1795 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
1796                           unsigned int family, int reverse)
1797 {
1798         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1799         int err;
1800
1801         if (unlikely(afinfo == NULL))
1802                 return -EAFNOSUPPORT;
1803
1804         afinfo->decode_session(skb, fl, reverse);
1805         err = security_xfrm_decode_session(skb, &fl->secid);
1806         xfrm_policy_put_afinfo(afinfo);
1807         return err;
1808 }
1809 EXPORT_SYMBOL(__xfrm_decode_session);
1810
1811 static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp)
1812 {
1813         for (; k < sp->len; k++) {
1814                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
1815                         *idxp = k;
1816                         return 1;
1817                 }
1818         }
1819
1820         return 0;
1821 }
1822
1823 int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
1824                         unsigned short family)
1825 {
1826         struct xfrm_policy *pol;
1827         struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
1828         int npols = 0;
1829         int xfrm_nr;
1830         int pi;
1831         int reverse;
1832         struct flowi fl;
1833         u8 fl_dir;
1834         int xerr_idx = -1;
1835
1836         reverse = dir & ~XFRM_POLICY_MASK;
1837         dir &= XFRM_POLICY_MASK;
1838         fl_dir = policy_to_flow_dir(dir);
1839
1840         if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
1841                 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1842                 return 0;
1843         }
1844
1845         nf_nat_decode_session(skb, &fl, family);
1846
1847         /* First, check used SA against their selectors. */
1848         if (skb->sp) {
1849                 int i;
1850
1851                 for (i=skb->sp->len-1; i>=0; i--) {
1852                         struct xfrm_state *x = skb->sp->xvec[i];
1853                         if (!xfrm_selector_match(&x->sel, &fl, family)) {
1854                                 XFRM_INC_STATS(LINUX_MIB_XFRMINSTATEMISMATCH);
1855                                 return 0;
1856                         }
1857                 }
1858         }
1859
1860         pol = NULL;
1861         if (sk && sk->sk_policy[dir]) {
1862                 pol = xfrm_sk_policy_lookup(sk, dir, &fl);
1863                 if (IS_ERR(pol)) {
1864                         XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1865                         return 0;
1866                 }
1867         }
1868
1869         if (!pol)
1870                 pol = flow_cache_lookup(&fl, family, fl_dir,
1871                                         xfrm_policy_lookup);
1872
1873         if (IS_ERR(pol)) {
1874                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1875                 return 0;
1876         }
1877
1878         if (!pol) {
1879                 if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
1880                         xfrm_secpath_reject(xerr_idx, skb, &fl);
1881                         XFRM_INC_STATS(LINUX_MIB_XFRMINNOPOLS);
1882                         return 0;
1883                 }
1884                 return 1;
1885         }
1886
1887         pol->curlft.use_time = get_seconds();
1888
1889         pols[0] = pol;
1890         npols ++;
1891 #ifdef CONFIG_XFRM_SUB_POLICY
1892         if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
1893                 pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN,
1894                                                     &fl, family,
1895                                                     XFRM_POLICY_IN);
1896                 if (pols[1]) {
1897                         if (IS_ERR(pols[1])) {
1898                                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLERROR);
1899                                 return 0;
1900                         }
1901                         pols[1]->curlft.use_time = get_seconds();
1902                         npols ++;
1903                 }
1904         }
1905 #endif
1906
1907         if (pol->action == XFRM_POLICY_ALLOW) {
1908                 struct sec_path *sp;
1909                 static struct sec_path dummy;
1910                 struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
1911                 struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
1912                 struct xfrm_tmpl **tpp = tp;
1913                 int ti = 0;
1914                 int i, k;
1915
1916                 if ((sp = skb->sp) == NULL)
1917                         sp = &dummy;
1918
1919                 for (pi = 0; pi < npols; pi++) {
1920                         if (pols[pi] != pol &&
1921                             pols[pi]->action != XFRM_POLICY_ALLOW) {
1922                                 XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1923                                 goto reject;
1924                         }
1925                         if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
1926                                 XFRM_INC_STATS(LINUX_MIB_XFRMINBUFFERERROR);
1927                                 goto reject_error;
1928                         }
1929                         for (i = 0; i < pols[pi]->xfrm_nr; i++)
1930                                 tpp[ti++] = &pols[pi]->xfrm_vec[i];
1931                 }
1932                 xfrm_nr = ti;
1933                 if (npols > 1) {
1934                         xfrm_tmpl_sort(stp, tpp, xfrm_nr, family);
1935                         tpp = stp;
1936                 }
1937
1938                 /* For each tunnel xfrm, find the first matching tmpl.
1939                  * For each tmpl before that, find corresponding xfrm.
1940                  * Order is _important_. Later we will implement
1941                  * some barriers, but at the moment barriers
1942                  * are implied between each two transformations.
1943                  */
1944                 for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
1945                         k = xfrm_policy_ok(tpp[i], sp, k, family);
1946                         if (k < 0) {
1947                                 if (k < -1)
1948                                         /* "-2 - errored_index" returned */
1949                                         xerr_idx = -(2+k);
1950                                 XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1951                                 goto reject;
1952                         }
1953                 }
1954
1955                 if (secpath_has_nontransport(sp, k, &xerr_idx)) {
1956                         XFRM_INC_STATS(LINUX_MIB_XFRMINTMPLMISMATCH);
1957                         goto reject;
1958                 }
1959
1960                 xfrm_pols_put(pols, npols);
1961                 return 1;
1962         }
1963         XFRM_INC_STATS(LINUX_MIB_XFRMINPOLBLOCK);
1964
1965 reject:
1966         xfrm_secpath_reject(xerr_idx, skb, &fl);
1967 reject_error:
1968         xfrm_pols_put(pols, npols);
1969         return 0;
1970 }
1971 EXPORT_SYMBOL(__xfrm_policy_check);
1972
1973 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
1974 {
1975         struct flowi fl;
1976
1977         if (xfrm_decode_session(skb, &fl, family) < 0) {
1978                 /* XXX: we should have something like FWDHDRERROR here. */
1979                 XFRM_INC_STATS(LINUX_MIB_XFRMINHDRERROR);
1980                 return 0;
1981         }
1982
1983         return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
1984 }
1985 EXPORT_SYMBOL(__xfrm_route_forward);
1986
1987 /* Optimize later using cookies and generation ids. */
1988
1989 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
1990 {
1991         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
1992          * to "-1" to force all XFRM destinations to get validated by
1993          * dst_ops->check on every use.  We do this because when a
1994          * normal route referenced by an XFRM dst is obsoleted we do
1995          * not go looking around for all parent referencing XFRM dsts
1996          * so that we can invalidate them.  It is just too much work.
1997          * Instead we make the checks here on every use.  For example:
1998          *
1999          *      XFRM dst A --> IPv4 dst X
2000          *
2001          * X is the "xdst->route" of A (X is also the "dst->path" of A
2002          * in this example).  If X is marked obsolete, "A" will not
2003          * notice.  That's what we are validating here via the
2004          * stale_bundle() check.
2005          *
2006          * When a policy's bundle is pruned, we dst_free() the XFRM
2007          * dst which causes it's ->obsolete field to be set to a
2008          * positive non-zero integer.  If an XFRM dst has been pruned
2009          * like this, we want to force a new route lookup.
2010          */
2011         if (dst->obsolete < 0 && !stale_bundle(dst))
2012                 return dst;
2013
2014         return NULL;
2015 }
2016
2017 static int stale_bundle(struct dst_entry *dst)
2018 {
2019         return !xfrm_bundle_ok(NULL, (struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0);
2020 }
2021
2022 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2023 {
2024         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2025                 dst->dev = dev->nd_net->loopback_dev;
2026                 dev_hold(dst->dev);
2027                 dev_put(dev);
2028         }
2029 }
2030 EXPORT_SYMBOL(xfrm_dst_ifdown);
2031
/* dst_ops->link_failure handler; intentionally does nothing. */
static void xfrm_link_failure(struct sk_buff *skb)
{
	/* Impossible. Such dst must be popped before reaches point of failure. */
}
2037
2038 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2039 {
2040         if (dst) {
2041                 if (dst->obsolete) {
2042                         dst_release(dst);
2043                         dst = NULL;
2044                 }
2045         }
2046         return dst;
2047 }
2048
2049 static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p)
2050 {
2051         struct dst_entry *dst, **dstp;
2052
2053         write_lock(&pol->lock);
2054         dstp = &pol->bundles;
2055         while ((dst=*dstp) != NULL) {
2056                 if (func(dst)) {
2057                         *dstp = dst->next;
2058                         dst->next = *gc_list_p;
2059                         *gc_list_p = dst;
2060                 } else {
2061                         dstp = &dst->next;
2062                 }
2063         }
2064         write_unlock(&pol->lock);
2065 }
2066
2067 static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
2068 {
2069         struct dst_entry *gc_list = NULL;
2070         int dir;
2071
2072         read_lock_bh(&xfrm_policy_lock);
2073         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2074                 struct xfrm_policy *pol;
2075                 struct hlist_node *entry;
2076                 struct hlist_head *table;
2077                 int i;
2078
2079                 hlist_for_each_entry(pol, entry,
2080                                      &xfrm_policy_inexact[dir], bydst)
2081                         prune_one_bundle(pol, func, &gc_list);
2082
2083                 table = xfrm_policy_bydst[dir].table;
2084                 for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) {
2085                         hlist_for_each_entry(pol, entry, table + i, bydst)
2086                                 prune_one_bundle(pol, func, &gc_list);
2087                 }
2088         }
2089         read_unlock_bh(&xfrm_policy_lock);
2090
2091         while (gc_list) {
2092                 struct dst_entry *dst = gc_list;
2093                 gc_list = dst->next;
2094                 dst_free(dst);
2095         }
2096 }
2097
2098 static int unused_bundle(struct dst_entry *dst)
2099 {
2100         return !atomic_read(&dst->__refcnt);
2101 }
2102
2103 static void __xfrm_garbage_collect(void)
2104 {
2105         xfrm_prune_bundles(unused_bundle);
2106 }
2107
2108 static int xfrm_flush_bundles(void)
2109 {
2110         xfrm_prune_bundles(stale_bundle);
2111         return 0;
2112 }
2113
2114 static void xfrm_init_pmtu(struct dst_entry *dst)
2115 {
2116         do {
2117                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2118                 u32 pmtu, route_mtu_cached;
2119
2120                 pmtu = dst_mtu(dst->child);
2121                 xdst->child_mtu_cached = pmtu;
2122
2123                 pmtu = xfrm_state_mtu(dst->xfrm, pmtu);
2124
2125                 route_mtu_cached = dst_mtu(xdst->route);
2126                 xdst->route_mtu_cached = route_mtu_cached;
2127
2128                 if (pmtu > route_mtu_cached)
2129                         pmtu = route_mtu_cached;
2130
2131                 dst->metrics[RTAX_MTU-1] = pmtu;
2132         } while ((dst = dst->next));
2133 }
2134
2135 /* Check that the bundle accepts the flow and its components are
2136  * still valid.
2137  */
2138
2139 int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first,
2140                 struct flowi *fl, int family, int strict)
2141 {
2142         struct dst_entry *dst = &first->u.dst;
2143         struct xfrm_dst *last;
2144         u32 mtu;
2145
2146         if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
2147             (dst->dev && !netif_running(dst->dev)))
2148                 return 0;
2149 #ifdef CONFIG_XFRM_SUB_POLICY
2150         if (fl) {
2151                 if (first->origin && !flow_cache_uli_match(first->origin, fl))
2152                         return 0;
2153                 if (first->partner &&
2154                     !xfrm_selector_match(first->partner, fl, family))
2155                         return 0;
2156         }
2157 #endif
2158
2159         last = NULL;
2160
2161         do {
2162                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
2163
2164                 if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family))
2165                         return 0;
2166                 if (fl && pol &&
2167                     !security_xfrm_state_pol_flow_match(dst->xfrm, pol, fl))
2168                         return 0;
2169                 if (dst->xfrm->km.state != XFRM_STATE_VALID)
2170                         return 0;
2171                 if (xdst->genid != dst->xfrm->genid)
2172                         return 0;
2173
2174                 if (strict && fl &&
2175                     !(dst->xfrm->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL) &&
2176                     !xfrm_state_addr_flow_check(dst->xfrm, fl, family))
2177                         return 0;
2178
2179                 mtu = dst_mtu(dst->child);
2180                 if (xdst->child_mtu_cached != mtu) {
2181                         last = xdst;
2182                         xdst->child_mtu_cached = mtu;
2183                 }
2184
2185                 if (!dst_check(xdst->route, xdst->route_cookie))
2186                         return 0;
2187                 mtu = dst_mtu(xdst->route);
2188                 if (xdst->route_mtu_cached != mtu) {
2189                         last = xdst;
2190                         xdst->route_mtu_cached = mtu;
2191                 }
2192
2193                 dst = dst->child;
2194         } while (dst->xfrm);
2195
2196         if (likely(!last))
2197                 return 1;
2198
2199         mtu = last->child_mtu_cached;
2200         for (;;) {
2201                 dst = &last->u.dst;
2202
2203                 mtu = xfrm_state_mtu(dst->xfrm, mtu);
2204                 if (mtu > last->route_mtu_cached)
2205                         mtu = last->route_mtu_cached;
2206                 dst->metrics[RTAX_MTU-1] = mtu;
2207
2208                 if (last == first)
2209                         break;
2210
2211                 last = (struct xfrm_dst *)last->u.dst.next;
2212                 last->child_mtu_cached = mtu;
2213         }
2214
2215         return 1;
2216 }
2217
2218 EXPORT_SYMBOL(xfrm_bundle_ok);
2219
2220 int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
2221 {
2222         int err = 0;
2223         if (unlikely(afinfo == NULL))
2224                 return -EINVAL;
2225         if (unlikely(afinfo->family >= NPROTO))
2226                 return -EAFNOSUPPORT;
2227         write_lock_bh(&xfrm_policy_afinfo_lock);
2228         if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
2229                 err = -ENOBUFS;
2230         else {
2231                 struct dst_ops *dst_ops = afinfo->dst_ops;
2232                 if (likely(dst_ops->kmem_cachep == NULL))
2233                         dst_ops->kmem_cachep = xfrm_dst_cache;
2234                 if (likely(dst_ops->check == NULL))
2235                         dst_ops->check = xfrm_dst_check;
2236                 if (likely(dst_ops->negative_advice == NULL))
2237                         dst_ops->negative_advice = xfrm_negative_advice;
2238                 if (likely(dst_ops->link_failure == NULL))
2239                         dst_ops->link_failure = xfrm_link_failure;
2240                 if (likely(afinfo->garbage_collect == NULL))
2241                         afinfo->garbage_collect = __xfrm_garbage_collect;
2242                 xfrm_policy_afinfo[afinfo->family] = afinfo;
2243         }
2244         write_unlock_bh(&xfrm_policy_afinfo_lock);
2245         return err;
2246 }
2247 EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2248
2249 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2250 {
2251         int err = 0;
2252         if (unlikely(afinfo == NULL))
2253                 return -EINVAL;
2254         if (unlikely(afinfo->family >= NPROTO))
2255                 return -EAFNOSUPPORT;
2256         write_lock_bh(&xfrm_policy_afinfo_lock);
2257         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2258                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2259                         err = -EINVAL;
2260                 else {
2261                         struct dst_ops *dst_ops = afinfo->dst_ops;
2262                         xfrm_policy_afinfo[afinfo->family] = NULL;
2263                         dst_ops->kmem_cachep = NULL;
2264                         dst_ops->check = NULL;
2265                         dst_ops->negative_advice = NULL;
2266                         dst_ops->link_failure = NULL;
2267                         afinfo->garbage_collect = NULL;
2268                 }
2269         }
2270         write_unlock_bh(&xfrm_policy_afinfo_lock);
2271         return err;
2272 }
2273 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2274
2275 static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
2276 {
2277         struct xfrm_policy_afinfo *afinfo;
2278         if (unlikely(family >= NPROTO))
2279                 return NULL;
2280         read_lock(&xfrm_policy_afinfo_lock);
2281         afinfo = xfrm_policy_afinfo[family];
2282         if (unlikely(!afinfo))
2283                 read_unlock(&xfrm_policy_afinfo_lock);
2284         return afinfo;
2285 }
2286
2287 static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
2288 {
2289         read_unlock(&xfrm_policy_afinfo_lock);
2290 }
2291
2292 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2293 {
2294         struct net_device *dev = ptr;
2295
2296         if (dev->nd_net != &init_net)
2297                 return NOTIFY_DONE;
2298
2299         switch (event) {
2300         case NETDEV_DOWN:
2301                 xfrm_flush_bundles();
2302         }
2303         return NOTIFY_DONE;
2304 }
2305
2306 static struct notifier_block xfrm_dev_notifier = {
2307         xfrm_dev_event,
2308         NULL,
2309         0
2310 };
2311
2312 #ifdef CONFIG_XFRM_STATISTICS
2313 static int __init xfrm_statistics_init(void)
2314 {
2315         if (snmp_mib_init((void **)xfrm_statistics,
2316                           sizeof(struct linux_xfrm_mib)) < 0)
2317                 return -ENOMEM;
2318         return 0;
2319 }
2320 #endif
2321
2322 static void __init xfrm_policy_init(void)
2323 {
2324         unsigned int hmask, sz;
2325         int dir;
2326
2327         xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
2328                                            sizeof(struct xfrm_dst),
2329                                            0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
2330                                            NULL);
2331
2332         hmask = 8 - 1;
2333         sz = (hmask+1) * sizeof(struct hlist_head);
2334
2335         xfrm_policy_byidx = xfrm_hash_alloc(sz);
2336         xfrm_idx_hmask = hmask;
2337         if (!xfrm_policy_byidx)
2338                 panic("XFRM: failed to allocate byidx hash\n");
2339
2340         for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
2341                 struct xfrm_policy_hash *htab;
2342
2343                 INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]);
2344
2345                 htab = &xfrm_policy_bydst[dir];
2346                 htab->table = xfrm_hash_alloc(sz);
2347                 htab->hmask = hmask;
2348                 if (!htab->table)
2349                         panic("XFRM: failed to allocate bydst hash\n");
2350         }
2351
2352         INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task);
2353         register_netdevice_notifier(&xfrm_dev_notifier);
2354 }
2355
2356 void __init xfrm_init(void)
2357 {
2358 #ifdef CONFIG_XFRM_STATISTICS
2359         xfrm_statistics_init();
2360 #endif
2361         xfrm_state_init();
2362         xfrm_policy_init();
2363         xfrm_input_init();
2364 #ifdef CONFIG_XFRM_STATISTICS
2365         xfrm_proc_init();
2366 #endif
2367 }
2368
2369 #ifdef CONFIG_AUDITSYSCALL
2370 static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
2371                                          struct audit_buffer *audit_buf)
2372 {
2373         struct xfrm_sec_ctx *ctx = xp->security;
2374         struct xfrm_selector *sel = &xp->selector;
2375
2376         if (ctx)
2377                 audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
2378                                  ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);
2379
2380         switch(sel->family) {
2381         case AF_INET:
2382                 audit_log_format(audit_buf, " src=" NIPQUAD_FMT,
2383                                  NIPQUAD(sel->saddr.a4));
2384                 if (sel->prefixlen_s != 32)
2385                         audit_log_format(audit_buf, " src_prefixlen=%d",
2386                                          sel->prefixlen_s);
2387                 audit_log_format(audit_buf, " dst=" NIPQUAD_FMT,
2388                                  NIPQUAD(sel->daddr.a4));
2389                 if (sel->prefixlen_d != 32)
2390                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2391                                          sel->prefixlen_d);
2392                 break;
2393         case AF_INET6:
2394                 audit_log_format(audit_buf, " src=" NIP6_FMT,
2395                                  NIP6(*(struct in6_addr *)sel->saddr.a6));
2396                 if (sel->prefixlen_s != 128)
2397                         audit_log_format(audit_buf, " src_prefixlen=%d",
2398                                          sel->prefixlen_s);
2399                 audit_log_format(audit_buf, " dst=" NIP6_FMT,
2400                                  NIP6(*(struct in6_addr *)sel->daddr.a6));
2401                 if (sel->prefixlen_d != 128)
2402                         audit_log_format(audit_buf, " dst_prefixlen=%d",
2403                                          sel->prefixlen_d);
2404                 break;
2405         }
2406 }
2407
2408 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
2409                            u32 auid, u32 secid)
2410 {
2411         struct audit_buffer *audit_buf;
2412
2413         audit_buf = xfrm_audit_start("SPD-add");
2414         if (audit_buf == NULL)
2415                 return;
2416         xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2417         audit_log_format(audit_buf, " res=%u", result);
2418         xfrm_audit_common_policyinfo(xp, audit_buf);
2419         audit_log_end(audit_buf);
2420 }
2421 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
2422
2423 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
2424                               u32 auid, u32 secid)
2425 {
2426         struct audit_buffer *audit_buf;
2427
2428         audit_buf = xfrm_audit_start("SPD-delete");
2429         if (audit_buf == NULL)
2430                 return;
2431         xfrm_audit_helper_usrinfo(auid, secid, audit_buf);
2432         audit_log_format(audit_buf, " res=%u", result);
2433         xfrm_audit_common_policyinfo(xp, audit_buf);
2434         audit_log_end(audit_buf);
2435 }
2436 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
2437 #endif
2438
2439 #ifdef CONFIG_XFRM_MIGRATE
2440 static int xfrm_migrate_selector_match(struct xfrm_selector *sel_cmp,
2441                                        struct xfrm_selector *sel_tgt)
2442 {
2443         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
2444                 if (sel_tgt->family == sel_cmp->family &&
2445                     xfrm_addr_cmp(&sel_tgt->daddr, &sel_cmp->daddr,
2446                                   sel_cmp->family) == 0 &&
2447                     xfrm_addr_cmp(&sel_tgt->saddr, &sel_cmp->saddr,
2448                                   sel_cmp->family) == 0 &&
2449                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
2450                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
2451                         return 1;
2452                 }
2453         } else {
2454                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
2455                         return 1;
2456                 }
2457         }
2458         return 0;
2459 }
2460
2461 static struct xfrm_policy * xfrm_migrate_policy_find(struct xfrm_selector *sel,
2462                                                      u8 dir, u8 type)
2463 {
2464         struct xfrm_policy *pol, *ret = NULL;
2465         struct hlist_node *entry;
2466         struct hlist_head *chain;
2467         u32 priority = ~0U;
2468
2469         read_lock_bh(&xfrm_policy_lock);
2470         chain = policy_hash_direct(&sel->daddr, &sel->saddr, sel->family, dir);
2471         hlist_for_each_entry(pol, entry, chain, bydst) {
2472                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2473                     pol->type == type) {
2474                         ret = pol;
2475                         priority = ret->priority;
2476                         break;
2477                 }
2478         }
2479         chain = &xfrm_policy_inexact[dir];
2480         hlist_for_each_entry(pol, entry, chain, bydst) {
2481                 if (xfrm_migrate_selector_match(sel, &pol->selector) &&
2482                     pol->type == type &&
2483                     pol->priority < priority) {
2484                         ret = pol;
2485                         break;
2486                 }
2487         }
2488
2489         if (ret)
2490                 xfrm_pol_hold(ret);
2491
2492         read_unlock_bh(&xfrm_policy_lock);
2493
2494         return ret;
2495 }
2496
2497 static int migrate_tmpl_match(struct xfrm_migrate *m, struct xfrm_tmpl *t)
2498 {
2499         int match = 0;
2500
2501         if (t->mode == m->mode && t->id.proto == m->proto &&
2502             (m->reqid == 0 || t->reqid == m->reqid)) {
2503                 switch (t->mode) {
2504                 case XFRM_MODE_TUNNEL:
2505                 case XFRM_MODE_BEET:
2506                         if (xfrm_addr_cmp(&t->id.daddr, &m->old_daddr,
2507                                           m->old_family) == 0 &&
2508                             xfrm_addr_cmp(&t->saddr, &m->old_saddr,
2509                                           m->old_family) == 0) {
2510                                 match = 1;
2511                         }
2512                         break;
2513                 case XFRM_MODE_TRANSPORT:
2514                         /* in case of transport mode, template does not store
2515                            any IP addresses, hence we just compare mode and
2516                            protocol */
2517                         match = 1;
2518                         break;
2519                 default:
2520                         break;
2521                 }
2522         }
2523         return match;
2524 }
2525
2526 /* update endpoint address(es) of template(s) */
2527 static int xfrm_policy_migrate(struct xfrm_policy *pol,
2528                                struct xfrm_migrate *m, int num_migrate)
2529 {
2530         struct xfrm_migrate *mp;
2531         struct dst_entry *dst;
2532         int i, j, n = 0;
2533
2534         write_lock_bh(&pol->lock);
2535         if (unlikely(pol->dead)) {
2536                 /* target policy has been deleted */
2537                 write_unlock_bh(&pol->lock);
2538                 return -ENOENT;
2539         }
2540
2541         for (i = 0; i < pol->xfrm_nr; i++) {
2542                 for (j = 0, mp = m; j < num_migrate; j++, mp++) {
2543                         if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
2544                                 continue;
2545                         n++;
2546                         if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
2547                             pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
2548                                 continue;
2549                         /* update endpoints */
2550                         memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
2551                                sizeof(pol->xfrm_vec[i].id.daddr));
2552                         memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
2553                                sizeof(pol->xfrm_vec[i].saddr));
2554                         pol->xfrm_vec[i].encap_family = mp->new_family;
2555                         /* flush bundles */
2556                         while ((dst = pol->bundles) != NULL) {
2557                                 pol->bundles = dst->next;
2558                                 dst_free(dst);
2559                         }
2560                 }
2561         }
2562
2563         write_unlock_bh(&pol->lock);
2564
2565         if (!n)
2566                 return -ENODATA;
2567
2568         return 0;
2569 }
2570
2571 static int xfrm_migrate_check(struct xfrm_migrate *m, int num_migrate)
2572 {
2573         int i, j;
2574
2575         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
2576                 return -EINVAL;
2577
2578         for (i = 0; i < num_migrate; i++) {
2579                 if ((xfrm_addr_cmp(&m[i].old_daddr, &m[i].new_daddr,
2580                                    m[i].old_family) == 0) &&
2581                     (xfrm_addr_cmp(&m[i].old_saddr, &m[i].new_saddr,
2582                                    m[i].old_family) == 0))
2583                         return -EINVAL;
2584                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
2585                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
2586                         return -EINVAL;
2587
2588                 /* check if there is any duplicated entry */
2589                 for (j = i + 1; j < num_migrate; j++) {
2590                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
2591                                     sizeof(m[i].old_daddr)) &&
2592                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
2593                                     sizeof(m[i].old_saddr)) &&
2594                             m[i].proto == m[j].proto &&
2595                             m[i].mode == m[j].mode &&
2596                             m[i].reqid == m[j].reqid &&
2597                             m[i].old_family == m[j].old_family)
2598                                 return -EINVAL;
2599                 }
2600         }
2601
2602         return 0;
2603 }
2604
2605 int xfrm_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
2606                  struct xfrm_migrate *m, int num_migrate)
2607 {
2608         int i, err, nx_cur = 0, nx_new = 0;
2609         struct xfrm_policy *pol = NULL;
2610         struct xfrm_state *x, *xc;
2611         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
2612         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
2613         struct xfrm_migrate *mp;
2614
2615         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
2616                 goto out;
2617
2618         /* Stage 1 - find policy */
2619         if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) {
2620                 err = -ENOENT;
2621                 goto out;
2622         }
2623
2624         /* Stage 2 - find and update state(s) */
2625         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
2626                 if ((x = xfrm_migrate_state_find(mp))) {
2627                         x_cur[nx_cur] = x;
2628                         nx_cur++;
2629                         if ((xc = xfrm_state_migrate(x, mp))) {
2630                                 x_new[nx_new] = xc;
2631                                 nx_new++;
2632                         } else {
2633                                 err = -ENODATA;
2634                                 goto restore_state;
2635                         }
2636                 }
2637         }
2638
2639         /* Stage 3 - update policy */
2640         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
2641                 goto restore_state;
2642
2643         /* Stage 4 - delete old state(s) */
2644         if (nx_cur) {
2645                 xfrm_states_put(x_cur, nx_cur);
2646                 xfrm_states_delete(x_cur, nx_cur);
2647         }
2648
2649         /* Stage 5 - announce */
2650         km_migrate(sel, dir, type, m, num_migrate);
2651
2652         xfrm_pol_put(pol);
2653
2654         return 0;
2655 out:
2656         return err;
2657
2658 restore_state:
2659         if (pol)
2660                 xfrm_pol_put(pol);
2661         if (nx_cur)
2662                 xfrm_states_put(x_cur, nx_cur);
2663         if (nx_new)
2664                 xfrm_states_delete(x_new, nx_new);
2665
2666         return err;
2667 }
2668 EXPORT_SYMBOL(xfrm_migrate);
2669 #endif