6 * Kazunori MIYAZAWA @USAGI
7 * Kunihiro Ishiguro <kunihiro@ipinfusion.com>
9 * YOSHIFUJI Hideaki @USAGI
10 * Split up af-specific functions
11 * Derek Atkins <derek@ihtfp.com>
12 * Add UDP Encapsulation
16 #include <linux/workqueue.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
25 #include "xfrm_hash.h"
28 EXPORT_SYMBOL(xfrm_nl);
30 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
33 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
36 /* Each xfrm_state may be linked to three hash tables:
38    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
39    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
40       destination/tunnel endpoint. (output)
      3. Hash table by (daddr,saddr,family) to find SAs by address pair. (ctl)
43 static DEFINE_SPINLOCK(xfrm_state_lock);
45 /* Hash table to find appropriate SA towards given target (endpoint
46 * of tunnel or destination of transport mode) allowed by selector.
48 * Main use is finding SA after policy selected tunnel or transport mode.
49 * Also, it can be used by ah/esp icmp error handler to find offending SA.
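 *
 * Illustrative sketch (not original text): the receive side instead goes
 * through the byspi table, e.g. for an ESP packet whose SPI has been
 * parsed into the caller's esph->spi and whose outer destination address
 * is the caller's daddr:
 *
 *	x = xfrm_state_lookup(&daddr, esph->spi, IPPROTO_ESP, AF_INET);
 *	if (x) {
 *		... verify and decapsulate using x ...
 *		xfrm_state_put(x);
 *	}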
51 static struct hlist_head *xfrm_state_bydst __read_mostly;
52 static struct hlist_head *xfrm_state_bysrc __read_mostly;
53 static struct hlist_head *xfrm_state_byspi __read_mostly;
54 static unsigned int xfrm_state_hmask __read_mostly;
55 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
56 static unsigned int xfrm_state_num;
57 static unsigned int xfrm_state_genid;
59 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
60 xfrm_address_t *saddr,
62 unsigned short family)
64 return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
67 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
68 xfrm_address_t *saddr,
69 unsigned short family)
71 return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
74 static inline unsigned int
75 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
77 return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
80 static void xfrm_hash_transfer(struct hlist_head *list,
81 struct hlist_head *ndsttable,
82 struct hlist_head *nsrctable,
83 struct hlist_head *nspitable,
84 unsigned int nhashmask)
86 struct hlist_node *entry, *tmp;
89 hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
92 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
93 x->props.reqid, x->props.family,
95 hlist_add_head(&x->bydst, ndsttable+h);
97 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
100 hlist_add_head(&x->bysrc, nsrctable+h);
103 h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
104 x->id.proto, x->props.family,
106 hlist_add_head(&x->byspi, nspitable+h);
111 static unsigned long xfrm_hash_new_size(void)
113 return ((xfrm_state_hmask + 1) << 1) *
114 sizeof(struct hlist_head);
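/*
 * Worked example (illustrative, not original text): with the initial
 * 8-bucket table (xfrm_state_hmask == 7), xfrm_hash_new_size() returns
 * 16 * sizeof(struct hlist_head), i.e. every resize doubles the table
 * until xfrm_state_hashmax buckets are reached.
 */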
117 static DEFINE_MUTEX(hash_resize_mutex);
119 static void xfrm_hash_resize(struct work_struct *__unused)
121 struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
122 unsigned long nsize, osize;
123 unsigned int nhashmask, ohashmask;
126 mutex_lock(&hash_resize_mutex);
128 nsize = xfrm_hash_new_size();
129 ndst = xfrm_hash_alloc(nsize);
132 nsrc = xfrm_hash_alloc(nsize);
134 xfrm_hash_free(ndst, nsize);
137 nspi = xfrm_hash_alloc(nsize);
139 xfrm_hash_free(ndst, nsize);
140 xfrm_hash_free(nsrc, nsize);
144 spin_lock_bh(&xfrm_state_lock);
146 nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
147 for (i = xfrm_state_hmask; i >= 0; i--)
148 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
151 odst = xfrm_state_bydst;
152 osrc = xfrm_state_bysrc;
153 ospi = xfrm_state_byspi;
154 ohashmask = xfrm_state_hmask;
156 xfrm_state_bydst = ndst;
157 xfrm_state_bysrc = nsrc;
158 xfrm_state_byspi = nspi;
159 xfrm_state_hmask = nhashmask;
161 spin_unlock_bh(&xfrm_state_lock);
163 osize = (ohashmask + 1) * sizeof(struct hlist_head);
164 xfrm_hash_free(odst, osize);
165 xfrm_hash_free(osrc, osize);
166 xfrm_hash_free(ospi, osize);
169 mutex_unlock(&hash_resize_mutex);
172 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
174 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
175 EXPORT_SYMBOL(km_waitq);
177 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
178 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
180 static struct work_struct xfrm_state_gc_work;
181 static HLIST_HEAD(xfrm_state_gc_list);
182 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
184 int __xfrm_state_delete(struct xfrm_state *x);
186 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
187 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
189 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
190 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
192 static void xfrm_state_gc_destroy(struct xfrm_state *x)
194 del_timer_sync(&x->timer);
195 del_timer_sync(&x->rtimer);
202 xfrm_put_mode(x->mode);
204 x->type->destructor(x);
205 xfrm_put_type(x->type);
207 security_xfrm_state_free(x);
211 static void xfrm_state_gc_task(struct work_struct *data)
213 struct xfrm_state *x;
214 struct hlist_node *entry, *tmp;
215 struct hlist_head gc_list;
217 spin_lock_bh(&xfrm_state_gc_lock);
218 gc_list.first = xfrm_state_gc_list.first;
219 INIT_HLIST_HEAD(&xfrm_state_gc_list);
220 spin_unlock_bh(&xfrm_state_gc_lock);
222 hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
223 xfrm_state_gc_destroy(x);
228 static inline unsigned long make_jiffies(long secs)
230 if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231 return MAX_SCHEDULE_TIMEOUT-1;
236 static void xfrm_timer_handler(unsigned long data)
238 struct xfrm_state *x = (struct xfrm_state*)data;
239 unsigned long now = (unsigned long)xtime.tv_sec;
240 long next = LONG_MAX;
245 if (x->km.state == XFRM_STATE_DEAD)
247 if (x->km.state == XFRM_STATE_EXPIRED)
249 if (x->lft.hard_add_expires_seconds) {
250 long tmo = x->lft.hard_add_expires_seconds +
251 x->curlft.add_time - now;
257 if (x->lft.hard_use_expires_seconds) {
258 long tmo = x->lft.hard_use_expires_seconds +
259 (x->curlft.use_time ? : now) - now;
267 if (x->lft.soft_add_expires_seconds) {
268 long tmo = x->lft.soft_add_expires_seconds +
269 x->curlft.add_time - now;
275 if (x->lft.soft_use_expires_seconds) {
276 long tmo = x->lft.soft_use_expires_seconds +
277 (x->curlft.use_time ? : now) - now;
286 km_state_expired(x, 0, 0);
288 if (next != LONG_MAX)
289 mod_timer(&x->timer, jiffies + make_jiffies(next));
294 if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
295 x->km.state = XFRM_STATE_EXPIRED;
301 err = __xfrm_state_delete(x);
302 if (!err && x->id.spi)
303 km_state_expired(x, 1, 0);
305 xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
306 AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
309 spin_unlock(&x->lock);
312 static void xfrm_replay_timer_handler(unsigned long data);
314 struct xfrm_state *xfrm_state_alloc(void)
316 struct xfrm_state *x;
318 x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
321 atomic_set(&x->refcnt, 1);
322 atomic_set(&x->tunnel_users, 0);
323 INIT_HLIST_NODE(&x->bydst);
324 INIT_HLIST_NODE(&x->bysrc);
325 INIT_HLIST_NODE(&x->byspi);
326 init_timer(&x->timer);
327 x->timer.function = xfrm_timer_handler;
328 x->timer.data = (unsigned long)x;
329 init_timer(&x->rtimer);
330 x->rtimer.function = xfrm_replay_timer_handler;
331 x->rtimer.data = (unsigned long)x;
332 x->curlft.add_time = (unsigned long)xtime.tv_sec;
333 x->lft.soft_byte_limit = XFRM_INF;
334 x->lft.soft_packet_limit = XFRM_INF;
335 x->lft.hard_byte_limit = XFRM_INF;
336 x->lft.hard_packet_limit = XFRM_INF;
337 x->replay_maxage = 0;
338 x->replay_maxdiff = 0;
339 spin_lock_init(&x->lock);
343 EXPORT_SYMBOL(xfrm_state_alloc);
345 void __xfrm_state_destroy(struct xfrm_state *x)
347 BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
349 spin_lock_bh(&xfrm_state_gc_lock);
350 hlist_add_head(&x->bydst, &xfrm_state_gc_list);
351 spin_unlock_bh(&xfrm_state_gc_lock);
352 schedule_work(&xfrm_state_gc_work);
354 EXPORT_SYMBOL(__xfrm_state_destroy);
356 int __xfrm_state_delete(struct xfrm_state *x)
360 if (x->km.state != XFRM_STATE_DEAD) {
361 x->km.state = XFRM_STATE_DEAD;
362 spin_lock(&xfrm_state_lock);
363 hlist_del(&x->bydst);
364 hlist_del(&x->bysrc);
366 hlist_del(&x->byspi);
368 spin_unlock(&xfrm_state_lock);
370 /* All xfrm_state objects are created by xfrm_state_alloc.
371 * The xfrm_state_alloc call gives a reference, and that
372 * is what we are dropping here.
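 *
 * Illustrative lifecycle note (not original text): xfrm_state_alloc()
 * starts the state with refcnt 1, xfrm_state_insert() makes it visible
 * in the hash tables, this function unhashes it and drops that creation
 * reference, and the final xfrm_state_put() by the last user queues the
 * state on xfrm_state_gc_list for xfrm_state_gc_task() to destroy.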
380 EXPORT_SYMBOL(__xfrm_state_delete);
382 int xfrm_state_delete(struct xfrm_state *x)
386 spin_lock_bh(&x->lock);
387 err = __xfrm_state_delete(x);
388 spin_unlock_bh(&x->lock);
392 EXPORT_SYMBOL(xfrm_state_delete);
394 void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
399 spin_lock_bh(&xfrm_state_lock);
400 for (i = 0; i <= xfrm_state_hmask; i++) {
401 struct hlist_node *entry;
402 struct xfrm_state *x;
404 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
405 if (!xfrm_state_kern(x) &&
406 xfrm_id_proto_match(x->id.proto, proto)) {
408 spin_unlock_bh(&xfrm_state_lock);
410 err = xfrm_state_delete(x);
411 xfrm_audit_log(audit_info->loginuid,
413 AUDIT_MAC_IPSEC_DELSA,
414 err ? 0 : 1, NULL, x);
417 spin_lock_bh(&xfrm_state_lock);
422 spin_unlock_bh(&xfrm_state_lock);
425 EXPORT_SYMBOL(xfrm_state_flush);
428 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
429 struct xfrm_tmpl *tmpl,
430 xfrm_address_t *daddr, xfrm_address_t *saddr,
431 unsigned short family)
433 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
436 afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
437 xfrm_state_put_afinfo(afinfo);
441 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
443 unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
444 struct xfrm_state *x;
445 struct hlist_node *entry;
447 hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
448 if (x->props.family != family ||
450 x->id.proto != proto)
455 if (x->id.daddr.a4 != daddr->a4)
459 if (!ipv6_addr_equal((struct in6_addr *)daddr,
473 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
475 unsigned int h = xfrm_src_hash(daddr, saddr, family);
476 struct xfrm_state *x;
477 struct hlist_node *entry;
479 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
480 if (x->props.family != family ||
481 x->id.proto != proto)
486 if (x->id.daddr.a4 != daddr->a4 ||
487 x->props.saddr.a4 != saddr->a4)
491 if (!ipv6_addr_equal((struct in6_addr *)daddr,
494 !ipv6_addr_equal((struct in6_addr *)saddr,
508 static inline struct xfrm_state *
509 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
512 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
513 x->id.proto, family);
515 return __xfrm_state_lookup_byaddr(&x->id.daddr,
517 x->id.proto, family);
520 static void xfrm_hash_grow_check(int have_hash_collision)
522 if (have_hash_collision &&
523 (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
524 xfrm_state_num > xfrm_state_hmask)
525 schedule_work(&xfrm_hash_work);
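/*
 * Example trigger (illustrative, not original text): once more than
 * xfrm_state_hmask states are installed (the 8th state in the initial
 * 8-bucket table), any insert whose bydst bucket already holds another
 * entry passes all three checks above and schedules xfrm_hash_work.
 */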
529 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
530 struct flowi *fl, struct xfrm_tmpl *tmpl,
531 struct xfrm_policy *pol, int *err,
532 unsigned short family)
534 unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
535 struct hlist_node *entry;
536 struct xfrm_state *x, *x0;
537 int acquire_in_progress = 0;
539 struct xfrm_state *best = NULL;
541 spin_lock_bh(&xfrm_state_lock);
542 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
543 if (x->props.family == family &&
544 x->props.reqid == tmpl->reqid &&
545 !(x->props.flags & XFRM_STATE_WILDRECV) &&
546 xfrm_state_addr_check(x, daddr, saddr, family) &&
547 tmpl->mode == x->props.mode &&
548 tmpl->id.proto == x->id.proto &&
549 (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
551 1. There is a valid state with matching selector.
553 2. Valid state with inappropriate selector. Skip.
555 Entering area of "sysdeps".
557 3. If state is not valid, selector is temporary,
558 it selects only session which triggered
559 previous resolution. Key manager will do
560 something to install a state with proper selector.
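   (Illustrative note, not original text: that temporary selector comes
   from xfrm_init_tempsel() on the ACQ entry created further down, so
   only the flow which triggered the acquire matches it until the key
   manager installs the real SA via xfrm_state_add()/xfrm_state_update().)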
563 if (x->km.state == XFRM_STATE_VALID) {
564 if (!xfrm_selector_match(&x->sel, fl, family) ||
565 !security_xfrm_state_pol_flow_match(x, pol, fl))
568 best->km.dying > x->km.dying ||
569 (best->km.dying == x->km.dying &&
570 best->curlft.add_time < x->curlft.add_time))
572 } else if (x->km.state == XFRM_STATE_ACQ) {
573 acquire_in_progress = 1;
574 } else if (x->km.state == XFRM_STATE_ERROR ||
575 x->km.state == XFRM_STATE_EXPIRED) {
576 if (xfrm_selector_match(&x->sel, fl, family) &&
577 security_xfrm_state_pol_flow_match(x, pol, fl))
584 if (!x && !error && !acquire_in_progress) {
586 (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
587 tmpl->id.proto, family)) != NULL) {
592 x = xfrm_state_alloc();
597 /* Initialize temporary selector matching only
598 * to current session. */
599 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
601 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
603 x->km.state = XFRM_STATE_DEAD;
609 if (km_query(x, tmpl, pol) == 0) {
610 x->km.state = XFRM_STATE_ACQ;
611 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
612 h = xfrm_src_hash(daddr, saddr, family);
613 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
615 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
616 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
618 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
619 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
620 add_timer(&x->timer);
622 xfrm_hash_grow_check(x->bydst.next != NULL);
624 x->km.state = XFRM_STATE_DEAD;
634 *err = acquire_in_progress ? -EAGAIN : error;
635 spin_unlock_bh(&xfrm_state_lock);
639 static void __xfrm_state_insert(struct xfrm_state *x)
643 x->genid = ++xfrm_state_genid;
645 h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
646 x->props.reqid, x->props.family);
647 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
649 h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
650 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
653 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
656 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
659 mod_timer(&x->timer, jiffies + HZ);
660 if (x->replay_maxage)
661 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
667 xfrm_hash_grow_check(x->bydst.next != NULL);
670 /* xfrm_state_lock is held */
671 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
673 unsigned short family = xnew->props.family;
674 u32 reqid = xnew->props.reqid;
675 struct xfrm_state *x;
676 struct hlist_node *entry;
679 h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
680 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
681 if (x->props.family == family &&
682 x->props.reqid == reqid &&
683 !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
684 !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
685 x->genid = xfrm_state_genid;
689 void xfrm_state_insert(struct xfrm_state *x)
691 spin_lock_bh(&xfrm_state_lock);
692 __xfrm_state_bump_genids(x);
693 __xfrm_state_insert(x);
694 spin_unlock_bh(&xfrm_state_lock);
696 EXPORT_SYMBOL(xfrm_state_insert);
698 /* xfrm_state_lock is held */
699 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
701 unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
702 struct hlist_node *entry;
703 struct xfrm_state *x;
705 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
706 if (x->props.reqid != reqid ||
707 x->props.mode != mode ||
708 x->props.family != family ||
709 x->km.state != XFRM_STATE_ACQ ||
715 if (x->id.daddr.a4 != daddr->a4 ||
716 x->props.saddr.a4 != saddr->a4)
720 if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
721 (struct in6_addr *)daddr) ||
722 !ipv6_addr_equal((struct in6_addr *)
724 (struct in6_addr *)saddr))
736 x = xfrm_state_alloc();
740 x->sel.daddr.a4 = daddr->a4;
741 x->sel.saddr.a4 = saddr->a4;
742 x->sel.prefixlen_d = 32;
743 x->sel.prefixlen_s = 32;
744 x->props.saddr.a4 = saddr->a4;
745 x->id.daddr.a4 = daddr->a4;
749 ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
750 (struct in6_addr *)daddr);
751 ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
752 (struct in6_addr *)saddr);
753 x->sel.prefixlen_d = 128;
754 x->sel.prefixlen_s = 128;
755 ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
756 (struct in6_addr *)saddr);
757 ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
758 (struct in6_addr *)daddr);
762 x->km.state = XFRM_STATE_ACQ;
764 x->props.family = family;
765 x->props.mode = mode;
766 x->props.reqid = reqid;
767 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
769 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
770 add_timer(&x->timer);
771 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
772 h = xfrm_src_hash(daddr, saddr, family);
773 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
778 xfrm_hash_grow_check(x->bydst.next != NULL);
784 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
786 int xfrm_state_add(struct xfrm_state *x)
788 struct xfrm_state *x1;
791 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
793 family = x->props.family;
795 spin_lock_bh(&xfrm_state_lock);
797 x1 = __xfrm_state_locate(x, use_spi, family);
805 if (use_spi && x->km.seq) {
806 x1 = __xfrm_find_acq_byseq(x->km.seq);
807 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
814 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
816 &x->id.daddr, &x->props.saddr, 0);
818 __xfrm_state_bump_genids(x);
819 __xfrm_state_insert(x);
823 spin_unlock_bh(&xfrm_state_lock);
826 xfrm_state_delete(x1);
832 EXPORT_SYMBOL(xfrm_state_add);
834 int xfrm_state_update(struct xfrm_state *x)
836 struct xfrm_state *x1;
838 int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
840 spin_lock_bh(&xfrm_state_lock);
841 x1 = __xfrm_state_locate(x, use_spi, x->props.family);
847 if (xfrm_state_kern(x1)) {
853 if (x1->km.state == XFRM_STATE_ACQ) {
854 __xfrm_state_insert(x);
860 spin_unlock_bh(&xfrm_state_lock);
866 xfrm_state_delete(x1);
872 spin_lock_bh(&x1->lock);
873 if (likely(x1->km.state == XFRM_STATE_VALID)) {
874 if (x->encap && x1->encap)
875 memcpy(x1->encap, x->encap, sizeof(*x1->encap));
876 if (x->coaddr && x1->coaddr) {
877 memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
879 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
880 memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
881 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
884 mod_timer(&x1->timer, jiffies + HZ);
885 if (x1->curlft.use_time)
886 xfrm_state_check_expire(x1);
890 spin_unlock_bh(&x1->lock);
896 EXPORT_SYMBOL(xfrm_state_update);
898 int xfrm_state_check_expire(struct xfrm_state *x)
900 if (!x->curlft.use_time)
901 x->curlft.use_time = (unsigned long)xtime.tv_sec;
903 if (x->km.state != XFRM_STATE_VALID)
906 if (x->curlft.bytes >= x->lft.hard_byte_limit ||
907 x->curlft.packets >= x->lft.hard_packet_limit) {
908 x->km.state = XFRM_STATE_EXPIRED;
909 mod_timer(&x->timer, jiffies);
914 (x->curlft.bytes >= x->lft.soft_byte_limit ||
915 x->curlft.packets >= x->lft.soft_packet_limit)) {
917 km_state_expired(x, 0, 0);
921 EXPORT_SYMBOL(xfrm_state_check_expire);
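/*
 * Illustrative summary (not original text): a hard byte/packet limit
 * above marks the state XFRM_STATE_EXPIRED and fires its timer so it is
 * torn down, while crossing only a soft limit leaves the SA usable and
 * just calls km_state_expired(x, 0, 0) so the key manager can begin
 * rekeying ahead of time.
 */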
923 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
925 int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
929 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
931 /* Check tail too... */
935 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
937 int err = xfrm_state_check_expire(x);
940 err = xfrm_state_check_space(x, skb);
944 EXPORT_SYMBOL(xfrm_state_check);
947 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
948 unsigned short family)
950 struct xfrm_state *x;
952 spin_lock_bh(&xfrm_state_lock);
953 x = __xfrm_state_lookup(daddr, spi, proto, family);
954 spin_unlock_bh(&xfrm_state_lock);
957 EXPORT_SYMBOL(xfrm_state_lookup);
960 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
961 u8 proto, unsigned short family)
963 struct xfrm_state *x;
965 spin_lock_bh(&xfrm_state_lock);
966 x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
967 spin_unlock_bh(&xfrm_state_lock);
970 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
973 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
974 xfrm_address_t *daddr, xfrm_address_t *saddr,
975 int create, unsigned short family)
977 struct xfrm_state *x;
979 spin_lock_bh(&xfrm_state_lock);
980 x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
981 spin_unlock_bh(&xfrm_state_lock);
985 EXPORT_SYMBOL(xfrm_find_acq);
987 #ifdef CONFIG_XFRM_SUB_POLICY
989 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
990 unsigned short family)
993 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
995 return -EAFNOSUPPORT;
997 spin_lock_bh(&xfrm_state_lock);
998 if (afinfo->tmpl_sort)
999 err = afinfo->tmpl_sort(dst, src, n);
1000 spin_unlock_bh(&xfrm_state_lock);
1001 xfrm_state_put_afinfo(afinfo);
1004 EXPORT_SYMBOL(xfrm_tmpl_sort);
1007 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1008 unsigned short family)
1011 struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1013 return -EAFNOSUPPORT;
1015 spin_lock_bh(&xfrm_state_lock);
1016 if (afinfo->state_sort)
1017 err = afinfo->state_sort(dst, src, n);
1018 spin_unlock_bh(&xfrm_state_lock);
1019 xfrm_state_put_afinfo(afinfo);
1022 EXPORT_SYMBOL(xfrm_state_sort);
1025 /* Silly enough, but I'm too lazy to build a resolution list */
1027 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1031 for (i = 0; i <= xfrm_state_hmask; i++) {
1032 struct hlist_node *entry;
1033 struct xfrm_state *x;
1035 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1036 if (x->km.seq == seq &&
1037 x->km.state == XFRM_STATE_ACQ) {
1046 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1048 struct xfrm_state *x;
1050 spin_lock_bh(&xfrm_state_lock);
1051 x = __xfrm_find_acq_byseq(seq);
1052 spin_unlock_bh(&xfrm_state_lock);
1055 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1057 u32 xfrm_get_acqseq(void)
1061 static DEFINE_SPINLOCK(acqseq_lock);
1063 spin_lock_bh(&acqseq_lock);
1064 res = (++acqseq ? : ++acqseq);	/* GNU ?: extension: retry on wrap so 0 is never returned */
1065 spin_unlock_bh(&acqseq_lock);
1068 EXPORT_SYMBOL(xfrm_get_acqseq);
1071 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1074 struct xfrm_state *x0;
1079 if (minspi == maxspi) {
1080 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1088 u32 low = ntohl(minspi);
1089 u32 high = ntohl(maxspi);
1090 for (h=0; h<high-low+1; h++) {
1091 spi = low + net_random()%(high-low+1);
1092 x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1094 x->id.spi = htonl(spi);
1101 spin_lock_bh(&xfrm_state_lock);
1102 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1103 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1104 spin_unlock_bh(&xfrm_state_lock);
1108 EXPORT_SYMBOL(xfrm_alloc_spi);
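/*
 * Usage sketch (illustrative, assuming a larval state "x" created for an
 * acquire): a key manager answering a getspi request picks an SPI in the
 * caller-supplied range, e.g.
 *
 *	xfrm_alloc_spi(x, htonl(0x00000100), htonl(0x000001ff));
 *
 * after which x->id.spi is set (if a free SPI was found) and the state
 * has been hashed into the byspi table under xfrm_state_lock.
 */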
1110 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1114 struct xfrm_state *x, *last = NULL;
1115 struct hlist_node *entry;
1119 spin_lock_bh(&xfrm_state_lock);
1120 for (i = 0; i <= xfrm_state_hmask; i++) {
1121 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1122 if (!xfrm_id_proto_match(x->id.proto, proto))
1125 err = func(last, count, data);
1137 err = func(last, 0, data);
1139 spin_unlock_bh(&xfrm_state_lock);
1142 EXPORT_SYMBOL(xfrm_state_walk);
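/*
 * Usage sketch (illustrative, not part of the original file): dumping all
 * ESP states.  The hypothetical callback below is invoked once per
 * matching state; the last state is passed with count == 0, which dump
 * code uses to mark the end of the dump.
 *
 *	static int dump_one(struct xfrm_state *x, int count, void *ptr)
 *	{
 *		printk(KERN_DEBUG "SA spi=0x%x\n", ntohl(x->id.spi));
 *		return 0;
 *	}
 *
 *	err = xfrm_state_walk(IPPROTO_ESP, dump_one, NULL);
 */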
1145 void xfrm_replay_notify(struct xfrm_state *x, int event)
1148 /* we send notify messages in case
1149 * 1. we updated one of the sequence numbers, and the seqno difference
1150 * is at least x->replay_maxdiff; in this case we also update the
1151 * timeout of our timer function
1152 * 2. if x->replay_maxage has elapsed since last update,
1153 * and there were changes
1155 * The state structure must be locked!
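 *
 * Example (illustrative, not original text): with x->replay_maxdiff set
 * to 32, an XFRM_REPLAY_UPDATE event is only propagated once the input
 * or output sequence number has advanced by at least 32 since the last
 * notification; smaller changes are deferred and picked up later by the
 * replay timer, which runs every x->replay_maxage jiffies.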
1159 case XFRM_REPLAY_UPDATE:
1160 if (x->replay_maxdiff &&
1161 (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1162 (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1163 if (x->xflags & XFRM_TIME_DEFER)
1164 event = XFRM_REPLAY_TIMEOUT;
1171 case XFRM_REPLAY_TIMEOUT:
1172 if ((x->replay.seq == x->preplay.seq) &&
1173 (x->replay.bitmap == x->preplay.bitmap) &&
1174 (x->replay.oseq == x->preplay.oseq)) {
1175 x->xflags |= XFRM_TIME_DEFER;
1182 memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1183 c.event = XFRM_MSG_NEWAE;
1184 c.data.aevent = event;
1185 km_state_notify(x, &c);
1187 if (x->replay_maxage &&
1188 !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1189 x->xflags &= ~XFRM_TIME_DEFER;
1191 EXPORT_SYMBOL(xfrm_replay_notify);
1193 static void xfrm_replay_timer_handler(unsigned long data)
1195 struct xfrm_state *x = (struct xfrm_state*)data;
1197 spin_lock(&x->lock);
1199 if (x->km.state == XFRM_STATE_VALID) {
1200 if (xfrm_aevent_is_on())
1201 xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1203 x->xflags |= XFRM_TIME_DEFER;
1206 spin_unlock(&x->lock);
1209 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1212 u32 seq = ntohl(net_seq);
1214 if (unlikely(seq == 0))
1217 if (likely(seq > x->replay.seq))
1220 diff = x->replay.seq - seq;
1221 if (diff >= x->props.replay_window) {
1222 x->stats.replay_window++;
1226 if (x->replay.bitmap & (1U << diff)) {
1232 EXPORT_SYMBOL(xfrm_replay_check);
1234 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1237 u32 seq = ntohl(net_seq);
1239 if (seq > x->replay.seq) {
1240 diff = seq - x->replay.seq;
1241 if (diff < x->props.replay_window)
1242 x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1244 x->replay.bitmap = 1;
1245 x->replay.seq = seq;
1247 diff = x->replay.seq - seq;
1248 x->replay.bitmap |= (1U << diff);
1251 if (xfrm_aevent_is_on())
1252 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1254 EXPORT_SYMBOL(xfrm_replay_advance);
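/*
 * Worked example (illustrative, not original text): with a replay window
 * of 32 and x->replay.seq == 100, an incoming seq 101 shifts the bitmap
 * left and sets bit 0; a late seq 95 only sets bit 5; seq 60 (diff >= 32)
 * and a second copy of seq 95 (its bit already set) are both rejected by
 * xfrm_replay_check() above.
 */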
1256 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1257 static DEFINE_RWLOCK(xfrm_km_lock);
1259 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1261 struct xfrm_mgr *km;
1263 read_lock(&xfrm_km_lock);
1264 list_for_each_entry(km, &xfrm_km_list, list)
1265 if (km->notify_policy)
1266 km->notify_policy(xp, dir, c);
1267 read_unlock(&xfrm_km_lock);
1270 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1272 struct xfrm_mgr *km;
1273 read_lock(&xfrm_km_lock);
1274 list_for_each_entry(km, &xfrm_km_list, list)
1277 read_unlock(&xfrm_km_lock);
1280 EXPORT_SYMBOL(km_policy_notify);
1281 EXPORT_SYMBOL(km_state_notify);
1283 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1289 c.event = XFRM_MSG_EXPIRE;
1290 km_state_notify(x, &c);
1296 EXPORT_SYMBOL(km_state_expired);
1298 * We send to all registered managers regardless of failure;
1299 * we are happy with one success.
1301 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1303 int err = -EINVAL, acqret;
1304 struct xfrm_mgr *km;
1306 read_lock(&xfrm_km_lock);
1307 list_for_each_entry(km, &xfrm_km_list, list) {
1308 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1312 read_unlock(&xfrm_km_lock);
1315 EXPORT_SYMBOL(km_query);
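/*
 * Registration sketch (illustrative, not part of the original file): a
 * key manager supplies only the hooks it needs; the hypothetical
 * my_acquire() below would forward the acquire generated by km_query()
 * to userspace and return 0 on success.
 *
 *	static struct xfrm_mgr my_km = {
 *		.id		= "my_km",
 *		.acquire	= my_acquire,
 *	};
 *
 * registered with xfrm_register_km(&my_km) at module init and removed
 * again with xfrm_unregister_km(&my_km) at module exit.
 */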
1317 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1320 struct xfrm_mgr *km;
1322 read_lock(&xfrm_km_lock);
1323 list_for_each_entry(km, &xfrm_km_list, list) {
1324 if (km->new_mapping)
1325 err = km->new_mapping(x, ipaddr, sport);
1329 read_unlock(&xfrm_km_lock);
1332 EXPORT_SYMBOL(km_new_mapping);
1334 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1340 c.event = XFRM_MSG_POLEXPIRE;
1341 km_policy_notify(pol, dir, &c);
1346 EXPORT_SYMBOL(km_policy_expired);
1348 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1352 struct xfrm_mgr *km;
1354 read_lock(&xfrm_km_lock);
1355 list_for_each_entry(km, &xfrm_km_list, list) {
1357 ret = km->report(proto, sel, addr);
1362 read_unlock(&xfrm_km_lock);
1365 EXPORT_SYMBOL(km_report);
1367 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1371 struct xfrm_mgr *km;
1372 struct xfrm_policy *pol = NULL;
1374 if (optlen <= 0 || optlen > PAGE_SIZE)
1377 data = kmalloc(optlen, GFP_KERNEL);
1382 if (copy_from_user(data, optval, optlen))
1386 read_lock(&xfrm_km_lock);
1387 list_for_each_entry(km, &xfrm_km_list, list) {
1388 pol = km->compile_policy(sk, optname, data,
1393 read_unlock(&xfrm_km_lock);
1396 xfrm_sk_policy_insert(sk, err, pol);
1405 EXPORT_SYMBOL(xfrm_user_policy);
1407 int xfrm_register_km(struct xfrm_mgr *km)
1409 write_lock_bh(&xfrm_km_lock);
1410 list_add_tail(&km->list, &xfrm_km_list);
1411 write_unlock_bh(&xfrm_km_lock);
1414 EXPORT_SYMBOL(xfrm_register_km);
1416 int xfrm_unregister_km(struct xfrm_mgr *km)
1418 write_lock_bh(&xfrm_km_lock);
1419 list_del(&km->list);
1420 write_unlock_bh(&xfrm_km_lock);
1423 EXPORT_SYMBOL(xfrm_unregister_km);
1425 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1428 if (unlikely(afinfo == NULL))
1430 if (unlikely(afinfo->family >= NPROTO))
1431 return -EAFNOSUPPORT;
1432 write_lock_bh(&xfrm_state_afinfo_lock);
1433 if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1436 xfrm_state_afinfo[afinfo->family] = afinfo;
1437 write_unlock_bh(&xfrm_state_afinfo_lock);
1440 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1442 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1445 if (unlikely(afinfo == NULL))
1447 if (unlikely(afinfo->family >= NPROTO))
1448 return -EAFNOSUPPORT;
1449 write_lock_bh(&xfrm_state_afinfo_lock);
1450 if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1451 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1454 xfrm_state_afinfo[afinfo->family] = NULL;
1456 write_unlock_bh(&xfrm_state_afinfo_lock);
1459 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1461 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1463 struct xfrm_state_afinfo *afinfo;
1464 if (unlikely(family >= NPROTO))
1466 read_lock(&xfrm_state_afinfo_lock);
1467 afinfo = xfrm_state_afinfo[family];
1468 if (unlikely(!afinfo))
1469 read_unlock(&xfrm_state_afinfo_lock);
1473 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1475 read_unlock(&xfrm_state_afinfo_lock);
1478 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1479 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1482 struct xfrm_state *t = x->tunnel;
1484 if (atomic_read(&t->tunnel_users) == 2)
1485 xfrm_state_delete(t);
1486 atomic_dec(&t->tunnel_users);
1491 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1494 * This function is NOT optimal. For example, with ESP it will give an
1495 * MTU that's usually two bytes short of being optimal. However, it will
1496 * usually give an answer that's a multiple of 4 provided the input is
1497 * also a multiple of 4.
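 *
 * Illustrative note (not original text): "not optimal" only means the
 * value handed back is slightly smaller than strictly necessary (about
 * two bytes for ESP), so users of this MTU may add a little extra
 * padding per packet but can never build a packet that stops fitting
 * once the xfrm headers and trailer are added.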
1499 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1503 res -= x->props.header_len;
1511 spin_lock_bh(&x->lock);
1512 if (x->km.state == XFRM_STATE_VALID &&
1513 x->type && x->type->get_max_size)
1514 m = x->type->get_max_size(x, m);
1516 m += x->props.header_len;
1517 spin_unlock_bh(&x->lock);
1527 int xfrm_init_state(struct xfrm_state *x)
1529 struct xfrm_state_afinfo *afinfo;
1530 int family = x->props.family;
1533 err = -EAFNOSUPPORT;
1534 afinfo = xfrm_state_get_afinfo(family);
1539 if (afinfo->init_flags)
1540 err = afinfo->init_flags(x);
1542 xfrm_state_put_afinfo(afinfo);
1547 err = -EPROTONOSUPPORT;
1548 x->type = xfrm_get_type(x->id.proto, family);
1549 if (x->type == NULL)
1552 err = x->type->init_state(x);
1556 x->mode = xfrm_get_mode(x->props.mode, family);
1557 if (x->mode == NULL)
1560 x->km.state = XFRM_STATE_VALID;
1566 EXPORT_SYMBOL(xfrm_init_state);
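/*
 * Typical add path (illustrative sketch, not part of the original file),
 * roughly what the netlink/PF_KEY front ends do when userspace installs
 * an SA:
 *
 *	x = xfrm_state_alloc();
 *	... fill in x->id, x->props, algorithms and lifetimes ...
 *	err = xfrm_init_state(x);	resolves x->type and x->mode
 *	if (!err)
 *		err = xfrm_state_add(x);
 *
 * On failure the front end simply drops its reference to the half-built
 * state again.
 */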
1568 void __init xfrm_state_init(void)
1572 sz = sizeof(struct hlist_head) * 8;
1574 xfrm_state_bydst = xfrm_hash_alloc(sz);
1575 xfrm_state_bysrc = xfrm_hash_alloc(sz);
1576 xfrm_state_byspi = xfrm_hash_alloc(sz);
1577 if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1578 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1579 xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1581 INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);