Merge branch 'release' of git://git.kernel.org/pub/scm/linux/kernel/git/aegl/linux-2.6
[linux-2.6] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <linux/audit.h>
23 #include <asm/uaccess.h>
24
25 #include "xfrm_hash.h"
26
27 struct sock *xfrm_nl;
28 EXPORT_SYMBOL(xfrm_nl);
29
30 u32 sysctl_xfrm_aevent_etime __read_mostly = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32
33 u32 sysctl_xfrm_aevent_rseqth __read_mostly = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35
36 u32 sysctl_xfrm_acq_expires __read_mostly = 30;
37
38 /* Each xfrm_state may be linked to three tables:
39    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
40    2. Hash table by (daddr,saddr,family,reqid) to find what SAs exist for
41       given destination/tunnel endpoint. (output)
42    3. Hash table by (daddr,saddr,family) to find SA by address pair.
43  */
44
45 static DEFINE_SPINLOCK(xfrm_state_lock);
46
47 /* Hash table to find appropriate SA towards given target (endpoint
48  * of tunnel or destination of transport mode) allowed by selector.
49  *
50  * Main use is finding SA after policy selected tunnel or transport mode.
51  * Also, it can be used by ah/esp icmp error handler to find offending SA.
52  */
53 static struct hlist_head *xfrm_state_bydst __read_mostly; /* buckets indexed by xfrm_dst_hash() */
54 static struct hlist_head *xfrm_state_bysrc __read_mostly; /* buckets indexed by xfrm_src_hash() */
55 static struct hlist_head *xfrm_state_byspi __read_mostly; /* buckets indexed by xfrm_spi_hash(); only states with a non-zero SPI are linked */
56 static unsigned int xfrm_state_hmask __read_mostly; /* bucket count minus one, shared by all three tables */
57 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; /* xfrm_hash_grow_check() stops requesting resizes once hmask + 1 reaches this */
58 static unsigned int xfrm_state_num; /* number of states currently linked into the tables */
59 static unsigned int xfrm_state_genid; /* incremented for every __xfrm_state_insert() */
60
61 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
62                                          xfrm_address_t *saddr,
63                                          u32 reqid,
64                                          unsigned short family)
65 {
66         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
67 }
68
69 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
70                                          xfrm_address_t *saddr,
71                                          unsigned short family)
72 {
73         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
74 }
75
76 static inline unsigned int
77 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
78 {
79         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
80 }
81
82 static void xfrm_hash_transfer(struct hlist_head *list,
83                                struct hlist_head *ndsttable,
84                                struct hlist_head *nsrctable,
85                                struct hlist_head *nspitable,
86                                unsigned int nhashmask)
87 {
88         struct hlist_node *entry, *tmp;
89         struct xfrm_state *x;
90
91         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
92                 unsigned int h;
93
94                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
95                                     x->props.reqid, x->props.family,
96                                     nhashmask);
97                 hlist_add_head(&x->bydst, ndsttable+h);
98
99                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
100                                     x->props.family,
101                                     nhashmask);
102                 hlist_add_head(&x->bysrc, nsrctable+h);
103
104                 if (x->id.spi) {
105                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
106                                             x->id.proto, x->props.family,
107                                             nhashmask);
108                         hlist_add_head(&x->byspi, nspitable+h);
109                 }
110         }
111 }
112
113 static unsigned long xfrm_hash_new_size(void)
114 {
115         return ((xfrm_state_hmask + 1) << 1) *
116                 sizeof(struct hlist_head);
117 }
118
119 static DEFINE_MUTEX(hash_resize_mutex);
120
121 static void xfrm_hash_resize(struct work_struct *__unused)
122 {
123         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
124         unsigned long nsize, osize;
125         unsigned int nhashmask, ohashmask;
126         int i;
127
128         mutex_lock(&hash_resize_mutex);
129
130         nsize = xfrm_hash_new_size();
131         ndst = xfrm_hash_alloc(nsize);
132         if (!ndst)
133                 goto out_unlock;
134         nsrc = xfrm_hash_alloc(nsize);
135         if (!nsrc) {
136                 xfrm_hash_free(ndst, nsize);
137                 goto out_unlock;
138         }
139         nspi = xfrm_hash_alloc(nsize);
140         if (!nspi) {
141                 xfrm_hash_free(ndst, nsize);
142                 xfrm_hash_free(nsrc, nsize);
143                 goto out_unlock;
144         }
145
146         spin_lock_bh(&xfrm_state_lock);
147
148         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
149         for (i = xfrm_state_hmask; i >= 0; i--)
150                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
151                                    nhashmask);
152
153         odst = xfrm_state_bydst;
154         osrc = xfrm_state_bysrc;
155         ospi = xfrm_state_byspi;
156         ohashmask = xfrm_state_hmask;
157
158         xfrm_state_bydst = ndst;
159         xfrm_state_bysrc = nsrc;
160         xfrm_state_byspi = nspi;
161         xfrm_state_hmask = nhashmask;
162
163         spin_unlock_bh(&xfrm_state_lock);
164
165         osize = (ohashmask + 1) * sizeof(struct hlist_head);
166         xfrm_hash_free(odst, osize);
167         xfrm_hash_free(osrc, osize);
168         xfrm_hash_free(ospi, osize);
169
170 out_unlock:
171         mutex_unlock(&hash_resize_mutex);
172 }
173
174 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
175
176 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
177 EXPORT_SYMBOL(km_waitq);
178
179 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
180 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
181
182 static struct work_struct xfrm_state_gc_work; /* scheduled by __xfrm_state_destroy() */
183 static HLIST_HEAD(xfrm_state_gc_list); /* dead states awaiting destruction, linked through their bydst node */
184 static DEFINE_SPINLOCK(xfrm_state_gc_lock); /* protects xfrm_state_gc_list */
185
186 int __xfrm_state_delete(struct xfrm_state *x);
187
188 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
189 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
190
191 static void xfrm_state_gc_destroy(struct xfrm_state *x)
192 {
193         del_timer_sync(&x->timer);
194         del_timer_sync(&x->rtimer);
195         kfree(x->aalg);
196         kfree(x->ealg);
197         kfree(x->calg);
198         kfree(x->encap);
199         kfree(x->coaddr);
200         if (x->mode)
201                 xfrm_put_mode(x->mode);
202         if (x->type) {
203                 x->type->destructor(x);
204                 xfrm_put_type(x->type);
205         }
206         security_xfrm_state_free(x);
207         kfree(x);
208 }
209
210 static void xfrm_state_gc_task(struct work_struct *data)
211 {
212         struct xfrm_state *x;
213         struct hlist_node *entry, *tmp;
214         struct hlist_head gc_list;
215
216         spin_lock_bh(&xfrm_state_gc_lock);
217         gc_list.first = xfrm_state_gc_list.first;
218         INIT_HLIST_HEAD(&xfrm_state_gc_list);
219         spin_unlock_bh(&xfrm_state_gc_lock);
220
221         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
222                 xfrm_state_gc_destroy(x);
223
224         wake_up(&km_waitq);
225 }
226
227 static inline unsigned long make_jiffies(long secs)
228 {
229         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
230                 return MAX_SCHEDULE_TIMEOUT-1;
231         else
232                 return secs*HZ;
233 }
234
/*
 * Lifetime timer callback for an xfrm_state.  xfrm_state_alloc() installs
 * it on x->timer with @data carrying the state pointer.
 *
 * Everything runs under x->lock.  A hard add/use lifetime that has run out
 * sends the state down the "expired" path.  A soft lifetime that has run
 * out only marks the state as dying and reports it through
 * km_state_expired(x, 0, 0).  Afterwards the timer is re-armed for the
 * nearest pending deadline, if there is one.
 */
235 static void xfrm_timer_handler(unsigned long data)
236 {
237         struct xfrm_state *x = (struct xfrm_state*)data;
238         unsigned long now = get_seconds();
239         long next = LONG_MAX;
240         int warn = 0;
241         int err = 0;
242
243         spin_lock(&x->lock);
244         if (x->km.state == XFRM_STATE_DEAD)
245                 goto out;
246         if (x->km.state == XFRM_STATE_EXPIRED)
247                 goto expired;
        /* Hard limits: compute the seconds left; none left means expiry. */
248         if (x->lft.hard_add_expires_seconds) {
249                 long tmo = x->lft.hard_add_expires_seconds +
250                         x->curlft.add_time - now;
251                 if (tmo <= 0)
252                         goto expired;
253                 if (tmo < next)
254                         next = tmo;
255         }
256         if (x->lft.hard_use_expires_seconds) {
                /* A use_time of 0 falls back to now, so the full lifetime remains. */
257                 long tmo = x->lft.hard_use_expires_seconds +
258                         (x->curlft.use_time ? : now) - now;
259                 if (tmo <= 0)
260                         goto expired;
261                 if (tmo < next)
262                         next = tmo;
263         }
        /* Soft expiry was already flagged on an earlier run: only re-arm. */
264         if (x->km.dying)
265                 goto resched;
        /* Soft limits: running out only raises the warning flag. */
266         if (x->lft.soft_add_expires_seconds) {
267                 long tmo = x->lft.soft_add_expires_seconds +
268                         x->curlft.add_time - now;
269                 if (tmo <= 0)
270                         warn = 1;
271                 else if (tmo < next)
272                         next = tmo;
273         }
274         if (x->lft.soft_use_expires_seconds) {
275                 long tmo = x->lft.soft_use_expires_seconds +
276                         (x->curlft.use_time ? : now) - now;
277                 if (tmo <= 0)
278                         warn = 1;
279                 else if (tmo < next)
280                         next = tmo;
281         }
282
283         x->km.dying = warn;
284         if (warn)
285                 km_state_expired(x, 0, 0);
286 resched:
        /* next is still LONG_MAX when no deadline is pending: leave the timer idle. */
287         if (next != LONG_MAX)
288                 mod_timer(&x->timer, jiffies + make_jiffies(next));
289
290         goto out;
291
292 expired:
        /*
         * An ACQ state without an SPI is not deleted right away: it is
         * marked EXPIRED, waiters are woken, and the timer is re-armed 2
         * seconds out.  The next run takes the XFRM_STATE_EXPIRED branch
         * above, lands here again and falls through to the deletion below.
         */
293         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
294                 x->km.state = XFRM_STATE_EXPIRED;
295                 wake_up(&km_waitq);
296                 next = 2;
297                 goto resched;
298         }
299
        /* Report the hard expiry only if the delete succeeded and the state has an SPI. */
300         err = __xfrm_state_delete(x);
301         if (!err && x->id.spi)
302                 km_state_expired(x, 1, 0);
303
304         xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
305                        AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
306
307 out:
308         spin_unlock(&x->lock);
309 }
310
311 static void xfrm_replay_timer_handler(unsigned long data);
312
313 struct xfrm_state *xfrm_state_alloc(void)
314 {
315         struct xfrm_state *x;
316
317         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
318
319         if (x) {
320                 atomic_set(&x->refcnt, 1);
321                 atomic_set(&x->tunnel_users, 0);
322                 INIT_HLIST_NODE(&x->bydst);
323                 INIT_HLIST_NODE(&x->bysrc);
324                 INIT_HLIST_NODE(&x->byspi);
325                 init_timer(&x->timer);
326                 x->timer.function = xfrm_timer_handler;
327                 x->timer.data     = (unsigned long)x;
328                 init_timer(&x->rtimer);
329                 x->rtimer.function = xfrm_replay_timer_handler;
330                 x->rtimer.data     = (unsigned long)x;
331                 x->curlft.add_time = get_seconds();
332                 x->lft.soft_byte_limit = XFRM_INF;
333                 x->lft.soft_packet_limit = XFRM_INF;
334                 x->lft.hard_byte_limit = XFRM_INF;
335                 x->lft.hard_packet_limit = XFRM_INF;
336                 x->replay_maxage = 0;
337                 x->replay_maxdiff = 0;
338                 spin_lock_init(&x->lock);
339         }
340         return x;
341 }
342 EXPORT_SYMBOL(xfrm_state_alloc);
343
344 void __xfrm_state_destroy(struct xfrm_state *x)
345 {
346         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
347
348         spin_lock_bh(&xfrm_state_gc_lock);
349         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
350         spin_unlock_bh(&xfrm_state_gc_lock);
351         schedule_work(&xfrm_state_gc_work);
352 }
353 EXPORT_SYMBOL(__xfrm_state_destroy);
354
355 int __xfrm_state_delete(struct xfrm_state *x)
356 {
357         int err = -ESRCH;
358
359         if (x->km.state != XFRM_STATE_DEAD) {
360                 x->km.state = XFRM_STATE_DEAD;
361                 spin_lock(&xfrm_state_lock);
362                 hlist_del(&x->bydst);
363                 hlist_del(&x->bysrc);
364                 if (x->id.spi)
365                         hlist_del(&x->byspi);
366                 xfrm_state_num--;
367                 spin_unlock(&xfrm_state_lock);
368
369                 /* All xfrm_state objects are created by xfrm_state_alloc.
370                  * The xfrm_state_alloc call gives a reference, and that
371                  * is what we are dropping here.
372                  */
373                 __xfrm_state_put(x);
374                 err = 0;
375         }
376
377         return err;
378 }
379 EXPORT_SYMBOL(__xfrm_state_delete);
380
381 int xfrm_state_delete(struct xfrm_state *x)
382 {
383         int err;
384
385         spin_lock_bh(&x->lock);
386         err = __xfrm_state_delete(x);
387         spin_unlock_bh(&x->lock);
388
389         return err;
390 }
391 EXPORT_SYMBOL(xfrm_state_delete);
392
393 #ifdef CONFIG_SECURITY_NETWORK_XFRM
394 static inline int
395 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
396 {
397         int i, err = 0;
398
399         for (i = 0; i <= xfrm_state_hmask; i++) {
400                 struct hlist_node *entry;
401                 struct xfrm_state *x;
402
403                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
404                         if (xfrm_id_proto_match(x->id.proto, proto) &&
405                            (err = security_xfrm_state_delete(x)) != 0) {
406                                 xfrm_audit_log(audit_info->loginuid,
407                                                audit_info->secid,
408                                                AUDIT_MAC_IPSEC_DELSA,
409                                                0, NULL, x);
410
411                                 return err;
412                         }
413                 }
414         }
415
416         return err;
417 }
418 #else
419 static inline int
420 xfrm_state_flush_secctx_check(u8 proto, struct xfrm_audit *audit_info)
421 {
422         return 0;
423 }
424 #endif
425
426 int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
427 {
428         int i, err = 0;
429
430         spin_lock_bh(&xfrm_state_lock);
431         err = xfrm_state_flush_secctx_check(proto, audit_info);
432         if (err)
433                 goto out;
434
435         for (i = 0; i <= xfrm_state_hmask; i++) {
436                 struct hlist_node *entry;
437                 struct xfrm_state *x;
438 restart:
439                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
440                         if (!xfrm_state_kern(x) &&
441                             xfrm_id_proto_match(x->id.proto, proto)) {
442                                 xfrm_state_hold(x);
443                                 spin_unlock_bh(&xfrm_state_lock);
444
445                                 err = xfrm_state_delete(x);
446                                 xfrm_audit_log(audit_info->loginuid,
447                                                audit_info->secid,
448                                                AUDIT_MAC_IPSEC_DELSA,
449                                                err ? 0 : 1, NULL, x);
450                                 xfrm_state_put(x);
451
452                                 spin_lock_bh(&xfrm_state_lock);
453                                 goto restart;
454                         }
455                 }
456         }
457         err = 0;
458
459 out:
460         spin_unlock_bh(&xfrm_state_lock);
461         wake_up(&km_waitq);
462         return err;
463 }
464 EXPORT_SYMBOL(xfrm_state_flush);
465
466 void xfrm_sad_getinfo(struct xfrmk_sadinfo *si)
467 {
468         spin_lock_bh(&xfrm_state_lock);
469         si->sadcnt = xfrm_state_num;
470         si->sadhcnt = xfrm_state_hmask;
471         si->sadhmcnt = xfrm_state_hashmax;
472         spin_unlock_bh(&xfrm_state_lock);
473 }
474 EXPORT_SYMBOL(xfrm_sad_getinfo);
475
476 static int
477 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
478                   struct xfrm_tmpl *tmpl,
479                   xfrm_address_t *daddr, xfrm_address_t *saddr,
480                   unsigned short family)
481 {
482         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
483         if (!afinfo)
484                 return -1;
485         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
486         xfrm_state_put_afinfo(afinfo);
487         return 0;
488 }
489
490 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
491 {
492         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
493         struct xfrm_state *x;
494         struct hlist_node *entry;
495
496         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
497                 if (x->props.family != family ||
498                     x->id.spi       != spi ||
499                     x->id.proto     != proto)
500                         continue;
501
502                 switch (family) {
503                 case AF_INET:
504                         if (x->id.daddr.a4 != daddr->a4)
505                                 continue;
506                         break;
507                 case AF_INET6:
508                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
509                                              (struct in6_addr *)
510                                              x->id.daddr.a6))
511                                 continue;
512                         break;
513                 }
514
515                 xfrm_state_hold(x);
516                 return x;
517         }
518
519         return NULL;
520 }
521
522 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
523 {
524         unsigned int h = xfrm_src_hash(daddr, saddr, family);
525         struct xfrm_state *x;
526         struct hlist_node *entry;
527
528         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
529                 if (x->props.family != family ||
530                     x->id.proto     != proto)
531                         continue;
532
533                 switch (family) {
534                 case AF_INET:
535                         if (x->id.daddr.a4 != daddr->a4 ||
536                             x->props.saddr.a4 != saddr->a4)
537                                 continue;
538                         break;
539                 case AF_INET6:
540                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
541                                              (struct in6_addr *)
542                                              x->id.daddr.a6) ||
543                             !ipv6_addr_equal((struct in6_addr *)saddr,
544                                              (struct in6_addr *)
545                                              x->props.saddr.a6))
546                                 continue;
547                         break;
548                 }
549
550                 xfrm_state_hold(x);
551                 return x;
552         }
553
554         return NULL;
555 }
556
557 static inline struct xfrm_state *
558 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
559 {
560         if (use_spi)
561                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
562                                            x->id.proto, family);
563         else
564                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
565                                                   &x->props.saddr,
566                                                   x->id.proto, family);
567 }
568
569 static void xfrm_hash_grow_check(int have_hash_collision)
570 {
571         if (have_hash_collision &&
572             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
573             xfrm_state_num > xfrm_state_hmask)
574                 schedule_work(&xfrm_hash_work);
575 }
576
/*
 * Find the state to use for flow @fl under template @tmpl, or start an
 * acquire for one.
 *
 * The bydst bucket for (daddr, saddr, tmpl->reqid, family) is scanned under
 * xfrm_state_lock.  Among VALID states whose selector and security context
 * match, a non-dying state is preferred over a dying one, and between equals
 * the one added most recently wins.  If nothing usable exists, no matching
 * state is in ERROR/EXPIRED and no acquire is already pending, a new state
 * is allocated, handed to km_query() and, on success, linked into the
 * tables in XFRM_STATE_ACQ with its timer armed for sysctl_xfrm_acq_expires
 * seconds.
 *
 * Returns the chosen or newly created state with an extra reference held
 * for the caller.  Returns NULL on failure, in which case *err is set to
 * -EAGAIN if an acquire was already in progress, otherwise to -EEXIST,
 * -ENOMEM, -ESRCH or the error from security_xfrm_state_alloc_acquire().
 * *err is written only when NULL is returned.
 */
577 struct xfrm_state *
578 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
579                 struct flowi *fl, struct xfrm_tmpl *tmpl,
580                 struct xfrm_policy *pol, int *err,
581                 unsigned short family)
582 {
583         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
584         struct hlist_node *entry;
585         struct xfrm_state *x, *x0;
586         int acquire_in_progress = 0;
587         int error = 0;
588         struct xfrm_state *best = NULL;
589
590         spin_lock_bh(&xfrm_state_lock);
591         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
592                 if (x->props.family == family &&
593                     x->props.reqid == tmpl->reqid &&
594                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
595                     xfrm_state_addr_check(x, daddr, saddr, family) &&
596                     tmpl->mode == x->props.mode &&
597                     tmpl->id.proto == x->id.proto &&
598                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
599                         /* Resolution logic:
600                            1. There is a valid state with matching selector.
601                               Done.
602                            2. Valid state with inappropriate selector. Skip.
603
604                            Entering area of "sysdeps".
605
606                            3. If state is not valid, selector is temporary,
607                               it selects only session which triggered
608                               previous resolution. Key manager will do
609                               something to install a state with proper
610                               selector.
611                          */
612                         if (x->km.state == XFRM_STATE_VALID) {
613                                 if (!xfrm_selector_match(&x->sel, fl, x->sel.family) ||
614                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
615                                         continue;
                                /* Prefer non-dying over dying; among equals, the newest add_time. */
616                                 if (!best ||
617                                     best->km.dying > x->km.dying ||
618                                     (best->km.dying == x->km.dying &&
619                                      best->curlft.add_time < x->curlft.add_time))
620                                         best = x;
621                         } else if (x->km.state == XFRM_STATE_ACQ) {
622                                 acquire_in_progress = 1;
623                         } else if (x->km.state == XFRM_STATE_ERROR ||
624                                    x->km.state == XFRM_STATE_EXPIRED) {
625                                 if (xfrm_selector_match(&x->sel, fl, x->sel.family) &&
626                                     security_xfrm_state_pol_flow_match(x, pol, fl))
627                                         error = -ESRCH;
628                         }
629                 }
630         }
631
632         x = best;
633         if (!x && !error && !acquire_in_progress) {
                /* The template names an SPI that some state already uses: refuse. */
634                 if (tmpl->id.spi &&
635                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
636                                               tmpl->id.proto, family)) != NULL) {
637                         xfrm_state_put(x0);
638                         error = -EEXIST;
639                         goto out;
640                 }
641                 x = xfrm_state_alloc();
642                 if (x == NULL) {
643                         error = -ENOMEM;
644                         goto out;
645                 }
646                 /* Initialize temporary selector matching only
647                  * to current session. */
                /* NOTE(review): the return value of xfrm_init_tempsel() (-1 when
                 * no afinfo exists for the family) is ignored here - confirm
                 * that this cannot happen on this path. */
648                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
649
650                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
651                 if (error) {
652                         x->km.state = XFRM_STATE_DEAD;
653                         xfrm_state_put(x);
654                         x = NULL;
655                         goto out;
656                 }
657
658                 if (km_query(x, tmpl, pol) == 0) {
                        /* Acquire accepted: link the larval state into the tables
                         * and give it a hard lifetime of sysctl_xfrm_acq_expires
                         * seconds. */
659                         x->km.state = XFRM_STATE_ACQ;
660                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
661                         h = xfrm_src_hash(daddr, saddr, family);
662                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
663                         if (x->id.spi) {
664                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
665                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
666                         }
667                         x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
668                         x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
669                         add_timer(&x->timer);
670                         xfrm_state_num++;
671                         xfrm_hash_grow_check(x->bydst.next != NULL);
672                 } else {
673                         x->km.state = XFRM_STATE_DEAD;
674                         xfrm_state_put(x);
675                         x = NULL;
676                         error = -ESRCH;
677                 }
678         }
679 out:
        /* The reference taken here belongs to the caller. */
680         if (x)
681                 xfrm_state_hold(x);
682         else
683                 *err = acquire_in_progress ? -EAGAIN : error;
684         spin_unlock_bh(&xfrm_state_lock);
685         return x;
686 }
687
688 struct xfrm_state *
689 xfrm_stateonly_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
690                     unsigned short family, u8 mode, u8 proto, u32 reqid)
691 {
692         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
693         struct xfrm_state *rx = NULL, *x = NULL;
694         struct hlist_node *entry;
695
696         spin_lock(&xfrm_state_lock);
697         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
698                 if (x->props.family == family &&
699                     x->props.reqid == reqid &&
700                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
701                     xfrm_state_addr_check(x, daddr, saddr, family) &&
702                     mode == x->props.mode &&
703                     proto == x->id.proto &&
704                     x->km.state == XFRM_STATE_VALID) {
705                         rx = x;
706                         break;
707                 }
708         }
709
710         if (rx)
711                 xfrm_state_hold(rx);
712         spin_unlock(&xfrm_state_lock);
713
714
715         return rx;
716 }
717 EXPORT_SYMBOL(xfrm_stateonly_find);
718
719 static void __xfrm_state_insert(struct xfrm_state *x)
720 {
721         unsigned int h;
722
723         x->genid = ++xfrm_state_genid;
724
725         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
726                           x->props.reqid, x->props.family);
727         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
728
729         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
730         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
731
732         if (x->id.spi) {
733                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
734                                   x->props.family);
735
736                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
737         }
738
739         mod_timer(&x->timer, jiffies + HZ);
740         if (x->replay_maxage)
741                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
742
743         wake_up(&km_waitq);
744
745         xfrm_state_num++;
746
747         xfrm_hash_grow_check(x->bydst.next != NULL);
748 }
749
750 /* xfrm_state_lock is held */
751 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
752 {
753         unsigned short family = xnew->props.family;
754         u32 reqid = xnew->props.reqid;
755         struct xfrm_state *x;
756         struct hlist_node *entry;
757         unsigned int h;
758
759         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
760         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
761                 if (x->props.family     == family &&
762                     x->props.reqid      == reqid &&
763                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
764                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
765                         x->genid = xfrm_state_genid;
766         }
767 }
768
769 void xfrm_state_insert(struct xfrm_state *x)
770 {
771         spin_lock_bh(&xfrm_state_lock);
772         __xfrm_state_bump_genids(x);
773         __xfrm_state_insert(x);
774         spin_unlock_bh(&xfrm_state_lock);
775 }
776 EXPORT_SYMBOL(xfrm_state_insert);
777
778 /* xfrm_state_lock is held */
779 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
780 {
781         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
782         struct hlist_node *entry;
783         struct xfrm_state *x;
784
785         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
786                 if (x->props.reqid  != reqid ||
787                     x->props.mode   != mode ||
788                     x->props.family != family ||
789                     x->km.state     != XFRM_STATE_ACQ ||
790                     x->id.spi       != 0 ||
791                     x->id.proto     != proto)
792                         continue;
793
794                 switch (family) {
795                 case AF_INET:
796                         if (x->id.daddr.a4    != daddr->a4 ||
797                             x->props.saddr.a4 != saddr->a4)
798                                 continue;
799                         break;
800                 case AF_INET6:
801                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
802                                              (struct in6_addr *)daddr) ||
803                             !ipv6_addr_equal((struct in6_addr *)
804                                              x->props.saddr.a6,
805                                              (struct in6_addr *)saddr))
806                                 continue;
807                         break;
808                 }
809
810                 xfrm_state_hold(x);
811                 return x;
812         }
813
814         if (!create)
815                 return NULL;
816
817         x = xfrm_state_alloc();
818         if (likely(x)) {
819                 switch (family) {
820                 case AF_INET:
821                         x->sel.daddr.a4 = daddr->a4;
822                         x->sel.saddr.a4 = saddr->a4;
823                         x->sel.prefixlen_d = 32;
824                         x->sel.prefixlen_s = 32;
825                         x->props.saddr.a4 = saddr->a4;
826                         x->id.daddr.a4 = daddr->a4;
827                         break;
828
829                 case AF_INET6:
830                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
831                                        (struct in6_addr *)daddr);
832                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
833                                        (struct in6_addr *)saddr);
834                         x->sel.prefixlen_d = 128;
835                         x->sel.prefixlen_s = 128;
836                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
837                                        (struct in6_addr *)saddr);
838                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
839                                        (struct in6_addr *)daddr);
840                         break;
841                 }
842
843                 x->km.state = XFRM_STATE_ACQ;
844                 x->id.proto = proto;
845                 x->props.family = family;
846                 x->props.mode = mode;
847                 x->props.reqid = reqid;
848                 x->lft.hard_add_expires_seconds = sysctl_xfrm_acq_expires;
849                 xfrm_state_hold(x);
850                 x->timer.expires = jiffies + sysctl_xfrm_acq_expires*HZ;
851                 add_timer(&x->timer);
852                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
853                 h = xfrm_src_hash(daddr, saddr, family);
854                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
855                 wake_up(&km_waitq);
856
857                 xfrm_state_num++;
858
859                 xfrm_hash_grow_check(x->bydst.next != NULL);
860         }
861
862         return x;
863 }
864
865 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
866
867 int xfrm_state_add(struct xfrm_state *x)
868 {
869         struct xfrm_state *x1;
870         int family;
871         int err;
872         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
873
874         family = x->props.family;
875
876         spin_lock_bh(&xfrm_state_lock);
877
878         x1 = __xfrm_state_locate(x, use_spi, family);
879         if (x1) {
880                 xfrm_state_put(x1);
881                 x1 = NULL;
882                 err = -EEXIST;
883                 goto out;
884         }
885
886         if (use_spi && x->km.seq) {
887                 x1 = __xfrm_find_acq_byseq(x->km.seq);
888                 if (x1 && ((x1->id.proto != x->id.proto) ||
889                     xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
890                         xfrm_state_put(x1);
891                         x1 = NULL;
892                 }
893         }
894
895         if (use_spi && !x1)
896                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
897                                      x->id.proto,
898                                      &x->id.daddr, &x->props.saddr, 0);
899
900         __xfrm_state_bump_genids(x);
901         __xfrm_state_insert(x);
902         err = 0;
903
904 out:
905         spin_unlock_bh(&xfrm_state_lock);
906
907         if (x1) {
908                 xfrm_state_delete(x1);
909                 xfrm_state_put(x1);
910         }
911
912         return err;
913 }
914 EXPORT_SYMBOL(xfrm_state_add);
915
916 #ifdef CONFIG_XFRM_MIGRATE
917 struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
918 {
919         int err = -ENOMEM;
920         struct xfrm_state *x = xfrm_state_alloc();
921         if (!x)
922                 goto error;
923
924         memcpy(&x->id, &orig->id, sizeof(x->id));
925         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
926         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
927         x->props.mode = orig->props.mode;
928         x->props.replay_window = orig->props.replay_window;
929         x->props.reqid = orig->props.reqid;
930         x->props.family = orig->props.family;
931         x->props.saddr = orig->props.saddr;
932
933         if (orig->aalg) {
934                 x->aalg = xfrm_algo_clone(orig->aalg);
935                 if (!x->aalg)
936                         goto error;
937         }
938         x->props.aalgo = orig->props.aalgo;
939
940         if (orig->ealg) {
941                 x->ealg = xfrm_algo_clone(orig->ealg);
942                 if (!x->ealg)
943                         goto error;
944         }
945         x->props.ealgo = orig->props.ealgo;
946
947         if (orig->calg) {
948                 x->calg = xfrm_algo_clone(orig->calg);
949                 if (!x->calg)
950                         goto error;
951         }
952         x->props.calgo = orig->props.calgo;
953
954         if (orig->encap) {
955                 x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
956                 if (!x->encap)
957                         goto error;
958         }
959
960         if (orig->coaddr) {
961                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
962                                     GFP_KERNEL);
963                 if (!x->coaddr)
964                         goto error;
965         }
966
967         err = xfrm_init_state(x);
968         if (err)
969                 goto error;
970
971         x->props.flags = orig->props.flags;
972
973         x->curlft.add_time = orig->curlft.add_time;
974         x->km.state = orig->km.state;
975         x->km.seq = orig->km.seq;
976
977         return x;
978
979  error:
980         if (errp)
981                 *errp = err;
982         if (x) {
983                 kfree(x->aalg);
984                 kfree(x->ealg);
985                 kfree(x->calg);
986                 kfree(x->encap);
987                 kfree(x->coaddr);
988         }
989         kfree(x);
990         return NULL;
991 }
992 EXPORT_SYMBOL(xfrm_state_clone);
993
994 /* xfrm_state_lock is held */
995 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
996 {
997         unsigned int h;
998         struct xfrm_state *x;
999         struct hlist_node *entry;
1000
1001         if (m->reqid) {
1002                 h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
1003                                   m->reqid, m->old_family);
1004                 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
1005                         if (x->props.mode != m->mode ||
1006                             x->id.proto != m->proto)
1007                                 continue;
1008                         if (m->reqid && x->props.reqid != m->reqid)
1009                                 continue;
1010                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1011                                           m->old_family) ||
1012                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1013                                           m->old_family))
1014                                 continue;
1015                         xfrm_state_hold(x);
1016                         return x;
1017                 }
1018         } else {
1019                 h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
1020                                   m->old_family);
1021                 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
1022                         if (x->props.mode != m->mode ||
1023                             x->id.proto != m->proto)
1024                                 continue;
1025                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
1026                                           m->old_family) ||
1027                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
1028                                           m->old_family))
1029                                 continue;
1030                         xfrm_state_hold(x);
1031                         return x;
1032                 }
1033         }
1034
1035         return NULL;
1036 }
1037 EXPORT_SYMBOL(xfrm_migrate_state_find);
1038
1039 struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
1040                                        struct xfrm_migrate *m)
1041 {
1042         struct xfrm_state *xc;
1043         int err;
1044
1045         xc = xfrm_state_clone(x, &err);
1046         if (!xc)
1047                 return NULL;
1048
1049         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
1050         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
1051
1052         /* add state */
1053         if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
1054                 /* a care is needed when the destination address of the
1055                    state is to be updated as it is a part of triplet */
1056                 xfrm_state_insert(xc);
1057         } else {
1058                 if ((err = xfrm_state_add(xc)) < 0)
1059                         goto error;
1060         }
1061
1062         return xc;
1063 error:
1064         kfree(xc);
1065         return NULL;
1066 }
1067 EXPORT_SYMBOL(xfrm_state_migrate);
1068 #endif
1069
1070 int xfrm_state_update(struct xfrm_state *x)
1071 {
1072         struct xfrm_state *x1;
1073         int err;
1074         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
1075
1076         spin_lock_bh(&xfrm_state_lock);
1077         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
1078
1079         err = -ESRCH;
1080         if (!x1)
1081                 goto out;
1082
1083         if (xfrm_state_kern(x1)) {
1084                 xfrm_state_put(x1);
1085                 err = -EEXIST;
1086                 goto out;
1087         }
1088
1089         if (x1->km.state == XFRM_STATE_ACQ) {
1090                 __xfrm_state_insert(x);
1091                 x = NULL;
1092         }
1093         err = 0;
1094
1095 out:
1096         spin_unlock_bh(&xfrm_state_lock);
1097
1098         if (err)
1099                 return err;
1100
1101         if (!x) {
1102                 xfrm_state_delete(x1);
1103                 xfrm_state_put(x1);
1104                 return 0;
1105         }
1106
1107         err = -EINVAL;
1108         spin_lock_bh(&x1->lock);
1109         if (likely(x1->km.state == XFRM_STATE_VALID)) {
1110                 if (x->encap && x1->encap)
1111                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1112                 if (x->coaddr && x1->coaddr) {
1113                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1114                 }
1115                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1116                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1117                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1118                 x1->km.dying = 0;
1119
1120                 mod_timer(&x1->timer, jiffies + HZ);
1121                 if (x1->curlft.use_time)
1122                         xfrm_state_check_expire(x1);
1123
1124                 err = 0;
1125         }
1126         spin_unlock_bh(&x1->lock);
1127
1128         xfrm_state_put(x1);
1129
1130         return err;
1131 }
1132 EXPORT_SYMBOL(xfrm_state_update);
1133
1134 int xfrm_state_check_expire(struct xfrm_state *x)
1135 {
1136         if (!x->curlft.use_time)
1137                 x->curlft.use_time = get_seconds();
1138
1139         if (x->km.state != XFRM_STATE_VALID)
1140                 return -EINVAL;
1141
1142         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1143             x->curlft.packets >= x->lft.hard_packet_limit) {
1144                 x->km.state = XFRM_STATE_EXPIRED;
1145                 mod_timer(&x->timer, jiffies);
1146                 return -EINVAL;
1147         }
1148
1149         if (!x->km.dying &&
1150             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1151              x->curlft.packets >= x->lft.soft_packet_limit)) {
1152                 x->km.dying = 1;
1153                 km_state_expired(x, 0, 0);
1154         }
1155         return 0;
1156 }
1157 EXPORT_SYMBOL(xfrm_state_check_expire);
1158
1159 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1160 {
1161         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1162                 - skb_headroom(skb);
1163
1164         if (nhead > 0)
1165                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1166
1167         /* Check tail too... */
1168         return 0;
1169 }
1170
/*
 * Run the lifetime check and then the headroom check for @x on @skb.
 * Returns the first negative result from xfrm_state_check_expire(), or
 * else whatever xfrm_state_check_space() returns.
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int rc = xfrm_state_check_expire(x);

	if (rc < 0)
		return rc;

	return xfrm_state_check_space(x, skb);
}
1180 EXPORT_SYMBOL(xfrm_state_check);
1181
1182 struct xfrm_state *
1183 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1184                   unsigned short family)
1185 {
1186         struct xfrm_state *x;
1187
1188         spin_lock_bh(&xfrm_state_lock);
1189         x = __xfrm_state_lookup(daddr, spi, proto, family);
1190         spin_unlock_bh(&xfrm_state_lock);
1191         return x;
1192 }
1193 EXPORT_SYMBOL(xfrm_state_lookup);
1194
1195 struct xfrm_state *
1196 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1197                          u8 proto, unsigned short family)
1198 {
1199         struct xfrm_state *x;
1200
1201         spin_lock_bh(&xfrm_state_lock);
1202         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1203         spin_unlock_bh(&xfrm_state_lock);
1204         return x;
1205 }
1206 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1207
1208 struct xfrm_state *
1209 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1210               xfrm_address_t *daddr, xfrm_address_t *saddr,
1211               int create, unsigned short family)
1212 {
1213         struct xfrm_state *x;
1214
1215         spin_lock_bh(&xfrm_state_lock);
1216         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1217         spin_unlock_bh(&xfrm_state_lock);
1218
1219         return x;
1220 }
1221 EXPORT_SYMBOL(xfrm_find_acq);
1222
1223 #ifdef CONFIG_XFRM_SUB_POLICY
1224 int
1225 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1226                unsigned short family)
1227 {
1228         int err = 0;
1229         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1230         if (!afinfo)
1231                 return -EAFNOSUPPORT;
1232
1233         spin_lock_bh(&xfrm_state_lock);
1234         if (afinfo->tmpl_sort)
1235                 err = afinfo->tmpl_sort(dst, src, n);
1236         spin_unlock_bh(&xfrm_state_lock);
1237         xfrm_state_put_afinfo(afinfo);
1238         return err;
1239 }
1240 EXPORT_SYMBOL(xfrm_tmpl_sort);
1241
1242 int
1243 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1244                 unsigned short family)
1245 {
1246         int err = 0;
1247         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1248         if (!afinfo)
1249                 return -EAFNOSUPPORT;
1250
1251         spin_lock_bh(&xfrm_state_lock);
1252         if (afinfo->state_sort)
1253                 err = afinfo->state_sort(dst, src, n);
1254         spin_unlock_bh(&xfrm_state_lock);
1255         xfrm_state_put_afinfo(afinfo);
1256         return err;
1257 }
1258 EXPORT_SYMBOL(xfrm_state_sort);
1259 #endif
1260
1261 /* Silly enough, but I'm lazy to build resolution list */
1262
1263 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1264 {
1265         int i;
1266
1267         for (i = 0; i <= xfrm_state_hmask; i++) {
1268                 struct hlist_node *entry;
1269                 struct xfrm_state *x;
1270
1271                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1272                         if (x->km.seq == seq &&
1273                             x->km.state == XFRM_STATE_ACQ) {
1274                                 xfrm_state_hold(x);
1275                                 return x;
1276                         }
1277                 }
1278         }
1279         return NULL;
1280 }
1281
1282 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1283 {
1284         struct xfrm_state *x;
1285
1286         spin_lock_bh(&xfrm_state_lock);
1287         x = __xfrm_find_acq_byseq(seq);
1288         spin_unlock_bh(&xfrm_state_lock);
1289         return x;
1290 }
1291 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1292
1293 u32 xfrm_get_acqseq(void)
1294 {
1295         u32 res;
1296         static u32 acqseq;
1297         static DEFINE_SPINLOCK(acqseq_lock);
1298
1299         spin_lock_bh(&acqseq_lock);
1300         res = (++acqseq ? : ++acqseq);
1301         spin_unlock_bh(&acqseq_lock);
1302         return res;
1303 }
1304 EXPORT_SYMBOL(xfrm_get_acqseq);
1305
1306 void
1307 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1308 {
1309         unsigned int h;
1310         struct xfrm_state *x0;
1311
1312         if (x->id.spi)
1313                 return;
1314
1315         if (minspi == maxspi) {
1316                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1317                 if (x0) {
1318                         xfrm_state_put(x0);
1319                         return;
1320                 }
1321                 x->id.spi = minspi;
1322         } else {
1323                 u32 spi = 0;
1324                 u32 low = ntohl(minspi);
1325                 u32 high = ntohl(maxspi);
1326                 for (h=0; h<high-low+1; h++) {
1327                         spi = low + net_random()%(high-low+1);
1328                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1329                         if (x0 == NULL) {
1330                                 x->id.spi = htonl(spi);
1331                                 break;
1332                         }
1333                         xfrm_state_put(x0);
1334                 }
1335         }
1336         if (x->id.spi) {
1337                 spin_lock_bh(&xfrm_state_lock);
1338                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1339                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1340                 spin_unlock_bh(&xfrm_state_lock);
1341                 wake_up(&km_waitq);
1342         }
1343 }
1344 EXPORT_SYMBOL(xfrm_alloc_spi);
1345
1346 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1347                     void *data)
1348 {
1349         int i;
1350         struct xfrm_state *x, *last = NULL;
1351         struct hlist_node *entry;
1352         int count = 0;
1353         int err = 0;
1354
1355         spin_lock_bh(&xfrm_state_lock);
1356         for (i = 0; i <= xfrm_state_hmask; i++) {
1357                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1358                         if (!xfrm_id_proto_match(x->id.proto, proto))
1359                                 continue;
1360                         if (last) {
1361                                 err = func(last, count, data);
1362                                 if (err)
1363                                         goto out;
1364                         }
1365                         last = x;
1366                         count++;
1367                 }
1368         }
1369         if (count == 0) {
1370                 err = -ENOENT;
1371                 goto out;
1372         }
1373         err = func(last, 0, data);
1374 out:
1375         spin_unlock_bh(&xfrm_state_lock);
1376         return err;
1377 }
1378 EXPORT_SYMBOL(xfrm_state_walk);
1379
1380
1381 void xfrm_replay_notify(struct xfrm_state *x, int event)
1382 {
1383         struct km_event c;
1384         /* we send notify messages in case
1385          *  1. we updated on of the sequence numbers, and the seqno difference
1386          *     is at least x->replay_maxdiff, in this case we also update the
1387          *     timeout of our timer function
1388          *  2. if x->replay_maxage has elapsed since last update,
1389          *     and there were changes
1390          *
1391          *  The state structure must be locked!
1392          */
1393
1394         switch (event) {
1395         case XFRM_REPLAY_UPDATE:
1396                 if (x->replay_maxdiff &&
1397                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1398                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1399                         if (x->xflags & XFRM_TIME_DEFER)
1400                                 event = XFRM_REPLAY_TIMEOUT;
1401                         else
1402                                 return;
1403                 }
1404
1405                 break;
1406
1407         case XFRM_REPLAY_TIMEOUT:
1408                 if ((x->replay.seq == x->preplay.seq) &&
1409                     (x->replay.bitmap == x->preplay.bitmap) &&
1410                     (x->replay.oseq == x->preplay.oseq)) {
1411                         x->xflags |= XFRM_TIME_DEFER;
1412                         return;
1413                 }
1414
1415                 break;
1416         }
1417
1418         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1419         c.event = XFRM_MSG_NEWAE;
1420         c.data.aevent = event;
1421         km_state_notify(x, &c);
1422
1423         if (x->replay_maxage &&
1424             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1425                 x->xflags &= ~XFRM_TIME_DEFER;
1426 }
1427 EXPORT_SYMBOL(xfrm_replay_notify);
1428
1429 static void xfrm_replay_timer_handler(unsigned long data)
1430 {
1431         struct xfrm_state *x = (struct xfrm_state*)data;
1432
1433         spin_lock(&x->lock);
1434
1435         if (x->km.state == XFRM_STATE_VALID) {
1436                 if (xfrm_aevent_is_on())
1437                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1438                 else
1439                         x->xflags |= XFRM_TIME_DEFER;
1440         }
1441
1442         spin_unlock(&x->lock);
1443 }
1444
1445 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1446 {
1447         u32 diff;
1448         u32 seq = ntohl(net_seq);
1449
1450         if (unlikely(seq == 0))
1451                 return -EINVAL;
1452
1453         if (likely(seq > x->replay.seq))
1454                 return 0;
1455
1456         diff = x->replay.seq - seq;
1457         if (diff >= min_t(unsigned int, x->props.replay_window,
1458                           sizeof(x->replay.bitmap) * 8)) {
1459                 x->stats.replay_window++;
1460                 return -EINVAL;
1461         }
1462
1463         if (x->replay.bitmap & (1U << diff)) {
1464                 x->stats.replay++;
1465                 return -EINVAL;
1466         }
1467         return 0;
1468 }
1469 EXPORT_SYMBOL(xfrm_replay_check);
1470
1471 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1472 {
1473         u32 diff;
1474         u32 seq = ntohl(net_seq);
1475
1476         if (seq > x->replay.seq) {
1477                 diff = seq - x->replay.seq;
1478                 if (diff < x->props.replay_window)
1479                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1480                 else
1481                         x->replay.bitmap = 1;
1482                 x->replay.seq = seq;
1483         } else {
1484                 diff = x->replay.seq - seq;
1485                 x->replay.bitmap |= (1U << diff);
1486         }
1487
1488         if (xfrm_aevent_is_on())
1489                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1490 }
1491 EXPORT_SYMBOL(xfrm_replay_advance);
1492
1493 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1494 static DEFINE_RWLOCK(xfrm_km_lock);
1495
1496 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1497 {
1498         struct xfrm_mgr *km;
1499
1500         read_lock(&xfrm_km_lock);
1501         list_for_each_entry(km, &xfrm_km_list, list)
1502                 if (km->notify_policy)
1503                         km->notify_policy(xp, dir, c);
1504         read_unlock(&xfrm_km_lock);
1505 }
1506
1507 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1508 {
1509         struct xfrm_mgr *km;
1510         read_lock(&xfrm_km_lock);
1511         list_for_each_entry(km, &xfrm_km_list, list)
1512                 if (km->notify)
1513                         km->notify(x, c);
1514         read_unlock(&xfrm_km_lock);
1515 }
1516
1517 EXPORT_SYMBOL(km_policy_notify);
1518 EXPORT_SYMBOL(km_state_notify);
1519
1520 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1521 {
1522         struct km_event c;
1523
1524         c.data.hard = hard;
1525         c.pid = pid;
1526         c.event = XFRM_MSG_EXPIRE;
1527         km_state_notify(x, &c);
1528
1529         if (hard)
1530                 wake_up(&km_waitq);
1531 }
1532
1533 EXPORT_SYMBOL(km_state_expired);
1534 /*
1535  * We send to all registered managers regardless of failure
1536  * We are happy with one success
1537 */
1538 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1539 {
1540         int err = -EINVAL, acqret;
1541         struct xfrm_mgr *km;
1542
1543         read_lock(&xfrm_km_lock);
1544         list_for_each_entry(km, &xfrm_km_list, list) {
1545                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1546                 if (!acqret)
1547                         err = acqret;
1548         }
1549         read_unlock(&xfrm_km_lock);
1550         return err;
1551 }
1552 EXPORT_SYMBOL(km_query);
1553
1554 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1555 {
1556         int err = -EINVAL;
1557         struct xfrm_mgr *km;
1558
1559         read_lock(&xfrm_km_lock);
1560         list_for_each_entry(km, &xfrm_km_list, list) {
1561                 if (km->new_mapping)
1562                         err = km->new_mapping(x, ipaddr, sport);
1563                 if (!err)
1564                         break;
1565         }
1566         read_unlock(&xfrm_km_lock);
1567         return err;
1568 }
1569 EXPORT_SYMBOL(km_new_mapping);
1570
1571 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1572 {
1573         struct km_event c;
1574
1575         c.data.hard = hard;
1576         c.pid = pid;
1577         c.event = XFRM_MSG_POLEXPIRE;
1578         km_policy_notify(pol, dir, &c);
1579
1580         if (hard)
1581                 wake_up(&km_waitq);
1582 }
1583 EXPORT_SYMBOL(km_policy_expired);
1584
1585 int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1586                struct xfrm_migrate *m, int num_migrate)
1587 {
1588         int err = -EINVAL;
1589         int ret;
1590         struct xfrm_mgr *km;
1591
1592         read_lock(&xfrm_km_lock);
1593         list_for_each_entry(km, &xfrm_km_list, list) {
1594                 if (km->migrate) {
1595                         ret = km->migrate(sel, dir, type, m, num_migrate);
1596                         if (!ret)
1597                                 err = ret;
1598                 }
1599         }
1600         read_unlock(&xfrm_km_lock);
1601         return err;
1602 }
1603 EXPORT_SYMBOL(km_migrate);
1604
1605 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1606 {
1607         int err = -EINVAL;
1608         int ret;
1609         struct xfrm_mgr *km;
1610
1611         read_lock(&xfrm_km_lock);
1612         list_for_each_entry(km, &xfrm_km_list, list) {
1613                 if (km->report) {
1614                         ret = km->report(proto, sel, addr);
1615                         if (!ret)
1616                                 err = ret;
1617                 }
1618         }
1619         read_unlock(&xfrm_km_lock);
1620         return err;
1621 }
1622 EXPORT_SYMBOL(km_report);
1623
1624 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1625 {
1626         int err;
1627         u8 *data;
1628         struct xfrm_mgr *km;
1629         struct xfrm_policy *pol = NULL;
1630
1631         if (optlen <= 0 || optlen > PAGE_SIZE)
1632                 return -EMSGSIZE;
1633
1634         data = kmalloc(optlen, GFP_KERNEL);
1635         if (!data)
1636                 return -ENOMEM;
1637
1638         err = -EFAULT;
1639         if (copy_from_user(data, optval, optlen))
1640                 goto out;
1641
1642         err = -EINVAL;
1643         read_lock(&xfrm_km_lock);
1644         list_for_each_entry(km, &xfrm_km_list, list) {
1645                 pol = km->compile_policy(sk, optname, data,
1646                                          optlen, &err);
1647                 if (err >= 0)
1648                         break;
1649         }
1650         read_unlock(&xfrm_km_lock);
1651
1652         if (err >= 0) {
1653                 xfrm_sk_policy_insert(sk, err, pol);
1654                 xfrm_pol_put(pol);
1655                 err = 0;
1656         }
1657
1658 out:
1659         kfree(data);
1660         return err;
1661 }
1662 EXPORT_SYMBOL(xfrm_user_policy);
1663
1664 int xfrm_register_km(struct xfrm_mgr *km)
1665 {
1666         write_lock_bh(&xfrm_km_lock);
1667         list_add_tail(&km->list, &xfrm_km_list);
1668         write_unlock_bh(&xfrm_km_lock);
1669         return 0;
1670 }
1671 EXPORT_SYMBOL(xfrm_register_km);
1672
1673 int xfrm_unregister_km(struct xfrm_mgr *km)
1674 {
1675         write_lock_bh(&xfrm_km_lock);
1676         list_del(&km->list);
1677         write_unlock_bh(&xfrm_km_lock);
1678         return 0;
1679 }
1680 EXPORT_SYMBOL(xfrm_unregister_km);
1681
1682 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1683 {
1684         int err = 0;
1685         if (unlikely(afinfo == NULL))
1686                 return -EINVAL;
1687         if (unlikely(afinfo->family >= NPROTO))
1688                 return -EAFNOSUPPORT;
1689         write_lock_bh(&xfrm_state_afinfo_lock);
1690         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1691                 err = -ENOBUFS;
1692         else
1693                 xfrm_state_afinfo[afinfo->family] = afinfo;
1694         write_unlock_bh(&xfrm_state_afinfo_lock);
1695         return err;
1696 }
1697 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1698
1699 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1700 {
1701         int err = 0;
1702         if (unlikely(afinfo == NULL))
1703                 return -EINVAL;
1704         if (unlikely(afinfo->family >= NPROTO))
1705                 return -EAFNOSUPPORT;
1706         write_lock_bh(&xfrm_state_afinfo_lock);
1707         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1708                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1709                         err = -EINVAL;
1710                 else
1711                         xfrm_state_afinfo[afinfo->family] = NULL;
1712         }
1713         write_unlock_bh(&xfrm_state_afinfo_lock);
1714         return err;
1715 }
1716 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1717
1718 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1719 {
1720         struct xfrm_state_afinfo *afinfo;
1721         if (unlikely(family >= NPROTO))
1722                 return NULL;
1723         read_lock(&xfrm_state_afinfo_lock);
1724         afinfo = xfrm_state_afinfo[family];
1725         if (unlikely(!afinfo))
1726                 read_unlock(&xfrm_state_afinfo_lock);
1727         return afinfo;
1728 }
1729
1730 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1731 {
1732         read_unlock(&xfrm_state_afinfo_lock);
1733 }
1734
1735 EXPORT_SYMBOL(xfrm_state_get_afinfo);
1736 EXPORT_SYMBOL(xfrm_state_put_afinfo);
1737
1738 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1739 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1740 {
1741         if (x->tunnel) {
1742                 struct xfrm_state *t = x->tunnel;
1743
1744                 if (atomic_read(&t->tunnel_users) == 2)
1745                         xfrm_state_delete(t);
1746                 atomic_dec(&t->tunnel_users);
1747                 xfrm_state_put(t);
1748                 x->tunnel = NULL;
1749         }
1750 }
1751 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1752
1753 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1754 {
1755         int res;
1756
1757         spin_lock_bh(&x->lock);
1758         if (x->km.state == XFRM_STATE_VALID &&
1759             x->type && x->type->get_mtu)
1760                 res = x->type->get_mtu(x, mtu);
1761         else
1762                 res = mtu - x->props.header_len;
1763         spin_unlock_bh(&x->lock);
1764         return res;
1765 }
1766
1767 int xfrm_init_state(struct xfrm_state *x)
1768 {
1769         struct xfrm_state_afinfo *afinfo;
1770         int family = x->props.family;
1771         int err;
1772
1773         err = -EAFNOSUPPORT;
1774         afinfo = xfrm_state_get_afinfo(family);
1775         if (!afinfo)
1776                 goto error;
1777
1778         err = 0;
1779         if (afinfo->init_flags)
1780                 err = afinfo->init_flags(x);
1781
1782         xfrm_state_put_afinfo(afinfo);
1783
1784         if (err)
1785                 goto error;
1786
1787         err = -EPROTONOSUPPORT;
1788         x->type = xfrm_get_type(x->id.proto, family);
1789         if (x->type == NULL)
1790                 goto error;
1791
1792         err = x->type->init_state(x);
1793         if (err)
1794                 goto error;
1795
1796         x->mode = xfrm_get_mode(x->props.mode, family);
1797         if (x->mode == NULL)
1798                 goto error;
1799
1800         x->km.state = XFRM_STATE_VALID;
1801
1802 error:
1803         return err;
1804 }
1805
1806 EXPORT_SYMBOL(xfrm_init_state);
1807
1808 void __init xfrm_state_init(void)
1809 {
1810         unsigned int sz;
1811
1812         sz = sizeof(struct hlist_head) * 8;
1813
1814         xfrm_state_bydst = xfrm_hash_alloc(sz);
1815         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1816         xfrm_state_byspi = xfrm_hash_alloc(sz);
1817         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1818                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1819         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1820
1821         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1822 }
1823