linux-2.6: net/xfrm/xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
24
25 #include "xfrm_hash.h"
26
27 struct sock *xfrm_nl;
28 EXPORT_SYMBOL(xfrm_nl);
29
30 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32
33 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35
36 /* Each xfrm_state may be linked to two tables:
37
38    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
39    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
40       destination/tunnel endpoint. (output)
41  */
42
43 static DEFINE_SPINLOCK(xfrm_state_lock);
44
45 /* Hash table to find appropriate SA towards given target (endpoint
46  * of tunnel or destination of transport mode) allowed by selector.
47  *
48  * Main use is finding SA after policy selected tunnel or transport mode.
49  * Also, it can be used by ah/esp icmp error handler to find offending SA.
50  */
51 static struct hlist_head *xfrm_state_bydst __read_mostly;
52 static struct hlist_head *xfrm_state_bysrc __read_mostly;
53 static struct hlist_head *xfrm_state_byspi __read_mostly;
54 static unsigned int xfrm_state_hmask __read_mostly;
55 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
56 static unsigned int xfrm_state_num;
57 static unsigned int xfrm_state_genid;
58
59 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
60                                          xfrm_address_t *saddr,
61                                          u32 reqid,
62                                          unsigned short family)
63 {
64         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
65 }
66
67 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
68                                          xfrm_address_t *saddr,
69                                          unsigned short family)
70 {
71         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
72 }
73
74 static inline unsigned int
75 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
76 {
77         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
78 }
79
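/* Move every state in the given bucket into the new bydst/bysrc/byspi
 * tables, rehashing with the new mask.  Called only from
 * xfrm_hash_resize() with xfrm_state_lock held.
 */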
80 static void xfrm_hash_transfer(struct hlist_head *list,
81                                struct hlist_head *ndsttable,
82                                struct hlist_head *nsrctable,
83                                struct hlist_head *nspitable,
84                                unsigned int nhashmask)
85 {
86         struct hlist_node *entry, *tmp;
87         struct xfrm_state *x;
88
89         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
90                 unsigned int h;
91
92                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
93                                     x->props.reqid, x->props.family,
94                                     nhashmask);
95                 hlist_add_head(&x->bydst, ndsttable+h);
96
97                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
98                                     x->props.family,
99                                     nhashmask);
100                 hlist_add_head(&x->bysrc, nsrctable+h);
101
102                 if (x->id.spi) {
103                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
104                                             x->id.proto, x->props.family,
105                                             nhashmask);
106                         hlist_add_head(&x->byspi, nspitable+h);
107                 }
108         }
109 }
110
111 static unsigned long xfrm_hash_new_size(void)
112 {
113         return ((xfrm_state_hmask + 1) << 1) *
114                 sizeof(struct hlist_head);
115 }
116
117 static DEFINE_MUTEX(hash_resize_mutex);
118
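/* Grow the state hash tables from a workqueue.  New tables of twice
 * the current size are allocated up front, the states are transferred
 * under xfrm_state_lock, and the old tables are freed afterwards.
 */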
119 static void xfrm_hash_resize(struct work_struct *__unused)
120 {
121         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
122         unsigned long nsize, osize;
123         unsigned int nhashmask, ohashmask;
124         int i;
125
126         mutex_lock(&hash_resize_mutex);
127
128         nsize = xfrm_hash_new_size();
129         ndst = xfrm_hash_alloc(nsize);
130         if (!ndst)
131                 goto out_unlock;
132         nsrc = xfrm_hash_alloc(nsize);
133         if (!nsrc) {
134                 xfrm_hash_free(ndst, nsize);
135                 goto out_unlock;
136         }
137         nspi = xfrm_hash_alloc(nsize);
138         if (!nspi) {
139                 xfrm_hash_free(ndst, nsize);
140                 xfrm_hash_free(nsrc, nsize);
141                 goto out_unlock;
142         }
143
144         spin_lock_bh(&xfrm_state_lock);
145
146         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
147         for (i = xfrm_state_hmask; i >= 0; i--)
148                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
149                                    nhashmask);
150
151         odst = xfrm_state_bydst;
152         osrc = xfrm_state_bysrc;
153         ospi = xfrm_state_byspi;
154         ohashmask = xfrm_state_hmask;
155
156         xfrm_state_bydst = ndst;
157         xfrm_state_bysrc = nsrc;
158         xfrm_state_byspi = nspi;
159         xfrm_state_hmask = nhashmask;
160
161         spin_unlock_bh(&xfrm_state_lock);
162
163         osize = (ohashmask + 1) * sizeof(struct hlist_head);
164         xfrm_hash_free(odst, osize);
165         xfrm_hash_free(osrc, osize);
166         xfrm_hash_free(ospi, osize);
167
168 out_unlock:
169         mutex_unlock(&hash_resize_mutex);
170 }
171
172 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
173
174 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
175 EXPORT_SYMBOL(km_waitq);
176
177 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
178 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
179
180 static struct work_struct xfrm_state_gc_work;
181 static HLIST_HEAD(xfrm_state_gc_list);
182 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
183
184 int __xfrm_state_delete(struct xfrm_state *x);
185
186 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
187 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
188
189 static void xfrm_state_gc_destroy(struct xfrm_state *x)
190 {
191         del_timer_sync(&x->timer);
192         del_timer_sync(&x->rtimer);
193         kfree(x->aalg);
194         kfree(x->ealg);
195         kfree(x->calg);
196         kfree(x->encap);
197         kfree(x->coaddr);
198         if (x->mode)
199                 xfrm_put_mode(x->mode);
200         if (x->type) {
201                 x->type->destructor(x);
202                 xfrm_put_type(x->type);
203         }
204         security_xfrm_state_free(x);
205         kfree(x);
206 }
207
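/* Work function: splice the pending list off xfrm_state_gc_list under
 * xfrm_state_gc_lock, destroy each state outside of it, then wake any
 * waiters on km_waitq.
 */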
208 static void xfrm_state_gc_task(struct work_struct *data)
209 {
210         struct xfrm_state *x;
211         struct hlist_node *entry, *tmp;
212         struct hlist_head gc_list;
213
214         spin_lock_bh(&xfrm_state_gc_lock);
215         gc_list.first = xfrm_state_gc_list.first;
216         INIT_HLIST_HEAD(&xfrm_state_gc_list);
217         spin_unlock_bh(&xfrm_state_gc_lock);
218
219         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
220                 xfrm_state_gc_destroy(x);
221
222         wake_up(&km_waitq);
223 }
224
225 static inline unsigned long make_jiffies(long secs)
226 {
227         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
228                 return MAX_SCHEDULE_TIMEOUT-1;
229         else
230                 return secs*HZ;
231 }
232
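/* Per-state lifetime timer.  Enforces hard and soft add/use expiry:
 * soft expiry notifies the key manager (km.dying), hard expiry deletes
 * the state (larval entries without an SPI are marked EXPIRED first).
 * Reschedules itself for the next relevant deadline.
 */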
233 static void xfrm_timer_handler(unsigned long data)
234 {
235         struct xfrm_state *x = (struct xfrm_state*)data;
236         unsigned long now = (unsigned long)xtime.tv_sec;
237         long next = LONG_MAX;
238         int warn = 0;
239         int err = 0;
240
241         spin_lock(&x->lock);
242         if (x->km.state == XFRM_STATE_DEAD)
243                 goto out;
244         if (x->km.state == XFRM_STATE_EXPIRED)
245                 goto expired;
246         if (x->lft.hard_add_expires_seconds) {
247                 long tmo = x->lft.hard_add_expires_seconds +
248                         x->curlft.add_time - now;
249                 if (tmo <= 0)
250                         goto expired;
251                 if (tmo < next)
252                         next = tmo;
253         }
254         if (x->lft.hard_use_expires_seconds) {
255                 long tmo = x->lft.hard_use_expires_seconds +
256                         (x->curlft.use_time ? : now) - now;
257                 if (tmo <= 0)
258                         goto expired;
259                 if (tmo < next)
260                         next = tmo;
261         }
262         if (x->km.dying)
263                 goto resched;
264         if (x->lft.soft_add_expires_seconds) {
265                 long tmo = x->lft.soft_add_expires_seconds +
266                         x->curlft.add_time - now;
267                 if (tmo <= 0)
268                         warn = 1;
269                 else if (tmo < next)
270                         next = tmo;
271         }
272         if (x->lft.soft_use_expires_seconds) {
273                 long tmo = x->lft.soft_use_expires_seconds +
274                         (x->curlft.use_time ? : now) - now;
275                 if (tmo <= 0)
276                         warn = 1;
277                 else if (tmo < next)
278                         next = tmo;
279         }
280
281         x->km.dying = warn;
282         if (warn)
283                 km_state_expired(x, 0, 0);
284 resched:
285         if (next != LONG_MAX)
286                 mod_timer(&x->timer, jiffies + make_jiffies(next));
287
288         goto out;
289
290 expired:
291         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
292                 x->km.state = XFRM_STATE_EXPIRED;
293                 wake_up(&km_waitq);
294                 next = 2;
295                 goto resched;
296         }
297
298         err = __xfrm_state_delete(x);
299         if (!err && x->id.spi)
300                 km_state_expired(x, 1, 0);
301
302         xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
303                        AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
304
305 out:
306         spin_unlock(&x->lock);
307 }
308
309 static void xfrm_replay_timer_handler(unsigned long data);
310
311 struct xfrm_state *xfrm_state_alloc(void)
312 {
313         struct xfrm_state *x;
314
315         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
316
317         if (x) {
318                 atomic_set(&x->refcnt, 1);
319                 atomic_set(&x->tunnel_users, 0);
320                 INIT_HLIST_NODE(&x->bydst);
321                 INIT_HLIST_NODE(&x->bysrc);
322                 INIT_HLIST_NODE(&x->byspi);
323                 init_timer(&x->timer);
324                 x->timer.function = xfrm_timer_handler;
325                 x->timer.data     = (unsigned long)x;
326                 init_timer(&x->rtimer);
327                 x->rtimer.function = xfrm_replay_timer_handler;
328                 x->rtimer.data     = (unsigned long)x;
329                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
330                 x->lft.soft_byte_limit = XFRM_INF;
331                 x->lft.soft_packet_limit = XFRM_INF;
332                 x->lft.hard_byte_limit = XFRM_INF;
333                 x->lft.hard_packet_limit = XFRM_INF;
334                 x->replay_maxage = 0;
335                 x->replay_maxdiff = 0;
336                 spin_lock_init(&x->lock);
337         }
338         return x;
339 }
340 EXPORT_SYMBOL(xfrm_state_alloc);
341
342 void __xfrm_state_destroy(struct xfrm_state *x)
343 {
344         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
345
346         spin_lock_bh(&xfrm_state_gc_lock);
347         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
348         spin_unlock_bh(&xfrm_state_gc_lock);
349         schedule_work(&xfrm_state_gc_work);
350 }
351 EXPORT_SYMBOL(__xfrm_state_destroy);
352
353 int __xfrm_state_delete(struct xfrm_state *x)
354 {
355         int err = -ESRCH;
356
357         if (x->km.state != XFRM_STATE_DEAD) {
358                 x->km.state = XFRM_STATE_DEAD;
359                 spin_lock(&xfrm_state_lock);
360                 hlist_del(&x->bydst);
361                 hlist_del(&x->bysrc);
362                 if (x->id.spi)
363                         hlist_del(&x->byspi);
364                 xfrm_state_num--;
365                 spin_unlock(&xfrm_state_lock);
366
367                 /* All xfrm_state objects are created by xfrm_state_alloc.
368                  * The xfrm_state_alloc call gives a reference, and that
369                  * is what we are dropping here.
370                  */
371                 __xfrm_state_put(x);
372                 err = 0;
373         }
374
375         return err;
376 }
377 EXPORT_SYMBOL(__xfrm_state_delete);
378
379 int xfrm_state_delete(struct xfrm_state *x)
380 {
381         int err;
382
383         spin_lock_bh(&x->lock);
384         err = __xfrm_state_delete(x);
385         spin_unlock_bh(&x->lock);
386
387         return err;
388 }
389 EXPORT_SYMBOL(xfrm_state_delete);
390
391 void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
392 {
393         int i;
394         int err = 0;
395
396         spin_lock_bh(&xfrm_state_lock);
397         for (i = 0; i <= xfrm_state_hmask; i++) {
398                 struct hlist_node *entry;
399                 struct xfrm_state *x;
400 restart:
401                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
402                         if (!xfrm_state_kern(x) &&
403                             xfrm_id_proto_match(x->id.proto, proto)) {
404                                 xfrm_state_hold(x);
405                                 spin_unlock_bh(&xfrm_state_lock);
406
407                                 err = xfrm_state_delete(x);
408                                 xfrm_audit_log(audit_info->loginuid,
409                                                audit_info->secid,
410                                                AUDIT_MAC_IPSEC_DELSA,
411                                                err ? 0 : 1, NULL, x);
412                                 xfrm_state_put(x);
413
414                                 spin_lock_bh(&xfrm_state_lock);
415                                 goto restart;
416                         }
417                 }
418         }
419         spin_unlock_bh(&xfrm_state_lock);
420         wake_up(&km_waitq);
421 }
422 EXPORT_SYMBOL(xfrm_state_flush);
423
424 static int
425 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
426                   struct xfrm_tmpl *tmpl,
427                   xfrm_address_t *daddr, xfrm_address_t *saddr,
428                   unsigned short family)
429 {
430         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
431         if (!afinfo)
432                 return -1;
433         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
434         xfrm_state_put_afinfo(afinfo);
435         return 0;
436 }
437
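/* Look up an SA by (daddr, spi, proto) in the byspi hash.  The caller
 * must hold xfrm_state_lock; a reference is taken on the returned
 * state.
 */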
438 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
439 {
440         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
441         struct xfrm_state *x;
442         struct hlist_node *entry;
443
444         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
445                 if (x->props.family != family ||
446                     x->id.spi       != spi ||
447                     x->id.proto     != proto)
448                         continue;
449
450                 switch (family) {
451                 case AF_INET:
452                         if (x->id.daddr.a4 != daddr->a4)
453                                 continue;
454                         break;
455                 case AF_INET6:
456                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
457                                              (struct in6_addr *)
458                                              x->id.daddr.a6))
459                                 continue;
460                         break;
461                 };
462
463                 xfrm_state_hold(x);
464                 return x;
465         }
466
467         return NULL;
468 }
469
470 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
471 {
472         unsigned int h = xfrm_src_hash(daddr, saddr, family);
473         struct xfrm_state *x;
474         struct hlist_node *entry;
475
476         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
477                 if (x->props.family != family ||
478                     x->id.proto     != proto)
479                         continue;
480
481                 switch (family) {
482                 case AF_INET:
483                         if (x->id.daddr.a4 != daddr->a4 ||
484                             x->props.saddr.a4 != saddr->a4)
485                                 continue;
486                         break;
487                 case AF_INET6:
488                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
489                                              (struct in6_addr *)
490                                              x->id.daddr.a6) ||
491                             !ipv6_addr_equal((struct in6_addr *)saddr,
492                                              (struct in6_addr *)
493                                              x->props.saddr.a6))
494                                 continue;
495                         break;
496                 };
497
498                 xfrm_state_hold(x);
499                 return x;
500         }
501
502         return NULL;
503 }
504
505 static inline struct xfrm_state *
506 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
507 {
508         if (use_spi)
509                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
510                                            x->id.proto, family);
511         else
512                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
513                                                   &x->props.saddr,
514                                                   x->id.proto, family);
515 }
516
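/* Schedule an asynchronous hash resize once chains start colliding and
 * the number of states reaches the number of buckets (bounded by
 * xfrm_state_hashmax).
 */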
517 static void xfrm_hash_grow_check(int have_hash_collision)
518 {
519         if (have_hash_collision &&
520             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
521             xfrm_state_num > xfrm_state_hmask)
522                 schedule_work(&xfrm_hash_work);
523 }
524
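/* Main output-path SA resolution: walk the bydst bucket for
 * (daddr, saddr, reqid), prefer the freshest VALID state matching the
 * template, flow and security context; otherwise allocate a temporary
 * state, query the key managers and, on success, install it as a
 * larval XFRM_STATE_ACQ entry.
 */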
525 struct xfrm_state *
526 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
527                 struct flowi *fl, struct xfrm_tmpl *tmpl,
528                 struct xfrm_policy *pol, int *err,
529                 unsigned short family)
530 {
531         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
532         struct hlist_node *entry;
533         struct xfrm_state *x, *x0;
534         int acquire_in_progress = 0;
535         int error = 0;
536         struct xfrm_state *best = NULL;
537
538         spin_lock_bh(&xfrm_state_lock);
539         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
540                 if (x->props.family == family &&
541                     x->props.reqid == tmpl->reqid &&
542                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
543                     xfrm_state_addr_check(x, daddr, saddr, family) &&
544                     tmpl->mode == x->props.mode &&
545                     tmpl->id.proto == x->id.proto &&
546                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
547                         /* Resolution logic:
548                            1. There is a valid state with matching selector.
549                               Done.
550                            2. Valid state with inappropriate selector. Skip.
551
552                            Entering area of "sysdeps".
553
554                            3. If state is not valid, selector is temporary,
555                               it selects only session which triggered
556                               previous resolution. Key manager will do
557                               something to install a state with proper
558                               selector.
559                          */
560                         if (x->km.state == XFRM_STATE_VALID) {
561                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
562                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
563                                         continue;
564                                 if (!best ||
565                                     best->km.dying > x->km.dying ||
566                                     (best->km.dying == x->km.dying &&
567                                      best->curlft.add_time < x->curlft.add_time))
568                                         best = x;
569                         } else if (x->km.state == XFRM_STATE_ACQ) {
570                                 acquire_in_progress = 1;
571                         } else if (x->km.state == XFRM_STATE_ERROR ||
572                                    x->km.state == XFRM_STATE_EXPIRED) {
573                                 if (xfrm_selector_match(&x->sel, fl, family) &&
574                                     security_xfrm_state_pol_flow_match(x, pol, fl))
575                                         error = -ESRCH;
576                         }
577                 }
578         }
579
580         x = best;
581         if (!x && !error && !acquire_in_progress) {
582                 if (tmpl->id.spi &&
583                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
584                                               tmpl->id.proto, family)) != NULL) {
585                         xfrm_state_put(x0);
586                         error = -EEXIST;
587                         goto out;
588                 }
589                 x = xfrm_state_alloc();
590                 if (x == NULL) {
591                         error = -ENOMEM;
592                         goto out;
593                 }
594                 /* Initialize temporary selector matching only
595                  * to current session. */
596                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
597
598                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
599                 if (error) {
600                         x->km.state = XFRM_STATE_DEAD;
601                         xfrm_state_put(x);
602                         x = NULL;
603                         goto out;
604                 }
605
606                 if (km_query(x, tmpl, pol) == 0) {
607                         x->km.state = XFRM_STATE_ACQ;
608                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
609                         h = xfrm_src_hash(daddr, saddr, family);
610                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
611                         if (x->id.spi) {
612                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
613                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
614                         }
615                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
616                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
617                         add_timer(&x->timer);
618                         xfrm_state_num++;
619                         xfrm_hash_grow_check(x->bydst.next != NULL);
620                 } else {
621                         x->km.state = XFRM_STATE_DEAD;
622                         xfrm_state_put(x);
623                         x = NULL;
624                         error = -ESRCH;
625                 }
626         }
627 out:
628         if (x)
629                 xfrm_state_hold(x);
630         else
631                 *err = acquire_in_progress ? -EAGAIN : error;
632         spin_unlock_bh(&xfrm_state_lock);
633         return x;
634 }
635
636 static void __xfrm_state_insert(struct xfrm_state *x)
637 {
638         unsigned int h;
639
640         x->genid = ++xfrm_state_genid;
641
642         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
643                           x->props.reqid, x->props.family);
644         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
645
646         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
647         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
648
649         if (x->id.spi) {
650                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
651                                   x->props.family);
652
653                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
654         }
655
656         mod_timer(&x->timer, jiffies + HZ);
657         if (x->replay_maxage)
658                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
659
660         wake_up(&km_waitq);
661
662         xfrm_state_num++;
663
664         xfrm_hash_grow_check(x->bydst.next != NULL);
665 }
666
667 /* xfrm_state_lock is held */
668 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
669 {
670         unsigned short family = xnew->props.family;
671         u32 reqid = xnew->props.reqid;
672         struct xfrm_state *x;
673         struct hlist_node *entry;
674         unsigned int h;
675
676         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
677         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
678                 if (x->props.family     == family &&
679                     x->props.reqid      == reqid &&
680                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
681                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
682                         x->genid = xfrm_state_genid;
683         }
684 }
685
686 void xfrm_state_insert(struct xfrm_state *x)
687 {
688         spin_lock_bh(&xfrm_state_lock);
689         __xfrm_state_bump_genids(x);
690         __xfrm_state_insert(x);
691         spin_unlock_bh(&xfrm_state_lock);
692 }
693 EXPORT_SYMBOL(xfrm_state_insert);
694
695 /* xfrm_state_lock is held */
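/* Find an existing larval (ACQ) state for the given key, or create one
 * if @create is set.  A new entry gets a host (/32 or /128) selector
 * for the exact address pair and expires after XFRM_ACQ_EXPIRES
 * seconds.
 */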
696 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
697 {
698         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
699         struct hlist_node *entry;
700         struct xfrm_state *x;
701
702         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
703                 if (x->props.reqid  != reqid ||
704                     x->props.mode   != mode ||
705                     x->props.family != family ||
706                     x->km.state     != XFRM_STATE_ACQ ||
707                     x->id.spi       != 0 ||
708                     x->id.proto     != proto)
709                         continue;
710
711                 switch (family) {
712                 case AF_INET:
713                         if (x->id.daddr.a4    != daddr->a4 ||
714                             x->props.saddr.a4 != saddr->a4)
715                                 continue;
716                         break;
717                 case AF_INET6:
718                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
719                                              (struct in6_addr *)daddr) ||
720                             !ipv6_addr_equal((struct in6_addr *)
721                                              x->props.saddr.a6,
722                                              (struct in6_addr *)saddr))
723                                 continue;
724                         break;
725                 };
726
727                 xfrm_state_hold(x);
728                 return x;
729         }
730
731         if (!create)
732                 return NULL;
733
734         x = xfrm_state_alloc();
735         if (likely(x)) {
736                 switch (family) {
737                 case AF_INET:
738                         x->sel.daddr.a4 = daddr->a4;
739                         x->sel.saddr.a4 = saddr->a4;
740                         x->sel.prefixlen_d = 32;
741                         x->sel.prefixlen_s = 32;
742                         x->props.saddr.a4 = saddr->a4;
743                         x->id.daddr.a4 = daddr->a4;
744                         break;
745
746                 case AF_INET6:
747                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
748                                        (struct in6_addr *)daddr);
749                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
750                                        (struct in6_addr *)saddr);
751                         x->sel.prefixlen_d = 128;
752                         x->sel.prefixlen_s = 128;
753                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
754                                        (struct in6_addr *)saddr);
755                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
756                                        (struct in6_addr *)daddr);
757                         break;
758                 };
759
760                 x->km.state = XFRM_STATE_ACQ;
761                 x->id.proto = proto;
762                 x->props.family = family;
763                 x->props.mode = mode;
764                 x->props.reqid = reqid;
765                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
766                 xfrm_state_hold(x);
767                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
768                 add_timer(&x->timer);
769                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
770                 h = xfrm_src_hash(daddr, saddr, family);
771                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
772                 wake_up(&km_waitq);
773
774                 xfrm_state_num++;
775
776                 xfrm_hash_grow_check(x->bydst.next != NULL);
777         }
778
779         return x;
780 }
781
782 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
783
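/* Insert a fully specified SA.  Fails with -EEXIST if an equivalent SA
 * is already installed; any matching larval (ACQ) entry found by
 * sequence number or by address is deleted once the new state is in
 * place.
 */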
784 int xfrm_state_add(struct xfrm_state *x)
785 {
786         struct xfrm_state *x1;
787         int family;
788         int err;
789         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
790
791         family = x->props.family;
792
793         spin_lock_bh(&xfrm_state_lock);
794
795         x1 = __xfrm_state_locate(x, use_spi, family);
796         if (x1) {
797                 xfrm_state_put(x1);
798                 x1 = NULL;
799                 err = -EEXIST;
800                 goto out;
801         }
802
803         if (use_spi && x->km.seq) {
804                 x1 = __xfrm_find_acq_byseq(x->km.seq);
805                 if (x1 && ((x1->id.proto != x->id.proto) ||
806                     xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) {
807                         xfrm_state_put(x1);
808                         x1 = NULL;
809                 }
810         }
811
812         if (use_spi && !x1)
813                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
814                                      x->id.proto,
815                                      &x->id.daddr, &x->props.saddr, 0);
816
817         __xfrm_state_bump_genids(x);
818         __xfrm_state_insert(x);
819         err = 0;
820
821 out:
822         spin_unlock_bh(&xfrm_state_lock);
823
824         if (x1) {
825                 xfrm_state_delete(x1);
826                 xfrm_state_put(x1);
827         }
828
829         return err;
830 }
831 EXPORT_SYMBOL(xfrm_state_add);
832
833 #ifdef CONFIG_XFRM_MIGRATE
834 struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp)
835 {
836         int err = -ENOMEM;
837         struct xfrm_state *x = xfrm_state_alloc();
838         if (!x)
839                 goto error;
840
841         memcpy(&x->id, &orig->id, sizeof(x->id));
842         memcpy(&x->sel, &orig->sel, sizeof(x->sel));
843         memcpy(&x->lft, &orig->lft, sizeof(x->lft));
844         x->props.mode = orig->props.mode;
845         x->props.replay_window = orig->props.replay_window;
846         x->props.reqid = orig->props.reqid;
847         x->props.family = orig->props.family;
848         x->props.saddr = orig->props.saddr;
849
850         if (orig->aalg) {
851                 x->aalg = xfrm_algo_clone(orig->aalg);
852                 if (!x->aalg)
853                         goto error;
854         }
855         x->props.aalgo = orig->props.aalgo;
856
857         if (orig->ealg) {
858                 x->ealg = xfrm_algo_clone(orig->ealg);
859                 if (!x->ealg)
860                         goto error;
861         }
862         x->props.ealgo = orig->props.ealgo;
863
864         if (orig->calg) {
865                 x->calg = xfrm_algo_clone(orig->calg);
866                 if (!x->calg)
867                         goto error;
868         }
869         x->props.calgo = orig->props.calgo;
870
871         if (orig->encap) {
872                 x->encap = kmemdup(orig->encap, sizeof(*x->encap), GFP_KERNEL);
873                 if (!x->encap)
874                         goto error;
875         }
876
877         if (orig->coaddr) {
878                 x->coaddr = kmemdup(orig->coaddr, sizeof(*x->coaddr),
879                                     GFP_KERNEL);
880                 if (!x->coaddr)
881                         goto error;
882         }
883
884         err = xfrm_init_state(x);
885         if (err)
886                 goto error;
887
888         x->props.flags = orig->props.flags;
889
890         x->curlft.add_time = orig->curlft.add_time;
891         x->km.state = orig->km.state;
892         x->km.seq = orig->km.seq;
893
894         return x;
895
896  error:
897         if (errp)
898                 *errp = err;
899         if (x) {
900                 kfree(x->aalg);
901                 kfree(x->ealg);
902                 kfree(x->calg);
903                 kfree(x->encap);
904                 kfree(x->coaddr);
905         }
906         kfree(x);
907         return NULL;
908 }
909 EXPORT_SYMBOL(xfrm_state_clone);
910
911 /* xfrm_state_lock is held */
912 struct xfrm_state * xfrm_migrate_state_find(struct xfrm_migrate *m)
913 {
914         unsigned int h;
915         struct xfrm_state *x;
916         struct hlist_node *entry;
917
918         if (m->reqid) {
919                 h = xfrm_dst_hash(&m->old_daddr, &m->old_saddr,
920                                   m->reqid, m->old_family);
921                 hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
922                         if (x->props.mode != m->mode ||
923                             x->id.proto != m->proto)
924                                 continue;
925                         if (m->reqid && x->props.reqid != m->reqid)
926                                 continue;
927                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
928                                           m->old_family) ||
929                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
930                                           m->old_family))
931                                 continue;
932                         xfrm_state_hold(x);
933                         return x;
934                 }
935         } else {
936                 h = xfrm_src_hash(&m->old_daddr, &m->old_saddr,
937                                   m->old_family);
938                 hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
939                         if (x->props.mode != m->mode ||
940                             x->id.proto != m->proto)
941                                 continue;
942                         if (xfrm_addr_cmp(&x->id.daddr, &m->old_daddr,
943                                           m->old_family) ||
944                             xfrm_addr_cmp(&x->props.saddr, &m->old_saddr,
945                                           m->old_family))
946                                 continue;
947                         xfrm_state_hold(x);
948                         return x;
949                 }
950         }
951
952         return NULL;
953 }
954 EXPORT_SYMBOL(xfrm_migrate_state_find);
955
956 struct xfrm_state * xfrm_state_migrate(struct xfrm_state *x,
957                                        struct xfrm_migrate *m)
958 {
959         struct xfrm_state *xc;
960         int err;
961
962         xc = xfrm_state_clone(x, &err);
963         if (!xc)
964                 return NULL;
965
966         memcpy(&xc->id.daddr, &m->new_daddr, sizeof(xc->id.daddr));
967         memcpy(&xc->props.saddr, &m->new_saddr, sizeof(xc->props.saddr));
968
969         /* add state */
970         if (!xfrm_addr_cmp(&x->id.daddr, &m->new_daddr, m->new_family)) {
971                 /* care is needed when the destination address of the
972                    state is updated, as it is part of the SA lookup triplet */
973                 xfrm_state_insert(xc);
974         } else {
975                 if ((err = xfrm_state_add(xc)) < 0)
976                         goto error;
977         }
978
979         return xc;
980 error:
981         kfree(xc);
982         return NULL;
983 }
984 EXPORT_SYMBOL(xfrm_state_migrate);
985 #endif
986
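/* Update an existing SA in place: a larval (ACQ) entry is simply
 * replaced, while a VALID one has its encapsulation, care-of address,
 * selector and lifetimes refreshed under its own lock.
 */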
987 int xfrm_state_update(struct xfrm_state *x)
988 {
989         struct xfrm_state *x1;
990         int err;
991         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
992
993         spin_lock_bh(&xfrm_state_lock);
994         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
995
996         err = -ESRCH;
997         if (!x1)
998                 goto out;
999
1000         if (xfrm_state_kern(x1)) {
1001                 xfrm_state_put(x1);
1002                 err = -EEXIST;
1003                 goto out;
1004         }
1005
1006         if (x1->km.state == XFRM_STATE_ACQ) {
1007                 __xfrm_state_insert(x);
1008                 x = NULL;
1009         }
1010         err = 0;
1011
1012 out:
1013         spin_unlock_bh(&xfrm_state_lock);
1014
1015         if (err)
1016                 return err;
1017
1018         if (!x) {
1019                 xfrm_state_delete(x1);
1020                 xfrm_state_put(x1);
1021                 return 0;
1022         }
1023
1024         err = -EINVAL;
1025         spin_lock_bh(&x1->lock);
1026         if (likely(x1->km.state == XFRM_STATE_VALID)) {
1027                 if (x->encap && x1->encap)
1028                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
1029                 if (x->coaddr && x1->coaddr) {
1030                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
1031                 }
1032                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
1033                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
1034                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
1035                 x1->km.dying = 0;
1036
1037                 mod_timer(&x1->timer, jiffies + HZ);
1038                 if (x1->curlft.use_time)
1039                         xfrm_state_check_expire(x1);
1040
1041                 err = 0;
1042         }
1043         spin_unlock_bh(&x1->lock);
1044
1045         xfrm_state_put(x1);
1046
1047         return err;
1048 }
1049 EXPORT_SYMBOL(xfrm_state_update);
1050
1051 int xfrm_state_check_expire(struct xfrm_state *x)
1052 {
1053         if (!x->curlft.use_time)
1054                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
1055
1056         if (x->km.state != XFRM_STATE_VALID)
1057                 return -EINVAL;
1058
1059         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
1060             x->curlft.packets >= x->lft.hard_packet_limit) {
1061                 x->km.state = XFRM_STATE_EXPIRED;
1062                 mod_timer(&x->timer, jiffies);
1063                 return -EINVAL;
1064         }
1065
1066         if (!x->km.dying &&
1067             (x->curlft.bytes >= x->lft.soft_byte_limit ||
1068              x->curlft.packets >= x->lft.soft_packet_limit)) {
1069                 x->km.dying = 1;
1070                 km_state_expired(x, 0, 0);
1071         }
1072         return 0;
1073 }
1074 EXPORT_SYMBOL(xfrm_state_check_expire);
1075
1076 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
1077 {
1078         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
1079                 - skb_headroom(skb);
1080
1081         if (nhead > 0)
1082                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
1083
1084         /* Check tail too... */
1085         return 0;
1086 }
1087
1088 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
1089 {
1090         int err = xfrm_state_check_expire(x);
1091         if (err < 0)
1092                 goto err;
1093         err = xfrm_state_check_space(x, skb);
1094 err:
1095         return err;
1096 }
1097 EXPORT_SYMBOL(xfrm_state_check);
1098
1099 struct xfrm_state *
1100 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
1101                   unsigned short family)
1102 {
1103         struct xfrm_state *x;
1104
1105         spin_lock_bh(&xfrm_state_lock);
1106         x = __xfrm_state_lookup(daddr, spi, proto, family);
1107         spin_unlock_bh(&xfrm_state_lock);
1108         return x;
1109 }
1110 EXPORT_SYMBOL(xfrm_state_lookup);
1111
1112 struct xfrm_state *
1113 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
1114                          u8 proto, unsigned short family)
1115 {
1116         struct xfrm_state *x;
1117
1118         spin_lock_bh(&xfrm_state_lock);
1119         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
1120         spin_unlock_bh(&xfrm_state_lock);
1121         return x;
1122 }
1123 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
1124
1125 struct xfrm_state *
1126 xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
1127               xfrm_address_t *daddr, xfrm_address_t *saddr,
1128               int create, unsigned short family)
1129 {
1130         struct xfrm_state *x;
1131
1132         spin_lock_bh(&xfrm_state_lock);
1133         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
1134         spin_unlock_bh(&xfrm_state_lock);
1135
1136         return x;
1137 }
1138 EXPORT_SYMBOL(xfrm_find_acq);
1139
1140 #ifdef CONFIG_XFRM_SUB_POLICY
1141 int
1142 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
1143                unsigned short family)
1144 {
1145         int err = 0;
1146         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1147         if (!afinfo)
1148                 return -EAFNOSUPPORT;
1149
1150         spin_lock_bh(&xfrm_state_lock);
1151         if (afinfo->tmpl_sort)
1152                 err = afinfo->tmpl_sort(dst, src, n);
1153         spin_unlock_bh(&xfrm_state_lock);
1154         xfrm_state_put_afinfo(afinfo);
1155         return err;
1156 }
1157 EXPORT_SYMBOL(xfrm_tmpl_sort);
1158
1159 int
1160 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1161                 unsigned short family)
1162 {
1163         int err = 0;
1164         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1165         if (!afinfo)
1166                 return -EAFNOSUPPORT;
1167
1168         spin_lock_bh(&xfrm_state_lock);
1169         if (afinfo->state_sort)
1170                 err = afinfo->state_sort(dst, src, n);
1171         spin_unlock_bh(&xfrm_state_lock);
1172         xfrm_state_put_afinfo(afinfo);
1173         return err;
1174 }
1175 EXPORT_SYMBOL(xfrm_state_sort);
1176 #endif
1177
1178 /* Silly enough, but I'm too lazy to build a resolution list */
1179
1180 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1181 {
1182         int i;
1183
1184         for (i = 0; i <= xfrm_state_hmask; i++) {
1185                 struct hlist_node *entry;
1186                 struct xfrm_state *x;
1187
1188                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1189                         if (x->km.seq == seq &&
1190                             x->km.state == XFRM_STATE_ACQ) {
1191                                 xfrm_state_hold(x);
1192                                 return x;
1193                         }
1194                 }
1195         }
1196         return NULL;
1197 }
1198
1199 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1200 {
1201         struct xfrm_state *x;
1202
1203         spin_lock_bh(&xfrm_state_lock);
1204         x = __xfrm_find_acq_byseq(seq);
1205         spin_unlock_bh(&xfrm_state_lock);
1206         return x;
1207 }
1208 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1209
1210 u32 xfrm_get_acqseq(void)
1211 {
1212         u32 res;
1213         static u32 acqseq;
1214         static DEFINE_SPINLOCK(acqseq_lock);
1215
1216         spin_lock_bh(&acqseq_lock);
1217         res = (++acqseq ? : ++acqseq);
1218         spin_unlock_bh(&acqseq_lock);
1219         return res;
1220 }
1221 EXPORT_SYMBOL(xfrm_get_acqseq);
1222
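/* Assign an SPI to the state.  With minspi == maxspi the exact value
 * is used if free; otherwise up to (maxspi - minspi + 1) random values
 * in the range are tried.  On success the state is hashed into the
 * byspi table and waiters are woken.
 */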
1223 void
1224 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1225 {
1226         unsigned int h;
1227         struct xfrm_state *x0;
1228
1229         if (x->id.spi)
1230                 return;
1231
1232         if (minspi == maxspi) {
1233                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1234                 if (x0) {
1235                         xfrm_state_put(x0);
1236                         return;
1237                 }
1238                 x->id.spi = minspi;
1239         } else {
1240                 u32 spi = 0;
1241                 u32 low = ntohl(minspi);
1242                 u32 high = ntohl(maxspi);
1243                 for (h=0; h<high-low+1; h++) {
1244                         spi = low + net_random()%(high-low+1);
1245                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1246                         if (x0 == NULL) {
1247                                 x->id.spi = htonl(spi);
1248                                 break;
1249                         }
1250                         xfrm_state_put(x0);
1251                 }
1252         }
1253         if (x->id.spi) {
1254                 spin_lock_bh(&xfrm_state_lock);
1255                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1256                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1257                 spin_unlock_bh(&xfrm_state_lock);
1258                 wake_up(&km_waitq);
1259         }
1260 }
1261 EXPORT_SYMBOL(xfrm_alloc_spi);
1262
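/* Iterate over all states matching @proto under xfrm_state_lock,
 * invoking @func for each.  The final entry is passed count == 0 so
 * callers can detect the end of the walk.
 */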
1263 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1264                     void *data)
1265 {
1266         int i;
1267         struct xfrm_state *x, *last = NULL;
1268         struct hlist_node *entry;
1269         int count = 0;
1270         int err = 0;
1271
1272         spin_lock_bh(&xfrm_state_lock);
1273         for (i = 0; i <= xfrm_state_hmask; i++) {
1274                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1275                         if (!xfrm_id_proto_match(x->id.proto, proto))
1276                                 continue;
1277                         if (last) {
1278                                 err = func(last, count, data);
1279                                 if (err)
1280                                         goto out;
1281                         }
1282                         last = x;
1283                         count++;
1284                 }
1285         }
1286         if (count == 0) {
1287                 err = -ENOENT;
1288                 goto out;
1289         }
1290         err = func(last, 0, data);
1291 out:
1292         spin_unlock_bh(&xfrm_state_lock);
1293         return err;
1294 }
1295 EXPORT_SYMBOL(xfrm_state_walk);
1296
1297
1298 void xfrm_replay_notify(struct xfrm_state *x, int event)
1299 {
1300         struct km_event c;
1301         /* we send notify messages in case
1302          *  1. we updated one of the sequence numbers, and the seqno difference
1303          *     is at least x->replay_maxdiff, in this case we also update the
1304          *     timeout of our timer function
1305          *  2. if x->replay_maxage has elapsed since last update,
1306          *     and there were changes
1307          *
1308          *  The state structure must be locked!
1309          */
1310
1311         switch (event) {
1312         case XFRM_REPLAY_UPDATE:
1313                 if (x->replay_maxdiff &&
1314                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1315                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1316                         if (x->xflags & XFRM_TIME_DEFER)
1317                                 event = XFRM_REPLAY_TIMEOUT;
1318                         else
1319                                 return;
1320                 }
1321
1322                 break;
1323
1324         case XFRM_REPLAY_TIMEOUT:
1325                 if ((x->replay.seq == x->preplay.seq) &&
1326                     (x->replay.bitmap == x->preplay.bitmap) &&
1327                     (x->replay.oseq == x->preplay.oseq)) {
1328                         x->xflags |= XFRM_TIME_DEFER;
1329                         return;
1330                 }
1331
1332                 break;
1333         }
1334
1335         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1336         c.event = XFRM_MSG_NEWAE;
1337         c.data.aevent = event;
1338         km_state_notify(x, &c);
1339
1340         if (x->replay_maxage &&
1341             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1342                 x->xflags &= ~XFRM_TIME_DEFER;
1343 }
1344 EXPORT_SYMBOL(xfrm_replay_notify);
1345
1346 static void xfrm_replay_timer_handler(unsigned long data)
1347 {
1348         struct xfrm_state *x = (struct xfrm_state*)data;
1349
1350         spin_lock(&x->lock);
1351
1352         if (x->km.state == XFRM_STATE_VALID) {
1353                 if (xfrm_aevent_is_on())
1354                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1355                 else
1356                         x->xflags |= XFRM_TIME_DEFER;
1357         }
1358
1359         spin_unlock(&x->lock);
1360 }
1361
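/* Inbound anti-replay check: sequence numbers newer than the last one
 * seen are accepted, older ones only if they fall inside the replay
 * window and are not already marked in the bitmap.  Sequence number 0
 * is always rejected.
 */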
1362 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1363 {
1364         u32 diff;
1365         u32 seq = ntohl(net_seq);
1366
1367         if (unlikely(seq == 0))
1368                 return -EINVAL;
1369
1370         if (likely(seq > x->replay.seq))
1371                 return 0;
1372
1373         diff = x->replay.seq - seq;
1374         if (diff >= min_t(unsigned int, x->props.replay_window,
1375                           sizeof(x->replay.bitmap) * 8)) {
1376                 x->stats.replay_window++;
1377                 return -EINVAL;
1378         }
1379
1380         if (x->replay.bitmap & (1U << diff)) {
1381                 x->stats.replay++;
1382                 return -EINVAL;
1383         }
1384         return 0;
1385 }
1386 EXPORT_SYMBOL(xfrm_replay_check);
1387
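/* Record an accepted sequence number in the replay state, shifting or
 * setting the window bitmap as needed, and notify the key manager if
 * async events are enabled.
 */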
1388 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1389 {
1390         u32 diff;
1391         u32 seq = ntohl(net_seq);
1392
1393         if (seq > x->replay.seq) {
1394                 diff = seq - x->replay.seq;
1395                 if (diff < x->props.replay_window)
1396                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1397                 else
1398                         x->replay.bitmap = 1;
1399                 x->replay.seq = seq;
1400         } else {
1401                 diff = x->replay.seq - seq;
1402                 x->replay.bitmap |= (1U << diff);
1403         }
1404
1405         if (xfrm_aevent_is_on())
1406                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1407 }
1408 EXPORT_SYMBOL(xfrm_replay_advance);
1409
1410 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1411 static DEFINE_RWLOCK(xfrm_km_lock);
1412
1413 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1414 {
1415         struct xfrm_mgr *km;
1416
1417         read_lock(&xfrm_km_lock);
1418         list_for_each_entry(km, &xfrm_km_list, list)
1419                 if (km->notify_policy)
1420                         km->notify_policy(xp, dir, c);
1421         read_unlock(&xfrm_km_lock);
1422 }
1423
1424 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1425 {
1426         struct xfrm_mgr *km;
1427         read_lock(&xfrm_km_lock);
1428         list_for_each_entry(km, &xfrm_km_list, list)
1429                 if (km->notify)
1430                         km->notify(x, c);
1431         read_unlock(&xfrm_km_lock);
1432 }
1433
1434 EXPORT_SYMBOL(km_policy_notify);
1435 EXPORT_SYMBOL(km_state_notify);
1436
1437 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1438 {
1439         struct km_event c;
1440
1441         c.data.hard = hard;
1442         c.pid = pid;
1443         c.event = XFRM_MSG_EXPIRE;
1444         km_state_notify(x, &c);
1445
1446         if (hard)
1447                 wake_up(&km_waitq);
1448 }
1449
1450 EXPORT_SYMBOL(km_state_expired);
1451 /*
1452  * We send to all registered managers regardless of failure;
1453  * we are happy with one success.
1454  */
1455 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1456 {
1457         int err = -EINVAL, acqret;
1458         struct xfrm_mgr *km;
1459
1460         read_lock(&xfrm_km_lock);
1461         list_for_each_entry(km, &xfrm_km_list, list) {
1462                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1463                 if (!acqret)
1464                         err = acqret;
1465         }
1466         read_unlock(&xfrm_km_lock);
1467         return err;
1468 }
1469 EXPORT_SYMBOL(km_query);
1470
1471 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1472 {
1473         int err = -EINVAL;
1474         struct xfrm_mgr *km;
1475
1476         read_lock(&xfrm_km_lock);
1477         list_for_each_entry(km, &xfrm_km_list, list) {
1478                 if (km->new_mapping)
1479                         err = km->new_mapping(x, ipaddr, sport);
1480                 if (!err)
1481                         break;
1482         }
1483         read_unlock(&xfrm_km_lock);
1484         return err;
1485 }
1486 EXPORT_SYMBOL(km_new_mapping);
1487
1488 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1489 {
1490         struct km_event c;
1491
1492         c.data.hard = hard;
1493         c.pid = pid;
1494         c.event = XFRM_MSG_POLEXPIRE;
1495         km_policy_notify(pol, dir, &c);
1496
1497         if (hard)
1498                 wake_up(&km_waitq);
1499 }
1500 EXPORT_SYMBOL(km_policy_expired);
1501
1502 int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type,
1503                struct xfrm_migrate *m, int num_migrate)
1504 {
1505         int err = -EINVAL;
1506         int ret;
1507         struct xfrm_mgr *km;
1508
1509         read_lock(&xfrm_km_lock);
1510         list_for_each_entry(km, &xfrm_km_list, list) {
1511                 if (km->migrate) {
1512                         ret = km->migrate(sel, dir, type, m, num_migrate);
1513                         if (!ret)
1514                                 err = ret;
1515                 }
1516         }
1517         read_unlock(&xfrm_km_lock);
1518         return err;
1519 }
1520 EXPORT_SYMBOL(km_migrate);
1521
1522 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1523 {
1524         int err = -EINVAL;
1525         int ret;
1526         struct xfrm_mgr *km;
1527
1528         read_lock(&xfrm_km_lock);
1529         list_for_each_entry(km, &xfrm_km_list, list) {
1530                 if (km->report) {
1531                         ret = km->report(proto, sel, addr);
1532                         if (!ret)
1533                                 err = ret;
1534                 }
1535         }
1536         read_unlock(&xfrm_km_lock);
1537         return err;
1538 }
1539 EXPORT_SYMBOL(km_report);
1540
1541 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1542 {
1543         int err;
1544         u8 *data;
1545         struct xfrm_mgr *km;
1546         struct xfrm_policy *pol = NULL;
1547
1548         if (optlen <= 0 || optlen > PAGE_SIZE)
1549                 return -EMSGSIZE;
1550
1551         data = kmalloc(optlen, GFP_KERNEL);
1552         if (!data)
1553                 return -ENOMEM;
1554
1555         err = -EFAULT;
1556         if (copy_from_user(data, optval, optlen))
1557                 goto out;
1558
1559         err = -EINVAL;
1560         read_lock(&xfrm_km_lock);
1561         list_for_each_entry(km, &xfrm_km_list, list) {
1562                 pol = km->compile_policy(sk, optname, data,
1563                                          optlen, &err);
1564                 if (err >= 0)
1565                         break;
1566         }
1567         read_unlock(&xfrm_km_lock);
1568
1569         if (err >= 0) {
1570                 xfrm_sk_policy_insert(sk, err, pol);
1571                 xfrm_pol_put(pol);
1572                 err = 0;
1573         }
1574
1575 out:
1576         kfree(data);
1577         return err;
1578 }
1579 EXPORT_SYMBOL(xfrm_user_policy);
1580
1581 int xfrm_register_km(struct xfrm_mgr *km)
1582 {
1583         write_lock_bh(&xfrm_km_lock);
1584         list_add_tail(&km->list, &xfrm_km_list);
1585         write_unlock_bh(&xfrm_km_lock);
1586         return 0;
1587 }
1588 EXPORT_SYMBOL(xfrm_register_km);
1589
1590 int xfrm_unregister_km(struct xfrm_mgr *km)
1591 {
1592         write_lock_bh(&xfrm_km_lock);
1593         list_del(&km->list);
1594         write_unlock_bh(&xfrm_km_lock);
1595         return 0;
1596 }
1597 EXPORT_SYMBOL(xfrm_unregister_km);
1598
1599 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1600 {
1601         int err = 0;
1602         if (unlikely(afinfo == NULL))
1603                 return -EINVAL;
1604         if (unlikely(afinfo->family >= NPROTO))
1605                 return -EAFNOSUPPORT;
1606         write_lock_bh(&xfrm_state_afinfo_lock);
1607         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1608                 err = -ENOBUFS;
1609         else
1610                 xfrm_state_afinfo[afinfo->family] = afinfo;
1611         write_unlock_bh(&xfrm_state_afinfo_lock);
1612         return err;
1613 }
1614 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1615
1616 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1617 {
1618         int err = 0;
1619         if (unlikely(afinfo == NULL))
1620                 return -EINVAL;
1621         if (unlikely(afinfo->family >= NPROTO))
1622                 return -EAFNOSUPPORT;
1623         write_lock_bh(&xfrm_state_afinfo_lock);
1624         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1625                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1626                         err = -EINVAL;
1627                 else
1628                         xfrm_state_afinfo[afinfo->family] = NULL;
1629         }
1630         write_unlock_bh(&xfrm_state_afinfo_lock);
1631         return err;
1632 }
1633 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1634
1635 struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1636 {
1637         struct xfrm_state_afinfo *afinfo;
1638         if (unlikely(family >= NPROTO))
1639                 return NULL;
1640         read_lock(&xfrm_state_afinfo_lock);
1641         afinfo = xfrm_state_afinfo[family];
1642         if (unlikely(!afinfo))
1643                 read_unlock(&xfrm_state_afinfo_lock);
1644         return afinfo;
1645 }
1646
1647 void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1648 {
1649         read_unlock(&xfrm_state_afinfo_lock);
1650 }
1651
1652 EXPORT_SYMBOL(xfrm_state_get_afinfo);
1653 EXPORT_SYMBOL(xfrm_state_put_afinfo);
1654
1655 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1656 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1657 {
1658         if (x->tunnel) {
1659                 struct xfrm_state *t = x->tunnel;
1660
1661                 if (atomic_read(&t->tunnel_users) == 2)
1662                         xfrm_state_delete(t);
1663                 atomic_dec(&t->tunnel_users);
1664                 xfrm_state_put(t);
1665                 x->tunnel = NULL;
1666         }
1667 }
1668 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1669
1670 /*
1671  * This function is NOT optimal.  For example, with ESP it will give an
1672  * MTU that's usually two bytes short of being optimal.  However, it will
1673  * usually give an answer that's a multiple of 4 provided the input is
1674  * also a multiple of 4.
1675  */
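/* The loop below shrinks the payload estimate until the transformed
 * packet size (as reported by the type's get_max_size, or header_len
 * when unavailable) fits within the given MTU; it never returns less
 * than 68.
 */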
1676 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1677 {
1678         int res = mtu;
1679
1680         res -= x->props.header_len;
1681
1682         for (;;) {
1683                 int m = res;
1684
1685                 if (m < 68)
1686                         return 68;
1687
1688                 spin_lock_bh(&x->lock);
1689                 if (x->km.state == XFRM_STATE_VALID &&
1690                     x->type && x->type->get_max_size)
1691                         m = x->type->get_max_size(x, m);
1692                 else
1693                         m += x->props.header_len;
1694                 spin_unlock_bh(&x->lock);
1695
1696                 if (m <= mtu)
1697                         break;
1698                 res -= (m - mtu);
1699         }
1700
1701         return res;
1702 }
1703
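/* Bind a freshly configured state to its address family, transform
 * type and mode, run their init hooks and mark the state
 * XFRM_STATE_VALID on success.
 */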
1704 int xfrm_init_state(struct xfrm_state *x)
1705 {
1706         struct xfrm_state_afinfo *afinfo;
1707         int family = x->props.family;
1708         int err;
1709
1710         err = -EAFNOSUPPORT;
1711         afinfo = xfrm_state_get_afinfo(family);
1712         if (!afinfo)
1713                 goto error;
1714
1715         err = 0;
1716         if (afinfo->init_flags)
1717                 err = afinfo->init_flags(x);
1718
1719         xfrm_state_put_afinfo(afinfo);
1720
1721         if (err)
1722                 goto error;
1723
1724         err = -EPROTONOSUPPORT;
1725         x->type = xfrm_get_type(x->id.proto, family);
1726         if (x->type == NULL)
1727                 goto error;
1728
1729         err = x->type->init_state(x);
1730         if (err)
1731                 goto error;
1732
1733         x->mode = xfrm_get_mode(x->props.mode, family);
1734         if (x->mode == NULL)
1735                 goto error;
1736
1737         x->km.state = XFRM_STATE_VALID;
1738
1739 error:
1740         return err;
1741 }
1742
1743 EXPORT_SYMBOL(xfrm_init_state);
1744
1745 void __init xfrm_state_init(void)
1746 {
1747         unsigned int sz;
1748
1749         sz = sizeof(struct hlist_head) * 8;
1750
1751         xfrm_state_bydst = xfrm_hash_alloc(sz);
1752         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1753         xfrm_state_byspi = xfrm_hash_alloc(sz);
1754         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1755                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1756         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1757
1758         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1759 }
1760