1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23 #include <linux/audit.h>
24
25 #include "xfrm_hash.h"
26
27 struct sock *xfrm_nl;
28 EXPORT_SYMBOL(xfrm_nl);
29
30 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
31 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
32
33 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
34 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
35
36 /* Each xfrm_state may be linked to two tables:
37
38    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
39    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
40       destination/tunnel endpoint. (output)
41  */
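/* Sketch of how one state ends up in the tables (paraphrasing
 * __xfrm_state_insert() below; a third, by-source table serves lookups
 * by address pair):
 *
 *	h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr, x->props.reqid, family);
 *	hlist_add_head(&x->bydst, xfrm_state_bydst + h);
 *	h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, family);
 *	hlist_add_head(&x->bysrc, xfrm_state_bysrc + h);
 *	if (x->id.spi)
 *		hlist_add_head(&x->byspi, xfrm_state_byspi +
 *			       xfrm_spi_hash(&x->id.daddr, x->id.spi,
 *					     x->id.proto, family));
 */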
42
43 static DEFINE_SPINLOCK(xfrm_state_lock);
44
45 /* Hash table to find appropriate SA towards given target (endpoint
46  * of tunnel or destination of transport mode) allowed by selector.
47  *
48  * Main use is finding SA after policy selected tunnel or transport mode.
49  * Also, it can be used by ah/esp icmp error handler to find offending SA.
50  */
51 static struct hlist_head *xfrm_state_bydst __read_mostly;
52 static struct hlist_head *xfrm_state_bysrc __read_mostly;
53 static struct hlist_head *xfrm_state_byspi __read_mostly;
54 static unsigned int xfrm_state_hmask __read_mostly;
55 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
56 static unsigned int xfrm_state_num;
57 static unsigned int xfrm_state_genid;
58
59 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
60                                          xfrm_address_t *saddr,
61                                          u32 reqid,
62                                          unsigned short family)
63 {
64         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
65 }
66
67 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
68                                          xfrm_address_t *saddr,
69                                          unsigned short family)
70 {
71         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
72 }
73
74 static inline unsigned int
75 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
76 {
77         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
78 }
79
80 static void xfrm_hash_transfer(struct hlist_head *list,
81                                struct hlist_head *ndsttable,
82                                struct hlist_head *nsrctable,
83                                struct hlist_head *nspitable,
84                                unsigned int nhashmask)
85 {
86         struct hlist_node *entry, *tmp;
87         struct xfrm_state *x;
88
89         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
90                 unsigned int h;
91
92                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
93                                     x->props.reqid, x->props.family,
94                                     nhashmask);
95                 hlist_add_head(&x->bydst, ndsttable+h);
96
97                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
98                                     x->props.family,
99                                     nhashmask);
100                 hlist_add_head(&x->bysrc, nsrctable+h);
101
102                 if (x->id.spi) {
103                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
104                                             x->id.proto, x->props.family,
105                                             nhashmask);
106                         hlist_add_head(&x->byspi, nspitable+h);
107                 }
108         }
109 }
110
111 static unsigned long xfrm_hash_new_size(void)
112 {
113         return ((xfrm_state_hmask + 1) << 1) *
114                 sizeof(struct hlist_head);
115 }
116
117 static DEFINE_MUTEX(hash_resize_mutex);
118
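/* Grow the state hash tables: allocate three new tables of double the size,
 * rehash every chain into them under xfrm_state_lock, then free the old
 * tables outside the lock.  Serialized by hash_resize_mutex.
 */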
119 static void xfrm_hash_resize(struct work_struct *__unused)
120 {
121         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
122         unsigned long nsize, osize;
123         unsigned int nhashmask, ohashmask;
124         int i;
125
126         mutex_lock(&hash_resize_mutex);
127
128         nsize = xfrm_hash_new_size();
129         ndst = xfrm_hash_alloc(nsize);
130         if (!ndst)
131                 goto out_unlock;
132         nsrc = xfrm_hash_alloc(nsize);
133         if (!nsrc) {
134                 xfrm_hash_free(ndst, nsize);
135                 goto out_unlock;
136         }
137         nspi = xfrm_hash_alloc(nsize);
138         if (!nspi) {
139                 xfrm_hash_free(ndst, nsize);
140                 xfrm_hash_free(nsrc, nsize);
141                 goto out_unlock;
142         }
143
144         spin_lock_bh(&xfrm_state_lock);
145
146         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
147         for (i = xfrm_state_hmask; i >= 0; i--)
148                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
149                                    nhashmask);
150
151         odst = xfrm_state_bydst;
152         osrc = xfrm_state_bysrc;
153         ospi = xfrm_state_byspi;
154         ohashmask = xfrm_state_hmask;
155
156         xfrm_state_bydst = ndst;
157         xfrm_state_bysrc = nsrc;
158         xfrm_state_byspi = nspi;
159         xfrm_state_hmask = nhashmask;
160
161         spin_unlock_bh(&xfrm_state_lock);
162
163         osize = (ohashmask + 1) * sizeof(struct hlist_head);
164         xfrm_hash_free(odst, osize);
165         xfrm_hash_free(osrc, osize);
166         xfrm_hash_free(ospi, osize);
167
168 out_unlock:
169         mutex_unlock(&hash_resize_mutex);
170 }
171
172 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize);
173
174 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
175 EXPORT_SYMBOL(km_waitq);
176
177 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
178 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
179
180 static struct work_struct xfrm_state_gc_work;
181 static HLIST_HEAD(xfrm_state_gc_list);
182 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
183
184 int __xfrm_state_delete(struct xfrm_state *x);
185
186 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
187 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
188
189 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
190 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
191
192 static void xfrm_state_gc_destroy(struct xfrm_state *x)
193 {
194         del_timer_sync(&x->timer);
195         del_timer_sync(&x->rtimer);
196         kfree(x->aalg);
197         kfree(x->ealg);
198         kfree(x->calg);
199         kfree(x->encap);
200         kfree(x->coaddr);
201         if (x->mode)
202                 xfrm_put_mode(x->mode);
203         if (x->type) {
204                 x->type->destructor(x);
205                 xfrm_put_type(x->type);
206         }
207         security_xfrm_state_free(x);
208         kfree(x);
209 }
210
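/* Deferred destruction: detach the pending list under xfrm_state_gc_lock,
 * then tear each state down (timers, algorithms, encap, type/mode references)
 * outside the lock and wake any waiters.
 */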
211 static void xfrm_state_gc_task(struct work_struct *data)
212 {
213         struct xfrm_state *x;
214         struct hlist_node *entry, *tmp;
215         struct hlist_head gc_list;
216
217         spin_lock_bh(&xfrm_state_gc_lock);
218         gc_list.first = xfrm_state_gc_list.first;
219         INIT_HLIST_HEAD(&xfrm_state_gc_list);
220         spin_unlock_bh(&xfrm_state_gc_lock);
221
222         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
223                 xfrm_state_gc_destroy(x);
224
225         wake_up(&km_waitq);
226 }
227
228 static inline unsigned long make_jiffies(long secs)
229 {
230         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
231                 return MAX_SCHEDULE_TIMEOUT-1;
232         else
233                 return secs*HZ;
234 }
235
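/* Per-state lifetime timer.  Hard add/use expiry deletes the state (a larval
 * XFRM_STATE_ACQ entry without an SPI is first marked EXPIRED and reaped on
 * the next tick); soft expiry marks the state dying and notifies the key
 * manager via km_state_expired().  The timer is then re-armed for the nearest
 * remaining deadline.
 */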
236 static void xfrm_timer_handler(unsigned long data)
237 {
238         struct xfrm_state *x = (struct xfrm_state*)data;
239         unsigned long now = (unsigned long)xtime.tv_sec;
240         long next = LONG_MAX;
241         int warn = 0;
242         int err = 0;
243
244         spin_lock(&x->lock);
245         if (x->km.state == XFRM_STATE_DEAD)
246                 goto out;
247         if (x->km.state == XFRM_STATE_EXPIRED)
248                 goto expired;
249         if (x->lft.hard_add_expires_seconds) {
250                 long tmo = x->lft.hard_add_expires_seconds +
251                         x->curlft.add_time - now;
252                 if (tmo <= 0)
253                         goto expired;
254                 if (tmo < next)
255                         next = tmo;
256         }
257         if (x->lft.hard_use_expires_seconds) {
258                 long tmo = x->lft.hard_use_expires_seconds +
259                         (x->curlft.use_time ? : now) - now;
260                 if (tmo <= 0)
261                         goto expired;
262                 if (tmo < next)
263                         next = tmo;
264         }
265         if (x->km.dying)
266                 goto resched;
267         if (x->lft.soft_add_expires_seconds) {
268                 long tmo = x->lft.soft_add_expires_seconds +
269                         x->curlft.add_time - now;
270                 if (tmo <= 0)
271                         warn = 1;
272                 else if (tmo < next)
273                         next = tmo;
274         }
275         if (x->lft.soft_use_expires_seconds) {
276                 long tmo = x->lft.soft_use_expires_seconds +
277                         (x->curlft.use_time ? : now) - now;
278                 if (tmo <= 0)
279                         warn = 1;
280                 else if (tmo < next)
281                         next = tmo;
282         }
283
284         x->km.dying = warn;
285         if (warn)
286                 km_state_expired(x, 0, 0);
287 resched:
288         if (next != LONG_MAX)
289                 mod_timer(&x->timer, jiffies + make_jiffies(next));
290
291         goto out;
292
293 expired:
294         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
295                 x->km.state = XFRM_STATE_EXPIRED;
296                 wake_up(&km_waitq);
297                 next = 2;
298                 goto resched;
299         }
300
301         err = __xfrm_state_delete(x);
302         if (!err && x->id.spi)
303                 km_state_expired(x, 1, 0);
304
305         xfrm_audit_log(audit_get_loginuid(current->audit_context), 0,
306                        AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
307
308 out:
309         spin_unlock(&x->lock);
310 }
311
312 static void xfrm_replay_timer_handler(unsigned long data);
313
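/* Allocate a state with one reference held, both timers wired up and all
 * lifetime limits set to XFRM_INF.  A rough usage sketch (caller-side flow is
 * illustrative, not taken from this file):
 *
 *	struct xfrm_state *x = xfrm_state_alloc();
 *
 *	if (!x)
 *		return -ENOMEM;
 *	(fill in x->id, x->props, x->sel and x->lft here)
 *	err = xfrm_state_add(x);
 *	if (err) {
 *		x->km.state = XFRM_STATE_DEAD;
 *		xfrm_state_put(x);
 *	}
 */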
314 struct xfrm_state *xfrm_state_alloc(void)
315 {
316         struct xfrm_state *x;
317
318         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
319
320         if (x) {
321                 atomic_set(&x->refcnt, 1);
322                 atomic_set(&x->tunnel_users, 0);
323                 INIT_HLIST_NODE(&x->bydst);
324                 INIT_HLIST_NODE(&x->bysrc);
325                 INIT_HLIST_NODE(&x->byspi);
326                 init_timer(&x->timer);
327                 x->timer.function = xfrm_timer_handler;
328                 x->timer.data     = (unsigned long)x;
329                 init_timer(&x->rtimer);
330                 x->rtimer.function = xfrm_replay_timer_handler;
331                 x->rtimer.data     = (unsigned long)x;
332                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
333                 x->lft.soft_byte_limit = XFRM_INF;
334                 x->lft.soft_packet_limit = XFRM_INF;
335                 x->lft.hard_byte_limit = XFRM_INF;
336                 x->lft.hard_packet_limit = XFRM_INF;
337                 x->replay_maxage = 0;
338                 x->replay_maxdiff = 0;
339                 spin_lock_init(&x->lock);
340         }
341         return x;
342 }
343 EXPORT_SYMBOL(xfrm_state_alloc);
344
345 void __xfrm_state_destroy(struct xfrm_state *x)
346 {
347         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
348
349         spin_lock_bh(&xfrm_state_gc_lock);
350         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
351         spin_unlock_bh(&xfrm_state_gc_lock);
352         schedule_work(&xfrm_state_gc_work);
353 }
354 EXPORT_SYMBOL(__xfrm_state_destroy);
355
356 int __xfrm_state_delete(struct xfrm_state *x)
357 {
358         int err = -ESRCH;
359
360         if (x->km.state != XFRM_STATE_DEAD) {
361                 x->km.state = XFRM_STATE_DEAD;
362                 spin_lock(&xfrm_state_lock);
363                 hlist_del(&x->bydst);
364                 hlist_del(&x->bysrc);
365                 if (x->id.spi)
366                         hlist_del(&x->byspi);
367                 xfrm_state_num--;
368                 spin_unlock(&xfrm_state_lock);
369
370                 /* All xfrm_state objects are created by xfrm_state_alloc.
371                  * The xfrm_state_alloc call gives a reference, and that
372                  * is what we are dropping here.
373                  */
374                 __xfrm_state_put(x);
375                 err = 0;
376         }
377
378         return err;
379 }
380 EXPORT_SYMBOL(__xfrm_state_delete);
381
382 int xfrm_state_delete(struct xfrm_state *x)
383 {
384         int err;
385
386         spin_lock_bh(&x->lock);
387         err = __xfrm_state_delete(x);
388         spin_unlock_bh(&x->lock);
389
390         return err;
391 }
392 EXPORT_SYMBOL(xfrm_state_delete);
393
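/* Delete every state matching @proto, skipping those flagged by
 * xfrm_state_kern(), and emit one audit record per deletion.  The bucket walk
 * restarts after each delete because xfrm_state_lock is dropped around
 * xfrm_state_delete().
 */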
394 void xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info)
395 {
396         int i;
397         int err = 0;
398
399         spin_lock_bh(&xfrm_state_lock);
400         for (i = 0; i <= xfrm_state_hmask; i++) {
401                 struct hlist_node *entry;
402                 struct xfrm_state *x;
403 restart:
404                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
405                         if (!xfrm_state_kern(x) &&
406                             xfrm_id_proto_match(x->id.proto, proto)) {
407                                 xfrm_state_hold(x);
408                                 spin_unlock_bh(&xfrm_state_lock);
409
410                                 err = xfrm_state_delete(x);
411                                 xfrm_audit_log(audit_info->loginuid,
412                                                audit_info->secid,
413                                                AUDIT_MAC_IPSEC_DELSA,
414                                                err ? 0 : 1, NULL, x);
415                                 xfrm_state_put(x);
416
417                                 spin_lock_bh(&xfrm_state_lock);
418                                 goto restart;
419                         }
420                 }
421         }
422         spin_unlock_bh(&xfrm_state_lock);
423         wake_up(&km_waitq);
424 }
425 EXPORT_SYMBOL(xfrm_state_flush);
426
427 static int
428 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
429                   struct xfrm_tmpl *tmpl,
430                   xfrm_address_t *daddr, xfrm_address_t *saddr,
431                   unsigned short family)
432 {
433         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
434         if (!afinfo)
435                 return -1;
436         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
437         xfrm_state_put_afinfo(afinfo);
438         return 0;
439 }
440
441 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
442 {
443         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
444         struct xfrm_state *x;
445         struct hlist_node *entry;
446
447         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
448                 if (x->props.family != family ||
449                     x->id.spi       != spi ||
450                     x->id.proto     != proto)
451                         continue;
452
453                 switch (family) {
454                 case AF_INET:
455                         if (x->id.daddr.a4 != daddr->a4)
456                                 continue;
457                         break;
458                 case AF_INET6:
459                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
460                                              (struct in6_addr *)
461                                              x->id.daddr.a6))
462                                 continue;
463                         break;
464                 }
465
466                 xfrm_state_hold(x);
467                 return x;
468         }
469
470         return NULL;
471 }
472
473 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
474 {
475         unsigned int h = xfrm_src_hash(daddr, saddr, family);
476         struct xfrm_state *x;
477         struct hlist_node *entry;
478
479         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
480                 if (x->props.family != family ||
481                     x->id.proto     != proto)
482                         continue;
483
484                 switch (family) {
485                 case AF_INET:
486                         if (x->id.daddr.a4 != daddr->a4 ||
487                             x->props.saddr.a4 != saddr->a4)
488                                 continue;
489                         break;
490                 case AF_INET6:
491                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
492                                              (struct in6_addr *)
493                                              x->id.daddr.a6) ||
494                             !ipv6_addr_equal((struct in6_addr *)saddr,
495                                              (struct in6_addr *)
496                                              x->props.saddr.a6))
497                                 continue;
498                         break;
499                 }
500
501                 xfrm_state_hold(x);
502                 return x;
503         }
504
505         return NULL;
506 }
507
508 static inline struct xfrm_state *
509 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
510 {
511         if (use_spi)
512                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
513                                            x->id.proto, family);
514         else
515                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
516                                                   &x->props.saddr,
517                                                   x->id.proto, family);
518 }
519
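/* Ask the worker to grow the hash tables when a chain collision was observed,
 * the table has not yet reached xfrm_state_hashmax and there are more states
 * than buckets.
 */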
520 static void xfrm_hash_grow_check(int have_hash_collision)
521 {
522         if (have_hash_collision &&
523             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
524             xfrm_state_num > xfrm_state_hmask)
525                 schedule_work(&xfrm_hash_work);
526 }
527
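/* Main output-path SA resolution.  Among states matching (daddr, reqid, mode,
 * proto) prefer a VALID state whose selector and security context match,
 * picking the freshest non-dying candidate.  If nothing matches and no
 * acquire is pending, a larval XFRM_STATE_ACQ state is created and the key
 * managers are asked to negotiate via km_query(); -EAGAIN is returned while
 * an acquire is in progress.
 */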
528 struct xfrm_state *
529 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
530                 struct flowi *fl, struct xfrm_tmpl *tmpl,
531                 struct xfrm_policy *pol, int *err,
532                 unsigned short family)
533 {
534         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
535         struct hlist_node *entry;
536         struct xfrm_state *x, *x0;
537         int acquire_in_progress = 0;
538         int error = 0;
539         struct xfrm_state *best = NULL;
540         
541         spin_lock_bh(&xfrm_state_lock);
542         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
543                 if (x->props.family == family &&
544                     x->props.reqid == tmpl->reqid &&
545                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
546                     xfrm_state_addr_check(x, daddr, saddr, family) &&
547                     tmpl->mode == x->props.mode &&
548                     tmpl->id.proto == x->id.proto &&
549                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
550                         /* Resolution logic:
551                            1. There is a valid state with a matching
552                               selector. Done.
553                            2. Valid state with inappropriate selector. Skip.
554
555                            Entering area of "sysdeps".
556
557                            3. If the state is not valid, its selector is
558                               temporary and matches only the session that
559                               triggered the previous resolution. The key
560                               manager will install a state with a proper
561                               selector.
562                          */
563                         if (x->km.state == XFRM_STATE_VALID) {
564                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
565                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
566                                         continue;
567                                 if (!best ||
568                                     best->km.dying > x->km.dying ||
569                                     (best->km.dying == x->km.dying &&
570                                      best->curlft.add_time < x->curlft.add_time))
571                                         best = x;
572                         } else if (x->km.state == XFRM_STATE_ACQ) {
573                                 acquire_in_progress = 1;
574                         } else if (x->km.state == XFRM_STATE_ERROR ||
575                                    x->km.state == XFRM_STATE_EXPIRED) {
576                                 if (xfrm_selector_match(&x->sel, fl, family) &&
577                                     security_xfrm_state_pol_flow_match(x, pol, fl))
578                                         error = -ESRCH;
579                         }
580                 }
581         }
582
583         x = best;
584         if (!x && !error && !acquire_in_progress) {
585                 if (tmpl->id.spi &&
586                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
587                                               tmpl->id.proto, family)) != NULL) {
588                         xfrm_state_put(x0);
589                         error = -EEXIST;
590                         goto out;
591                 }
592                 x = xfrm_state_alloc();
593                 if (x == NULL) {
594                         error = -ENOMEM;
595                         goto out;
596                 }
597                 /* Initialize temporary selector matching only
598                  * to current session. */
599                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
600
601                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
602                 if (error) {
603                         x->km.state = XFRM_STATE_DEAD;
604                         xfrm_state_put(x);
605                         x = NULL;
606                         goto out;
607                 }
608
609                 if (km_query(x, tmpl, pol) == 0) {
610                         x->km.state = XFRM_STATE_ACQ;
611                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
612                         h = xfrm_src_hash(daddr, saddr, family);
613                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
614                         if (x->id.spi) {
615                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
616                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
617                         }
618                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
619                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
620                         add_timer(&x->timer);
621                         xfrm_state_num++;
622                         xfrm_hash_grow_check(x->bydst.next != NULL);
623                 } else {
624                         x->km.state = XFRM_STATE_DEAD;
625                         xfrm_state_put(x);
626                         x = NULL;
627                         error = -ESRCH;
628                 }
629         }
630 out:
631         if (x)
632                 xfrm_state_hold(x);
633         else
634                 *err = acquire_in_progress ? -EAGAIN : error;
635         spin_unlock_bh(&xfrm_state_lock);
636         return x;
637 }
638
639 static void __xfrm_state_insert(struct xfrm_state *x)
640 {
641         unsigned int h;
642
643         x->genid = ++xfrm_state_genid;
644
645         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
646                           x->props.reqid, x->props.family);
647         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
648
649         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
650         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
651
652         if (x->id.spi) {
653                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
654                                   x->props.family);
655
656                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
657         }
658
659         mod_timer(&x->timer, jiffies + HZ);
660         if (x->replay_maxage)
661                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
662
663         wake_up(&km_waitq);
664
665         xfrm_state_num++;
666
667         xfrm_hash_grow_check(x->bydst.next != NULL);
668 }
669
670 /* xfrm_state_lock is held */
671 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
672 {
673         unsigned short family = xnew->props.family;
674         u32 reqid = xnew->props.reqid;
675         struct xfrm_state *x;
676         struct hlist_node *entry;
677         unsigned int h;
678
679         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
680         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
681                 if (x->props.family     == family &&
682                     x->props.reqid      == reqid &&
683                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
684                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
685                         x->genid = xfrm_state_genid;
686         }
687 }
688
689 void xfrm_state_insert(struct xfrm_state *x)
690 {
691         spin_lock_bh(&xfrm_state_lock);
692         __xfrm_state_bump_genids(x);
693         __xfrm_state_insert(x);
694         spin_unlock_bh(&xfrm_state_lock);
695 }
696 EXPORT_SYMBOL(xfrm_state_insert);
697
698 /* xfrm_state_lock is held */
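/* Find a larval (XFRM_STATE_ACQ, SPI still zero) state for the given
 * (reqid, mode, proto, daddr, saddr) key, or, if @create is set, allocate
 * one with an XFRM_ACQ_EXPIRES hard add lifetime and hash it in.
 */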
699 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
700 {
701         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
702         struct hlist_node *entry;
703         struct xfrm_state *x;
704
705         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
706                 if (x->props.reqid  != reqid ||
707                     x->props.mode   != mode ||
708                     x->props.family != family ||
709                     x->km.state     != XFRM_STATE_ACQ ||
710                     x->id.spi       != 0)
711                         continue;
712
713                 switch (family) {
714                 case AF_INET:
715                         if (x->id.daddr.a4    != daddr->a4 ||
716                             x->props.saddr.a4 != saddr->a4)
717                                 continue;
718                         break;
719                 case AF_INET6:
720                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
721                                              (struct in6_addr *)daddr) ||
722                             !ipv6_addr_equal((struct in6_addr *)
723                                              x->props.saddr.a6,
724                                              (struct in6_addr *)saddr))
725                                 continue;
726                         break;
727                 }
728
729                 xfrm_state_hold(x);
730                 return x;
731         }
732
733         if (!create)
734                 return NULL;
735
736         x = xfrm_state_alloc();
737         if (likely(x)) {
738                 switch (family) {
739                 case AF_INET:
740                         x->sel.daddr.a4 = daddr->a4;
741                         x->sel.saddr.a4 = saddr->a4;
742                         x->sel.prefixlen_d = 32;
743                         x->sel.prefixlen_s = 32;
744                         x->props.saddr.a4 = saddr->a4;
745                         x->id.daddr.a4 = daddr->a4;
746                         break;
747
748                 case AF_INET6:
749                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
750                                        (struct in6_addr *)daddr);
751                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
752                                        (struct in6_addr *)saddr);
753                         x->sel.prefixlen_d = 128;
754                         x->sel.prefixlen_s = 128;
755                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
756                                        (struct in6_addr *)saddr);
757                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
758                                        (struct in6_addr *)daddr);
759                         break;
760                 }
761
762                 x->km.state = XFRM_STATE_ACQ;
763                 x->id.proto = proto;
764                 x->props.family = family;
765                 x->props.mode = mode;
766                 x->props.reqid = reqid;
767                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
768                 xfrm_state_hold(x);
769                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
770                 add_timer(&x->timer);
771                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
772                 h = xfrm_src_hash(daddr, saddr, family);
773                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
774                 wake_up(&km_waitq);
775
776                 xfrm_state_num++;
777
778                 xfrm_hash_grow_check(x->bydst.next != NULL);
779         }
780
781         return x;
782 }
783
784 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
785
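/* Insert a fully constructed SA.  Returns -EEXIST if an equivalent state is
 * already hashed; otherwise the state is inserted and any matching larval
 * ACQ state (located by km.seq or by address/reqid) is deleted afterwards.
 */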
786 int xfrm_state_add(struct xfrm_state *x)
787 {
788         struct xfrm_state *x1;
789         int family;
790         int err;
791         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
792
793         family = x->props.family;
794
795         spin_lock_bh(&xfrm_state_lock);
796
797         x1 = __xfrm_state_locate(x, use_spi, family);
798         if (x1) {
799                 xfrm_state_put(x1);
800                 x1 = NULL;
801                 err = -EEXIST;
802                 goto out;
803         }
804
805         if (use_spi && x->km.seq) {
806                 x1 = __xfrm_find_acq_byseq(x->km.seq);
807                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
808                         xfrm_state_put(x1);
809                         x1 = NULL;
810                 }
811         }
812
813         if (use_spi && !x1)
814                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
815                                      x->id.proto,
816                                      &x->id.daddr, &x->props.saddr, 0);
817
818         __xfrm_state_bump_genids(x);
819         __xfrm_state_insert(x);
820         err = 0;
821
822 out:
823         spin_unlock_bh(&xfrm_state_lock);
824
825         if (x1) {
826                 xfrm_state_delete(x1);
827                 xfrm_state_put(x1);
828         }
829
830         return err;
831 }
832 EXPORT_SYMBOL(xfrm_state_add);
833
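/* Update an existing SA in place.  If the state found is still a larval ACQ
 * entry, the new state simply replaces it; otherwise encapsulation, care-of
 * address, selector and lifetime data are copied into the existing VALID
 * state and its timer is kicked.
 */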
834 int xfrm_state_update(struct xfrm_state *x)
835 {
836         struct xfrm_state *x1;
837         int err;
838         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
839
840         spin_lock_bh(&xfrm_state_lock);
841         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
842
843         err = -ESRCH;
844         if (!x1)
845                 goto out;
846
847         if (xfrm_state_kern(x1)) {
848                 xfrm_state_put(x1);
849                 err = -EEXIST;
850                 goto out;
851         }
852
853         if (x1->km.state == XFRM_STATE_ACQ) {
854                 __xfrm_state_insert(x);
855                 x = NULL;
856         }
857         err = 0;
858
859 out:
860         spin_unlock_bh(&xfrm_state_lock);
861
862         if (err)
863                 return err;
864
865         if (!x) {
866                 xfrm_state_delete(x1);
867                 xfrm_state_put(x1);
868                 return 0;
869         }
870
871         err = -EINVAL;
872         spin_lock_bh(&x1->lock);
873         if (likely(x1->km.state == XFRM_STATE_VALID)) {
874                 if (x->encap && x1->encap)
875                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
876                 if (x->coaddr && x1->coaddr) {
877                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
878                 }
879                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
880                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
881                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
882                 x1->km.dying = 0;
883
884                 mod_timer(&x1->timer, jiffies + HZ);
885                 if (x1->curlft.use_time)
886                         xfrm_state_check_expire(x1);
887
888                 err = 0;
889         }
890         spin_unlock_bh(&x1->lock);
891
892         xfrm_state_put(x1);
893
894         return err;
895 }
896 EXPORT_SYMBOL(xfrm_state_update);
897
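/* Per-packet byte/packet lifetime check: exceeding a hard limit expires the
 * state immediately (its timer is fired), exceeding a soft limit marks it
 * dying and notifies the key manager once.
 */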
898 int xfrm_state_check_expire(struct xfrm_state *x)
899 {
900         if (!x->curlft.use_time)
901                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
902
903         if (x->km.state != XFRM_STATE_VALID)
904                 return -EINVAL;
905
906         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
907             x->curlft.packets >= x->lft.hard_packet_limit) {
908                 x->km.state = XFRM_STATE_EXPIRED;
909                 mod_timer(&x->timer, jiffies);
910                 return -EINVAL;
911         }
912
913         if (!x->km.dying &&
914             (x->curlft.bytes >= x->lft.soft_byte_limit ||
915              x->curlft.packets >= x->lft.soft_packet_limit)) {
916                 x->km.dying = 1;
917                 km_state_expired(x, 0, 0);
918         }
919         return 0;
920 }
921 EXPORT_SYMBOL(xfrm_state_check_expire);
922
923 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
924 {
925         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
926                 - skb_headroom(skb);
927
928         if (nhead > 0)
929                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
930
931         /* Check tail too... */
932         return 0;
933 }
934
935 int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
936 {
937         int err = xfrm_state_check_expire(x);
938         if (err < 0)
939                 goto err;
940         err = xfrm_state_check_space(x, skb);
941 err:
942         return err;
943 }
944 EXPORT_SYMBOL(xfrm_state_check);
945
946 struct xfrm_state *
947 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
948                   unsigned short family)
949 {
950         struct xfrm_state *x;
951
952         spin_lock_bh(&xfrm_state_lock);
953         x = __xfrm_state_lookup(daddr, spi, proto, family);
954         spin_unlock_bh(&xfrm_state_lock);
955         return x;
956 }
957 EXPORT_SYMBOL(xfrm_state_lookup);
958
959 struct xfrm_state *
960 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
961                          u8 proto, unsigned short family)
962 {
963         struct xfrm_state *x;
964
965         spin_lock_bh(&xfrm_state_lock);
966         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
967         spin_unlock_bh(&xfrm_state_lock);
968         return x;
969 }
970 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
971
972 struct xfrm_state *
973 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
974               xfrm_address_t *daddr, xfrm_address_t *saddr, 
975               int create, unsigned short family)
976 {
977         struct xfrm_state *x;
978
979         spin_lock_bh(&xfrm_state_lock);
980         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
981         spin_unlock_bh(&xfrm_state_lock);
982
983         return x;
984 }
985 EXPORT_SYMBOL(xfrm_find_acq);
986
987 #ifdef CONFIG_XFRM_SUB_POLICY
988 int
989 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
990                unsigned short family)
991 {
992         int err = 0;
993         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
994         if (!afinfo)
995                 return -EAFNOSUPPORT;
996
997         spin_lock_bh(&xfrm_state_lock);
998         if (afinfo->tmpl_sort)
999                 err = afinfo->tmpl_sort(dst, src, n);
1000         spin_unlock_bh(&xfrm_state_lock);
1001         xfrm_state_put_afinfo(afinfo);
1002         return err;
1003 }
1004 EXPORT_SYMBOL(xfrm_tmpl_sort);
1005
1006 int
1007 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
1008                 unsigned short family)
1009 {
1010         int err = 0;
1011         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
1012         if (!afinfo)
1013                 return -EAFNOSUPPORT;
1014
1015         spin_lock_bh(&xfrm_state_lock);
1016         if (afinfo->state_sort)
1017                 err = afinfo->state_sort(dst, src, n);
1018         spin_unlock_bh(&xfrm_state_lock);
1019         xfrm_state_put_afinfo(afinfo);
1020         return err;
1021 }
1022 EXPORT_SYMBOL(xfrm_state_sort);
1023 #endif
1024
1025 /* Silly enough, but too lazy to build a resolution list */
1026
1027 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1028 {
1029         int i;
1030
1031         for (i = 0; i <= xfrm_state_hmask; i++) {
1032                 struct hlist_node *entry;
1033                 struct xfrm_state *x;
1034
1035                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1036                         if (x->km.seq == seq &&
1037                             x->km.state == XFRM_STATE_ACQ) {
1038                                 xfrm_state_hold(x);
1039                                 return x;
1040                         }
1041                 }
1042         }
1043         return NULL;
1044 }
1045
1046 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1047 {
1048         struct xfrm_state *x;
1049
1050         spin_lock_bh(&xfrm_state_lock);
1051         x = __xfrm_find_acq_byseq(seq);
1052         spin_unlock_bh(&xfrm_state_lock);
1053         return x;
1054 }
1055 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1056
1057 u32 xfrm_get_acqseq(void)
1058 {
1059         u32 res;
1060         static u32 acqseq;
1061         static DEFINE_SPINLOCK(acqseq_lock);
1062
1063         spin_lock_bh(&acqseq_lock);
1064         res = (++acqseq ? : ++acqseq);
1065         spin_unlock_bh(&acqseq_lock);
1066         return res;
1067 }
1068 EXPORT_SYMBOL(xfrm_get_acqseq);
1069
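/* Assign an SPI to @x: either the single requested value (minspi == maxspi)
 * or a random value from [minspi, maxspi] that does not collide with an
 * existing (daddr, spi, proto) entry.  On success the state is linked into
 * the byspi table and waiters are woken.
 */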
1070 void
1071 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1072 {
1073         unsigned int h;
1074         struct xfrm_state *x0;
1075
1076         if (x->id.spi)
1077                 return;
1078
1079         if (minspi == maxspi) {
1080                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1081                 if (x0) {
1082                         xfrm_state_put(x0);
1083                         return;
1084                 }
1085                 x->id.spi = minspi;
1086         } else {
1087                 u32 spi = 0;
1088                 u32 low = ntohl(minspi);
1089                 u32 high = ntohl(maxspi);
1090                 for (h=0; h<high-low+1; h++) {
1091                         spi = low + net_random()%(high-low+1);
1092                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1093                         if (x0 == NULL) {
1094                                 x->id.spi = htonl(spi);
1095                                 break;
1096                         }
1097                         xfrm_state_put(x0);
1098                 }
1099         }
1100         if (x->id.spi) {
1101                 spin_lock_bh(&xfrm_state_lock);
1102                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1103                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1104                 spin_unlock_bh(&xfrm_state_lock);
1105                 wake_up(&km_waitq);
1106         }
1107 }
1108 EXPORT_SYMBOL(xfrm_alloc_spi);
1109
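/* Walk all states matching @proto and invoke @func for each.  Every entry
 * but the last is passed with its positive running count; the final entry is
 * passed with count 0 so a dump-style caller can mark its last message.
 */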
1110 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1111                     void *data)
1112 {
1113         int i;
1114         struct xfrm_state *x, *last = NULL;
1115         struct hlist_node *entry;
1116         int count = 0;
1117         int err = 0;
1118
1119         spin_lock_bh(&xfrm_state_lock);
1120         for (i = 0; i <= xfrm_state_hmask; i++) {
1121                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1122                         if (!xfrm_id_proto_match(x->id.proto, proto))
1123                                 continue;
1124                         if (last) {
1125                                 err = func(last, count, data);
1126                                 if (err)
1127                                         goto out;
1128                         }
1129                         last = x;
1130                         count++;
1131                 }
1132         }
1133         if (count == 0) {
1134                 err = -ENOENT;
1135                 goto out;
1136         }
1137         err = func(last, 0, data);
1138 out:
1139         spin_unlock_bh(&xfrm_state_lock);
1140         return err;
1141 }
1142 EXPORT_SYMBOL(xfrm_state_walk);
1143
1144
1145 void xfrm_replay_notify(struct xfrm_state *x, int event)
1146 {
1147         struct km_event c;
1148         /* we send notify messages in case
1149          *  1. we updated one of the sequence numbers, and the seqno difference
1150          *     is at least x->replay_maxdiff, in this case we also update the
1151          *     timeout of our timer function
1152          *  2. if x->replay_maxage has elapsed since last update,
1153          *     and there were changes
1154          *
1155          *  The state structure must be locked!
1156          */
1157
1158         switch (event) {
1159         case XFRM_REPLAY_UPDATE:
1160                 if (x->replay_maxdiff &&
1161                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1162                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1163                         if (x->xflags & XFRM_TIME_DEFER)
1164                                 event = XFRM_REPLAY_TIMEOUT;
1165                         else
1166                                 return;
1167                 }
1168
1169                 break;
1170
1171         case XFRM_REPLAY_TIMEOUT:
1172                 if ((x->replay.seq == x->preplay.seq) &&
1173                     (x->replay.bitmap == x->preplay.bitmap) &&
1174                     (x->replay.oseq == x->preplay.oseq)) {
1175                         x->xflags |= XFRM_TIME_DEFER;
1176                         return;
1177                 }
1178
1179                 break;
1180         }
1181
1182         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1183         c.event = XFRM_MSG_NEWAE;
1184         c.data.aevent = event;
1185         km_state_notify(x, &c);
1186
1187         if (x->replay_maxage &&
1188             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1189                 x->xflags &= ~XFRM_TIME_DEFER;
1190 }
1191 EXPORT_SYMBOL(xfrm_replay_notify);
1192
1193 static void xfrm_replay_timer_handler(unsigned long data)
1194 {
1195         struct xfrm_state *x = (struct xfrm_state*)data;
1196
1197         spin_lock(&x->lock);
1198
1199         if (x->km.state == XFRM_STATE_VALID) {
1200                 if (xfrm_aevent_is_on())
1201                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1202                 else
1203                         x->xflags |= XFRM_TIME_DEFER;
1204         }
1205
1206         spin_unlock(&x->lock);
1207 }
1208
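/* Sliding-window anti-replay check for an inbound sequence number: numbers
 * beyond replay.seq always pass, older ones must fall inside the window and
 * not have their bitmap bit set yet.  Illustrative numbers only: with
 * replay.seq == 1000 and a window of 32, seq 1003 passes (and later advances
 * the window), seq 995 is checked against bit 5 of replay.bitmap, and seq 950
 * is rejected as outside the window.
 */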
1209 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1210 {
1211         u32 diff;
1212         u32 seq = ntohl(net_seq);
1213
1214         if (unlikely(seq == 0))
1215                 return -EINVAL;
1216
1217         if (likely(seq > x->replay.seq))
1218                 return 0;
1219
1220         diff = x->replay.seq - seq;
1221         if (diff >= x->props.replay_window) {
1222                 x->stats.replay_window++;
1223                 return -EINVAL;
1224         }
1225
1226         if (x->replay.bitmap & (1U << diff)) {
1227                 x->stats.replay++;
1228                 return -EINVAL;
1229         }
1230         return 0;
1231 }
1232 EXPORT_SYMBOL(xfrm_replay_check);
1233
1234 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1235 {
1236         u32 diff;
1237         u32 seq = ntohl(net_seq);
1238
1239         if (seq > x->replay.seq) {
1240                 diff = seq - x->replay.seq;
1241                 if (diff < x->props.replay_window)
1242                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1243                 else
1244                         x->replay.bitmap = 1;
1245                 x->replay.seq = seq;
1246         } else {
1247                 diff = x->replay.seq - seq;
1248                 x->replay.bitmap |= (1U << diff);
1249         }
1250
1251         if (xfrm_aevent_is_on())
1252                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1253 }
1254 EXPORT_SYMBOL(xfrm_replay_advance);
1255
1256 static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
1257 static DEFINE_RWLOCK(xfrm_km_lock);
1258
1259 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1260 {
1261         struct xfrm_mgr *km;
1262
1263         read_lock(&xfrm_km_lock);
1264         list_for_each_entry(km, &xfrm_km_list, list)
1265                 if (km->notify_policy)
1266                         km->notify_policy(xp, dir, c);
1267         read_unlock(&xfrm_km_lock);
1268 }
1269
1270 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1271 {
1272         struct xfrm_mgr *km;
1273         read_lock(&xfrm_km_lock);
1274         list_for_each_entry(km, &xfrm_km_list, list)
1275                 if (km->notify)
1276                         km->notify(x, c);
1277         read_unlock(&xfrm_km_lock);
1278 }
1279
1280 EXPORT_SYMBOL(km_policy_notify);
1281 EXPORT_SYMBOL(km_state_notify);
1282
1283 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1284 {
1285         struct km_event c;
1286
1287         c.data.hard = hard;
1288         c.pid = pid;
1289         c.event = XFRM_MSG_EXPIRE;
1290         km_state_notify(x, &c);
1291
1292         if (hard)
1293                 wake_up(&km_waitq);
1294 }
1295
1296 EXPORT_SYMBOL(km_state_expired);
1297 /*
1298  * We send to all registered managers regardless of failure.
1299  * We are happy with one success.
1300  */
1301 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1302 {
1303         int err = -EINVAL, acqret;
1304         struct xfrm_mgr *km;
1305
1306         read_lock(&xfrm_km_lock);
1307         list_for_each_entry(km, &xfrm_km_list, list) {
1308                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1309                 if (!acqret)
1310                         err = acqret;
1311         }
1312         read_unlock(&xfrm_km_lock);
1313         return err;
1314 }
1315 EXPORT_SYMBOL(km_query);
1316
1317 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport)
1318 {
1319         int err = -EINVAL;
1320         struct xfrm_mgr *km;
1321
1322         read_lock(&xfrm_km_lock);
1323         list_for_each_entry(km, &xfrm_km_list, list) {
1324                 if (km->new_mapping)
1325                         err = km->new_mapping(x, ipaddr, sport);
1326                 if (!err)
1327                         break;
1328         }
1329         read_unlock(&xfrm_km_lock);
1330         return err;
1331 }
1332 EXPORT_SYMBOL(km_new_mapping);
1333
1334 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1335 {
1336         struct km_event c;
1337
1338         c.data.hard = hard;
1339         c.pid = pid;
1340         c.event = XFRM_MSG_POLEXPIRE;
1341         km_policy_notify(pol, dir, &c);
1342
1343         if (hard)
1344                 wake_up(&km_waitq);
1345 }
1346 EXPORT_SYMBOL(km_policy_expired);
1347
1348 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1349 {
1350         int err = -EINVAL;
1351         int ret;
1352         struct xfrm_mgr *km;
1353
1354         read_lock(&xfrm_km_lock);
1355         list_for_each_entry(km, &xfrm_km_list, list) {
1356                 if (km->report) {
1357                         ret = km->report(proto, sel, addr);
1358                         if (!ret)
1359                                 err = ret;
1360                 }
1361         }
1362         read_unlock(&xfrm_km_lock);
1363         return err;
1364 }
1365 EXPORT_SYMBOL(km_report);
1366
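/* setsockopt() helper for per-socket policies: copy the opaque blob from
 * userspace and let each registered key manager try to compile it into an
 * xfrm_policy; the first non-negative err (the policy direction, as passed to
 * xfrm_sk_policy_insert()) wins.
 */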
1367 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1368 {
1369         int err;
1370         u8 *data;
1371         struct xfrm_mgr *km;
1372         struct xfrm_policy *pol = NULL;
1373
1374         if (optlen <= 0 || optlen > PAGE_SIZE)
1375                 return -EMSGSIZE;
1376
1377         data = kmalloc(optlen, GFP_KERNEL);
1378         if (!data)
1379                 return -ENOMEM;
1380
1381         err = -EFAULT;
1382         if (copy_from_user(data, optval, optlen))
1383                 goto out;
1384
1385         err = -EINVAL;
1386         read_lock(&xfrm_km_lock);
1387         list_for_each_entry(km, &xfrm_km_list, list) {
1388                 pol = km->compile_policy(sk, optname, data,
1389                                          optlen, &err);
1390                 if (err >= 0)
1391                         break;
1392         }
1393         read_unlock(&xfrm_km_lock);
1394
1395         if (err >= 0) {
1396                 xfrm_sk_policy_insert(sk, err, pol);
1397                 xfrm_pol_put(pol);
1398                 err = 0;
1399         }
1400
1401 out:
1402         kfree(data);
1403         return err;
1404 }
1405 EXPORT_SYMBOL(xfrm_user_policy);
1406
1407 int xfrm_register_km(struct xfrm_mgr *km)
1408 {
1409         write_lock_bh(&xfrm_km_lock);
1410         list_add_tail(&km->list, &xfrm_km_list);
1411         write_unlock_bh(&xfrm_km_lock);
1412         return 0;
1413 }
1414 EXPORT_SYMBOL(xfrm_register_km);
1415
1416 int xfrm_unregister_km(struct xfrm_mgr *km)
1417 {
1418         write_lock_bh(&xfrm_km_lock);
1419         list_del(&km->list);
1420         write_unlock_bh(&xfrm_km_lock);
1421         return 0;
1422 }
1423 EXPORT_SYMBOL(xfrm_unregister_km);
1424
1425 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1426 {
1427         int err = 0;
1428         if (unlikely(afinfo == NULL))
1429                 return -EINVAL;
1430         if (unlikely(afinfo->family >= NPROTO))
1431                 return -EAFNOSUPPORT;
1432         write_lock_bh(&xfrm_state_afinfo_lock);
1433         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1434                 err = -ENOBUFS;
1435         else
1436                 xfrm_state_afinfo[afinfo->family] = afinfo;
1437         write_unlock_bh(&xfrm_state_afinfo_lock);
1438         return err;
1439 }
1440 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1441
1442 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1443 {
1444         int err = 0;
1445         if (unlikely(afinfo == NULL))
1446                 return -EINVAL;
1447         if (unlikely(afinfo->family >= NPROTO))
1448                 return -EAFNOSUPPORT;
1449         write_lock_bh(&xfrm_state_afinfo_lock);
1450         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1451                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1452                         err = -EINVAL;
1453                 else
1454                         xfrm_state_afinfo[afinfo->family] = NULL;
1455         }
1456         write_unlock_bh(&xfrm_state_afinfo_lock);
1457         return err;
1458 }
1459 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1460
1461 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1462 {
1463         struct xfrm_state_afinfo *afinfo;
1464         if (unlikely(family >= NPROTO))
1465                 return NULL;
1466         read_lock(&xfrm_state_afinfo_lock);
1467         afinfo = xfrm_state_afinfo[family];
1468         if (unlikely(!afinfo))
1469                 read_unlock(&xfrm_state_afinfo_lock);
1470         return afinfo;
1471 }
1472
1473 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1474 {
1475         read_unlock(&xfrm_state_afinfo_lock);
1476 }
1477
1478 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1479 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1480 {
1481         if (x->tunnel) {
1482                 struct xfrm_state *t = x->tunnel;
1483
1484                 if (atomic_read(&t->tunnel_users) == 2)
1485                         xfrm_state_delete(t);
1486                 atomic_dec(&t->tunnel_users);
1487                 xfrm_state_put(t);
1488                 x->tunnel = NULL;
1489         }
1490 }
1491 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1492
1493 /*
1494  * This function is NOT optimal.  For example, with ESP it will give an
1495  * MTU that's usually two bytes short of being optimal.  However, it will
1496  * usually give an answer that's a multiple of 4 provided the input is
1497  * also a multiple of 4.
1498  */
1499 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1500 {
1501         int res = mtu;
1502
1503         res -= x->props.header_len;
1504
1505         for (;;) {
1506                 int m = res;
1507
1508                 if (m < 68)
1509                         return 68;
1510
1511                 spin_lock_bh(&x->lock);
1512                 if (x->km.state == XFRM_STATE_VALID &&
1513                     x->type && x->type->get_max_size)
1514                         m = x->type->get_max_size(x, m);
1515                 else
1516                         m += x->props.header_len;
1517                 spin_unlock_bh(&x->lock);
1518
1519                 if (m <= mtu)
1520                         break;
1521                 res -= (m - mtu);
1522         }
1523
1524         return res;
1525 }
1526
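/* Final initialization once id/props are filled in: apply per-family init
 * flags, bind the protocol type and the encapsulation mode, and mark the
 * state XFRM_STATE_VALID on success.
 */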
1527 int xfrm_init_state(struct xfrm_state *x)
1528 {
1529         struct xfrm_state_afinfo *afinfo;
1530         int family = x->props.family;
1531         int err;
1532
1533         err = -EAFNOSUPPORT;
1534         afinfo = xfrm_state_get_afinfo(family);
1535         if (!afinfo)
1536                 goto error;
1537
1538         err = 0;
1539         if (afinfo->init_flags)
1540                 err = afinfo->init_flags(x);
1541
1542         xfrm_state_put_afinfo(afinfo);
1543
1544         if (err)
1545                 goto error;
1546
1547         err = -EPROTONOSUPPORT;
1548         x->type = xfrm_get_type(x->id.proto, family);
1549         if (x->type == NULL)
1550                 goto error;
1551
1552         err = x->type->init_state(x);
1553         if (err)
1554                 goto error;
1555
1556         x->mode = xfrm_get_mode(x->props.mode, family);
1557         if (x->mode == NULL)
1558                 goto error;
1559
1560         x->km.state = XFRM_STATE_VALID;
1561
1562 error:
1563         return err;
1564 }
1565
1566 EXPORT_SYMBOL(xfrm_init_state);
1567  
1568 void __init xfrm_state_init(void)
1569 {
1570         unsigned int sz;
1571
1572         sz = sizeof(struct hlist_head) * 8;
1573
1574         xfrm_state_bydst = xfrm_hash_alloc(sz);
1575         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1576         xfrm_state_byspi = xfrm_hash_alloc(sz);
1577         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1578                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1579         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1580
1581         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task);
1582 }
1583