Merge master.kernel.org:/pub/scm/linux/kernel/git/davej/agpgart
[linux-2.6] / net / xfrm / xfrm_state.c
1 /*
2  * xfrm_state.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      YOSHIFUJI Hideaki @USAGI
10  *              Split up af-specific functions
11  *      Derek Atkins <derek@ihtfp.com>
12  *              Add UDP Encapsulation
13  *
14  */
15
16 #include <linux/workqueue.h>
17 #include <net/xfrm.h>
18 #include <linux/pfkeyv2.h>
19 #include <linux/ipsec.h>
20 #include <linux/module.h>
21 #include <linux/cache.h>
22 #include <asm/uaccess.h>
23
24 #include "xfrm_hash.h"
25
/* Socket pointer exported to other modules; it is not assigned anywhere in
 * this part of the file.  NOTE(review): presumably the xfrm netlink socket --
 * confirm against the code that sets it. */
26 struct sock *xfrm_nl;
27 EXPORT_SYMBOL(xfrm_nl);
28
/* Exported tunable, initialised to XFRM_AE_ETIME.  NOTE(review): the sysctl_
 * prefix suggests it is adjustable at run time -- confirm where it is
 * registered. */
29 u32 sysctl_xfrm_aevent_etime = XFRM_AE_ETIME;
30 EXPORT_SYMBOL(sysctl_xfrm_aevent_etime);
31
/* Exported tunable, initialised to XFRM_AE_SEQT_SIZE (same caveat as above). */
32 u32 sysctl_xfrm_aevent_rseqth = XFRM_AE_SEQT_SIZE;
33 EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth);
34
35 /* Each xfrm_state may be linked to two tables:
36
37    1. Hash table by (spi,daddr,ah/esp) to find SA by SPI. (input,ctl)
38    2. Hash table by (daddr,family,reqid) to find what SAs exist for given
39       destination/tunnel endpoint. (output)
40  */
41
/* Held (usually with _bh) by the code in this file whenever the state hash
 * tables, the hash mask or the state counter are walked or modified. */
42 static DEFINE_SPINLOCK(xfrm_state_lock);
43
44 /* Hash table to find appropriate SA towards given target (endpoint
45  * of tunnel or destination of transport mode) allowed by selector.
46  *
47  * Main use is finding SA after policy selected tunnel or transport mode.
48  * Also, it can be used by ah/esp icmp error handler to find offending SA.
49  */
50 static struct hlist_head *xfrm_state_bydst __read_mostly;
/* Second and third tables, chained through ->bysrc and ->byspi.  All three
 * tables share one size and are replaced together by xfrm_hash_resize(). */
51 static struct hlist_head *xfrm_state_bysrc __read_mostly;
52 static struct hlist_head *xfrm_state_byspi __read_mostly;
/* Number of buckets minus one; passed as the mask to the __xfrm_*_hash()
 * helpers. */
53 static unsigned int xfrm_state_hmask __read_mostly;
/* Upper bound on the bucket count; xfrm_hash_grow_check() will not schedule
 * a resize once (hmask + 1) reaches this value. */
54 static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024;
/* Count of states currently linked into the tables: incremented on insert,
 * decremented in __xfrm_state_delete(). */
55 static unsigned int xfrm_state_num;
/* Generation counter; bumped on every __xfrm_state_insert() and copied into
 * the inserted state's ->genid. */
56 static unsigned int xfrm_state_genid;
57
58 static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
59                                          xfrm_address_t *saddr,
60                                          u32 reqid,
61                                          unsigned short family)
62 {
63         return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
64 }
65
66 static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
67                                          xfrm_address_t *saddr,
68                                          unsigned short family)
69 {
70         return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
71 }
72
73 static inline unsigned int
74 xfrm_spi_hash(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
75 {
76         return __xfrm_spi_hash(daddr, spi, proto, family, xfrm_state_hmask);
77 }
78
79 static void xfrm_hash_transfer(struct hlist_head *list,
80                                struct hlist_head *ndsttable,
81                                struct hlist_head *nsrctable,
82                                struct hlist_head *nspitable,
83                                unsigned int nhashmask)
84 {
85         struct hlist_node *entry, *tmp;
86         struct xfrm_state *x;
87
88         hlist_for_each_entry_safe(x, entry, tmp, list, bydst) {
89                 unsigned int h;
90
91                 h = __xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
92                                     x->props.reqid, x->props.family,
93                                     nhashmask);
94                 hlist_add_head(&x->bydst, ndsttable+h);
95
96                 h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
97                                     x->props.family,
98                                     nhashmask);
99                 hlist_add_head(&x->bysrc, nsrctable+h);
100
101                 if (x->id.spi) {
102                         h = __xfrm_spi_hash(&x->id.daddr, x->id.spi,
103                                             x->id.proto, x->props.family,
104                                             nhashmask);
105                         hlist_add_head(&x->byspi, nspitable+h);
106                 }
107         }
108 }
109
110 static unsigned long xfrm_hash_new_size(void)
111 {
112         return ((xfrm_state_hmask + 1) << 1) *
113                 sizeof(struct hlist_head);
114 }
115
116 static DEFINE_MUTEX(hash_resize_mutex);
117
118 static void xfrm_hash_resize(void *__unused)
119 {
120         struct hlist_head *ndst, *nsrc, *nspi, *odst, *osrc, *ospi;
121         unsigned long nsize, osize;
122         unsigned int nhashmask, ohashmask;
123         int i;
124
125         mutex_lock(&hash_resize_mutex);
126
127         nsize = xfrm_hash_new_size();
128         ndst = xfrm_hash_alloc(nsize);
129         if (!ndst)
130                 goto out_unlock;
131         nsrc = xfrm_hash_alloc(nsize);
132         if (!nsrc) {
133                 xfrm_hash_free(ndst, nsize);
134                 goto out_unlock;
135         }
136         nspi = xfrm_hash_alloc(nsize);
137         if (!nspi) {
138                 xfrm_hash_free(ndst, nsize);
139                 xfrm_hash_free(nsrc, nsize);
140                 goto out_unlock;
141         }
142
143         spin_lock_bh(&xfrm_state_lock);
144
145         nhashmask = (nsize / sizeof(struct hlist_head)) - 1U;
146         for (i = xfrm_state_hmask; i >= 0; i--)
147                 xfrm_hash_transfer(xfrm_state_bydst+i, ndst, nsrc, nspi,
148                                    nhashmask);
149
150         odst = xfrm_state_bydst;
151         osrc = xfrm_state_bysrc;
152         ospi = xfrm_state_byspi;
153         ohashmask = xfrm_state_hmask;
154
155         xfrm_state_bydst = ndst;
156         xfrm_state_bysrc = nsrc;
157         xfrm_state_byspi = nspi;
158         xfrm_state_hmask = nhashmask;
159
160         spin_unlock_bh(&xfrm_state_lock);
161
162         osize = (ohashmask + 1) * sizeof(struct hlist_head);
163         xfrm_hash_free(odst, osize);
164         xfrm_hash_free(osrc, osize);
165         xfrm_hash_free(ospi, osize);
166
167 out_unlock:
168         mutex_unlock(&hash_resize_mutex);
169 }
170
/* Work item that runs xfrm_hash_resize(); queued by xfrm_hash_grow_check(). */
171 static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL);
172
/* Exported wait queue.  In this file it is woken after states are inserted,
 * flushed, garbage-collected, or when an acquire state expires. */
173 DECLARE_WAIT_QUEUE_HEAD(km_waitq);
174 EXPORT_SYMBOL(km_waitq);
175
/* Per-family afinfo table and its lock; the accessors are declared below.
 * NOTE(review): presumably indexed by address family -- confirm in
 * xfrm_state_get_afinfo(). */
176 static DEFINE_RWLOCK(xfrm_state_afinfo_lock);
177 static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
178
/* Deferred destruction: __xfrm_state_destroy() adds dead states to
 * xfrm_state_gc_list under xfrm_state_gc_lock and schedules
 * xfrm_state_gc_work; xfrm_state_gc_task() drains the list. */
179 static struct work_struct xfrm_state_gc_work;
180 static HLIST_HEAD(xfrm_state_gc_list);
181 static DEFINE_SPINLOCK(xfrm_state_gc_lock);
182
/* Forward declarations for functions used before their definitions. */
183 int __xfrm_state_delete(struct xfrm_state *x);
184
185 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
186 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
187
188 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol);
189 void km_state_expired(struct xfrm_state *x, int hard, u32 pid);
190
191 static void xfrm_state_gc_destroy(struct xfrm_state *x)
192 {
193         del_timer_sync(&x->timer);
194         del_timer_sync(&x->rtimer);
195         kfree(x->aalg);
196         kfree(x->ealg);
197         kfree(x->calg);
198         kfree(x->encap);
199         kfree(x->coaddr);
200         if (x->mode)
201                 xfrm_put_mode(x->mode);
202         if (x->type) {
203                 x->type->destructor(x);
204                 xfrm_put_type(x->type);
205         }
206         security_xfrm_state_free(x);
207         kfree(x);
208 }
209
210 static void xfrm_state_gc_task(void *data)
211 {
212         struct xfrm_state *x;
213         struct hlist_node *entry, *tmp;
214         struct hlist_head gc_list;
215
216         spin_lock_bh(&xfrm_state_gc_lock);
217         gc_list.first = xfrm_state_gc_list.first;
218         INIT_HLIST_HEAD(&xfrm_state_gc_list);
219         spin_unlock_bh(&xfrm_state_gc_lock);
220
221         hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst)
222                 xfrm_state_gc_destroy(x);
223
224         wake_up(&km_waitq);
225 }
226
227 static inline unsigned long make_jiffies(long secs)
228 {
229         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
230                 return MAX_SCHEDULE_TIMEOUT-1;
231         else
232                 return secs*HZ;
233 }
234
/*
 * Lifetime timer callback for one state.  xfrm_state_alloc() installs it as
 * x->timer.function with @data set to the state pointer.
 *
 * Under x->lock it checks the hard add/use lifetimes (expiry deletes the
 * state) and then the soft add/use lifetimes (expiry marks the state as
 * dying and notifies via km_state_expired(x, 0, 0)).  The timer is re-armed
 * for the nearest remaining deadline, if there is one.
 */
235 static void xfrm_timer_handler(unsigned long data)
236 {
237         struct xfrm_state *x = (struct xfrm_state*)data;
238         unsigned long now = (unsigned long)xtime.tv_sec;
            /* Seconds until the nearest deadline; LONG_MAX means "none". */
239         long next = LONG_MAX;
240         int warn = 0;
241
242         spin_lock(&x->lock);
243         if (x->km.state == XFRM_STATE_DEAD)
244                 goto out;
245         if (x->km.state == XFRM_STATE_EXPIRED)
246                 goto expired;
            /* Hard limits: a zero setting disables the check. */
247         if (x->lft.hard_add_expires_seconds) {
248                 long tmo = x->lft.hard_add_expires_seconds +
249                         x->curlft.add_time - now;
250                 if (tmo <= 0)
251                         goto expired;
252                 if (tmo < next)
253                         next = tmo;
254         }
255         if (x->lft.hard_use_expires_seconds) {
                    /* A state with no recorded use_time counts from now. */
256                 long tmo = x->lft.hard_use_expires_seconds +
257                         (x->curlft.use_time ? : now) - now;
258                 if (tmo <= 0)
259                         goto expired;
260                 if (tmo < next)
261                         next = tmo;
262         }
            /* Already flagged as dying: skip the soft checks, just re-arm. */
263         if (x->km.dying)
264                 goto resched;
265         if (x->lft.soft_add_expires_seconds) {
266                 long tmo = x->lft.soft_add_expires_seconds +
267                         x->curlft.add_time - now;
268                 if (tmo <= 0)
269                         warn = 1;
270                 else if (tmo < next)
271                         next = tmo;
272         }
273         if (x->lft.soft_use_expires_seconds) {
274                 long tmo = x->lft.soft_use_expires_seconds +
275                         (x->curlft.use_time ? : now) - now;
276                 if (tmo <= 0)
277                         warn = 1;
278                 else if (tmo < next)
279                         next = tmo;
280         }
281
282         x->km.dying = warn;
283         if (warn)
284                 km_state_expired(x, 0, 0);
285 resched:
286         if (next != LONG_MAX)
287                 mod_timer(&x->timer, jiffies + make_jiffies(next));
288
289         goto out;
290
291 expired:
            /* An acquire state that never got an SPI is first moved to
             * EXPIRED and the timer re-armed for 2 seconds; the next run
             * enters here again via the EXPIRED check and deletes it. */
292         if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
293                 x->km.state = XFRM_STATE_EXPIRED;
294                 wake_up(&km_waitq);
295                 next = 2;
296                 goto resched;
297         }
            /* Report a hard expiry only if this call performed the delete
             * and the state has an SPI. */
298         if (!__xfrm_state_delete(x) && x->id.spi)
299                 km_state_expired(x, 1, 0);
300
301 out:
302         spin_unlock(&x->lock);
303 }
304
305 static void xfrm_replay_timer_handler(unsigned long data);
306
307 struct xfrm_state *xfrm_state_alloc(void)
308 {
309         struct xfrm_state *x;
310
311         x = kzalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
312
313         if (x) {
314                 atomic_set(&x->refcnt, 1);
315                 atomic_set(&x->tunnel_users, 0);
316                 INIT_HLIST_NODE(&x->bydst);
317                 INIT_HLIST_NODE(&x->bysrc);
318                 INIT_HLIST_NODE(&x->byspi);
319                 init_timer(&x->timer);
320                 x->timer.function = xfrm_timer_handler;
321                 x->timer.data     = (unsigned long)x;
322                 init_timer(&x->rtimer);
323                 x->rtimer.function = xfrm_replay_timer_handler;
324                 x->rtimer.data     = (unsigned long)x;
325                 x->curlft.add_time = (unsigned long)xtime.tv_sec;
326                 x->lft.soft_byte_limit = XFRM_INF;
327                 x->lft.soft_packet_limit = XFRM_INF;
328                 x->lft.hard_byte_limit = XFRM_INF;
329                 x->lft.hard_packet_limit = XFRM_INF;
330                 x->replay_maxage = 0;
331                 x->replay_maxdiff = 0;
332                 spin_lock_init(&x->lock);
333         }
334         return x;
335 }
336 EXPORT_SYMBOL(xfrm_state_alloc);
337
338 void __xfrm_state_destroy(struct xfrm_state *x)
339 {
340         BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
341
342         spin_lock_bh(&xfrm_state_gc_lock);
343         hlist_add_head(&x->bydst, &xfrm_state_gc_list);
344         spin_unlock_bh(&xfrm_state_gc_lock);
345         schedule_work(&xfrm_state_gc_work);
346 }
347 EXPORT_SYMBOL(__xfrm_state_destroy);
348
349 int __xfrm_state_delete(struct xfrm_state *x)
350 {
351         int err = -ESRCH;
352
353         if (x->km.state != XFRM_STATE_DEAD) {
354                 x->km.state = XFRM_STATE_DEAD;
355                 spin_lock(&xfrm_state_lock);
356                 hlist_del(&x->bydst);
357                 hlist_del(&x->bysrc);
358                 if (x->id.spi)
359                         hlist_del(&x->byspi);
360                 xfrm_state_num--;
361                 spin_unlock(&xfrm_state_lock);
362
363                 /* All xfrm_state objects are created by xfrm_state_alloc.
364                  * The xfrm_state_alloc call gives a reference, and that
365                  * is what we are dropping here.
366                  */
367                 __xfrm_state_put(x);
368                 err = 0;
369         }
370
371         return err;
372 }
373 EXPORT_SYMBOL(__xfrm_state_delete);
374
375 int xfrm_state_delete(struct xfrm_state *x)
376 {
377         int err;
378
379         spin_lock_bh(&x->lock);
380         err = __xfrm_state_delete(x);
381         spin_unlock_bh(&x->lock);
382
383         return err;
384 }
385 EXPORT_SYMBOL(xfrm_state_delete);
386
/*
 * Delete every state whose protocol matches @proto (per
 * xfrm_id_proto_match()), skipping states for which xfrm_state_kern() is
 * true, then wake km_waitq.
 */
387 void xfrm_state_flush(u8 proto)
388 {
389         int i;
390
391         spin_lock_bh(&xfrm_state_lock);
392         for (i = 0; i <= xfrm_state_hmask; i++) {
393                 struct hlist_node *entry;
394                 struct xfrm_state *x;
395 restart:
396                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
397                         if (!xfrm_state_kern(x) &&
398                             xfrm_id_proto_match(x->id.proto, proto)) {
                                    /* Pin the state, then drop the table lock:
                                     * xfrm_state_delete() takes x->lock and,
                                     * inside it, xfrm_state_lock. */
399                                 xfrm_state_hold(x);
400                                 spin_unlock_bh(&xfrm_state_lock);
401
402                                 xfrm_state_delete(x);
403                                 xfrm_state_put(x);
404
                                    /* The chain may have changed while the
                                     * lock was dropped, so rescan this bucket
                                     * from its head. */
405                                 spin_lock_bh(&xfrm_state_lock);
406                                 goto restart;
407                         }
408                 }
409         }
410         spin_unlock_bh(&xfrm_state_lock);
411         wake_up(&km_waitq);
412 }
413 EXPORT_SYMBOL(xfrm_state_flush);
414
415 static int
416 xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
417                   struct xfrm_tmpl *tmpl,
418                   xfrm_address_t *daddr, xfrm_address_t *saddr,
419                   unsigned short family)
420 {
421         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
422         if (!afinfo)
423                 return -1;
424         afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
425         xfrm_state_put_afinfo(afinfo);
426         return 0;
427 }
428
429 static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family)
430 {
431         unsigned int h = xfrm_spi_hash(daddr, spi, proto, family);
432         struct xfrm_state *x;
433         struct hlist_node *entry;
434
435         hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) {
436                 if (x->props.family != family ||
437                     x->id.spi       != spi ||
438                     x->id.proto     != proto)
439                         continue;
440
441                 switch (family) {
442                 case AF_INET:
443                         if (x->id.daddr.a4 != daddr->a4)
444                                 continue;
445                         break;
446                 case AF_INET6:
447                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
448                                              (struct in6_addr *)
449                                              x->id.daddr.a6))
450                                 continue;
451                         break;
452                 };
453
454                 xfrm_state_hold(x);
455                 return x;
456         }
457
458         return NULL;
459 }
460
461 static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
462 {
463         unsigned int h = xfrm_src_hash(daddr, saddr, family);
464         struct xfrm_state *x;
465         struct hlist_node *entry;
466
467         hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) {
468                 if (x->props.family != family ||
469                     x->id.proto     != proto)
470                         continue;
471
472                 switch (family) {
473                 case AF_INET:
474                         if (x->id.daddr.a4 != daddr->a4 ||
475                             x->props.saddr.a4 != saddr->a4)
476                                 continue;
477                         break;
478                 case AF_INET6:
479                         if (!ipv6_addr_equal((struct in6_addr *)daddr,
480                                              (struct in6_addr *)
481                                              x->id.daddr.a6) ||
482                             !ipv6_addr_equal((struct in6_addr *)saddr,
483                                              (struct in6_addr *)
484                                              x->props.saddr.a6))
485                                 continue;
486                         break;
487                 };
488
489                 xfrm_state_hold(x);
490                 return x;
491         }
492
493         return NULL;
494 }
495
496 static inline struct xfrm_state *
497 __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family)
498 {
499         if (use_spi)
500                 return __xfrm_state_lookup(&x->id.daddr, x->id.spi,
501                                            x->id.proto, family);
502         else
503                 return __xfrm_state_lookup_byaddr(&x->id.daddr,
504                                                   &x->props.saddr,
505                                                   x->id.proto, family);
506 }
507
508 struct xfrm_state *
509 xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
510                 struct flowi *fl, struct xfrm_tmpl *tmpl,
511                 struct xfrm_policy *pol, int *err,
512                 unsigned short family)
513 {
514         unsigned int h = xfrm_dst_hash(daddr, saddr, tmpl->reqid, family);
515         struct hlist_node *entry;
516         struct xfrm_state *x, *x0;
517         int acquire_in_progress = 0;
518         int error = 0;
519         struct xfrm_state *best = NULL;
520         
521         spin_lock_bh(&xfrm_state_lock);
522         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
523                 if (x->props.family == family &&
524                     x->props.reqid == tmpl->reqid &&
525                     !(x->props.flags & XFRM_STATE_WILDRECV) &&
526                     xfrm_state_addr_check(x, daddr, saddr, family) &&
527                     tmpl->mode == x->props.mode &&
528                     tmpl->id.proto == x->id.proto &&
529                     (tmpl->id.spi == x->id.spi || !tmpl->id.spi)) {
530                         /* Resolution logic:
531                            1. There is a valid state with matching selector.
532                               Done.
533                            2. Valid state with inappropriate selector. Skip.
534
535                            Entering area of "sysdeps".
536
537                            3. If state is not valid, selector is temporary,
538                               it selects only session which triggered
539                               previous resolution. Key manager will do
540                               something to install a state with proper
541                               selector.
542                          */
543                         if (x->km.state == XFRM_STATE_VALID) {
544                                 if (!xfrm_selector_match(&x->sel, fl, family) ||
545                                     !security_xfrm_state_pol_flow_match(x, pol, fl))
546                                         continue;
547                                 if (!best ||
548                                     best->km.dying > x->km.dying ||
549                                     (best->km.dying == x->km.dying &&
550                                      best->curlft.add_time < x->curlft.add_time))
551                                         best = x;
552                         } else if (x->km.state == XFRM_STATE_ACQ) {
553                                 acquire_in_progress = 1;
554                         } else if (x->km.state == XFRM_STATE_ERROR ||
555                                    x->km.state == XFRM_STATE_EXPIRED) {
556                                 if (xfrm_selector_match(&x->sel, fl, family) &&
557                                     security_xfrm_state_pol_flow_match(x, pol, fl))
558                                         error = -ESRCH;
559                         }
560                 }
561         }
562
563         x = best;
564         if (!x && !error && !acquire_in_progress) {
565                 if (tmpl->id.spi &&
566                     (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi,
567                                               tmpl->id.proto, family)) != NULL) {
568                         xfrm_state_put(x0);
569                         error = -EEXIST;
570                         goto out;
571                 }
572                 x = xfrm_state_alloc();
573                 if (x == NULL) {
574                         error = -ENOMEM;
575                         goto out;
576                 }
577                 /* Initialize temporary selector matching only
578                  * to current session. */
579                 xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
580
581                 error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid);
582                 if (error) {
583                         x->km.state = XFRM_STATE_DEAD;
584                         xfrm_state_put(x);
585                         x = NULL;
586                         goto out;
587                 }
588
589                 if (km_query(x, tmpl, pol) == 0) {
590                         x->km.state = XFRM_STATE_ACQ;
591                         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
592                         h = xfrm_src_hash(daddr, saddr, family);
593                         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
594                         if (x->id.spi) {
595                                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
596                                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
597                         }
598                         x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
599                         x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
600                         add_timer(&x->timer);
601                 } else {
602                         x->km.state = XFRM_STATE_DEAD;
603                         xfrm_state_put(x);
604                         x = NULL;
605                         error = -ESRCH;
606                 }
607         }
608 out:
609         if (x)
610                 xfrm_state_hold(x);
611         else
612                 *err = acquire_in_progress ? -EAGAIN : error;
613         spin_unlock_bh(&xfrm_state_lock);
614         return x;
615 }
616
617 static void xfrm_hash_grow_check(int have_hash_collision)
618 {
619         if (have_hash_collision &&
620             (xfrm_state_hmask + 1) < xfrm_state_hashmax &&
621             xfrm_state_num > xfrm_state_hmask)
622                 schedule_work(&xfrm_hash_work);
623 }
624
625 static void __xfrm_state_insert(struct xfrm_state *x)
626 {
627         unsigned int h;
628
629         x->genid = ++xfrm_state_genid;
630
631         h = xfrm_dst_hash(&x->id.daddr, &x->props.saddr,
632                           x->props.reqid, x->props.family);
633         hlist_add_head(&x->bydst, xfrm_state_bydst+h);
634
635         h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
636         hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
637
638         if (x->id.spi) {
639                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto,
640                                   x->props.family);
641
642                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
643         }
644
645         mod_timer(&x->timer, jiffies + HZ);
646         if (x->replay_maxage)
647                 mod_timer(&x->rtimer, jiffies + x->replay_maxage);
648
649         wake_up(&km_waitq);
650
651         xfrm_state_num++;
652
653         xfrm_hash_grow_check(x->bydst.next != NULL);
654 }
655
656 /* xfrm_state_lock is held */
657 static void __xfrm_state_bump_genids(struct xfrm_state *xnew)
658 {
659         unsigned short family = xnew->props.family;
660         u32 reqid = xnew->props.reqid;
661         struct xfrm_state *x;
662         struct hlist_node *entry;
663         unsigned int h;
664
665         h = xfrm_dst_hash(&xnew->id.daddr, &xnew->props.saddr, reqid, family);
666         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
667                 if (x->props.family     == family &&
668                     x->props.reqid      == reqid &&
669                     !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) &&
670                     !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family))
671                         x->genid = xfrm_state_genid;
672         }
673 }
674
675 void xfrm_state_insert(struct xfrm_state *x)
676 {
677         spin_lock_bh(&xfrm_state_lock);
678         __xfrm_state_bump_genids(x);
679         __xfrm_state_insert(x);
680         spin_unlock_bh(&xfrm_state_lock);
681 }
682 EXPORT_SYMBOL(xfrm_state_insert);
683
684 /* xfrm_state_lock is held */
/*
 * Look up a pending acquire: a state in XFRM_STATE_ACQ with a zero SPI that
 * matches @family, @mode, @reqid and both addresses.  Note that @proto is
 * not part of the match; it is only stored in a newly created state.
 *
 * If none is found and @create is non-zero, a new acquire state is built
 * with a host-length selector (/32 or /128), linked into the by-destination
 * and by-source tables, and given an XFRM_ACQ_EXPIRES lifetime.
 *
 * Returns the state with a reference held for the caller, or NULL if nothing
 * matched and @create is zero, or if the allocation failed.
 */
685 static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create)
686 {
687         unsigned int h = xfrm_dst_hash(daddr, saddr, reqid, family);
688         struct hlist_node *entry;
689         struct xfrm_state *x;
690
691         hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) {
692                 if (x->props.reqid  != reqid ||
693                     x->props.mode   != mode ||
694                     x->props.family != family ||
695                     x->km.state     != XFRM_STATE_ACQ ||
696                     x->id.spi       != 0)
697                         continue;
698
                    /* Addresses are compared only for the two known families. */
699                 switch (family) {
700                 case AF_INET:
701                         if (x->id.daddr.a4    != daddr->a4 ||
702                             x->props.saddr.a4 != saddr->a4)
703                                 continue;
704                         break;
705                 case AF_INET6:
706                         if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6,
707                                              (struct in6_addr *)daddr) ||
708                             !ipv6_addr_equal((struct in6_addr *)
709                                              x->props.saddr.a6,
710                                              (struct in6_addr *)saddr))
711                                 continue;
712                         break;
713                 };
714
715                 xfrm_state_hold(x);
716                 return x;
717         }
718
719         if (!create)
720                 return NULL;
721
722         x = xfrm_state_alloc();
723         if (likely(x)) {
724                 switch (family) {
725                 case AF_INET:
726                         x->sel.daddr.a4 = daddr->a4;
727                         x->sel.saddr.a4 = saddr->a4;
728                         x->sel.prefixlen_d = 32;
729                         x->sel.prefixlen_s = 32;
730                         x->props.saddr.a4 = saddr->a4;
731                         x->id.daddr.a4 = daddr->a4;
732                         break;
733
734                 case AF_INET6:
735                         ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6,
736                                        (struct in6_addr *)daddr);
737                         ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6,
738                                        (struct in6_addr *)saddr);
739                         x->sel.prefixlen_d = 128;
740                         x->sel.prefixlen_s = 128;
741                         ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6,
742                                        (struct in6_addr *)saddr);
743                         ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6,
744                                        (struct in6_addr *)daddr);
745                         break;
746                 };
747
748                 x->km.state = XFRM_STATE_ACQ;
749                 x->id.proto = proto;
750                 x->props.family = family;
751                 x->props.mode = mode;
752                 x->props.reqid = reqid;
753                 x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
                    /* Second reference: the one from xfrm_state_alloc() is
                     * the one __xfrm_state_delete() later drops; this one
                     * goes to the caller. */
754                 xfrm_state_hold(x);
755                 x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
756                 add_timer(&x->timer);
                    /* The SPI is zero here, so the by-SPI table is not used. */
757                 hlist_add_head(&x->bydst, xfrm_state_bydst+h);
758                 h = xfrm_src_hash(daddr, saddr, family);
759                 hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
760                 wake_up(&km_waitq);
761
762                 xfrm_state_num++;
763
764                 xfrm_hash_grow_check(x->bydst.next != NULL);
765         }
766
767         return x;
768 }
769
770 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
771
/*
 * Install a new state.
 *
 * Returns -EEXIST if an equivalent state is already installed (looked up by
 * SPI, or by address pair when the protocol does not use an SPI); otherwise
 * inserts @x and returns 0.  When a pending acquire state corresponding to
 * @x is found, it is deleted after the insert, outside xfrm_state_lock.
 */
772 int xfrm_state_add(struct xfrm_state *x)
773 {
774         struct xfrm_state *x1;
775         int family;
776         int err;
777         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
778
779         family = x->props.family;
780
781         spin_lock_bh(&xfrm_state_lock);
782
783         x1 = __xfrm_state_locate(x, use_spi, family);
784         if (x1) {
785                 xfrm_state_put(x1);
                    /* Clear x1 so the cleanup after "out" does not delete
                     * the existing state. */
786                 x1 = NULL;
787                 err = -EEXIST;
788                 goto out;
789         }
790
            /* Prefer the acquire state identified by sequence number, but
             * only if its destination address matches. */
791         if (use_spi && x->km.seq) {
792                 x1 = __xfrm_find_acq_byseq(x->km.seq);
793                 if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
794                         xfrm_state_put(x1);
795                         x1 = NULL;
796                 }
797         }
798
            /* Otherwise look for a matching acquire state without creating
             * one (create == 0). */
799         if (use_spi && !x1)
800                 x1 = __find_acq_core(family, x->props.mode, x->props.reqid,
801                                      x->id.proto,
802                                      &x->id.daddr, &x->props.saddr, 0);
803
804         __xfrm_state_bump_genids(x);
805         __xfrm_state_insert(x);
806         err = 0;
807
808 out:
809         spin_unlock_bh(&xfrm_state_lock);
810
            /* x1, if set, is the superseded acquire state: delete it and
             * drop the lookup reference. */
811         if (x1) {
812                 xfrm_state_delete(x1);
813                 xfrm_state_put(x1);
814         }
815
816         return err;
817 }
818 EXPORT_SYMBOL(xfrm_state_add);
819
/*
 * Update an installed state from @x.
 *
 * Returns -ESRCH if no matching state is installed and -EEXIST if the
 * installed state satisfies xfrm_state_kern().  If the installed state is
 * still an acquire placeholder, @x is inserted in its place, the placeholder
 * is deleted, and 0 is returned.  Otherwise @x is NOT inserted: its encap,
 * care-of address, lifetimes and (for non-SPI protocols) selector are copied
 * into the installed state, returning 0, or -EINVAL if that state is not
 * VALID.
 */
820 int xfrm_state_update(struct xfrm_state *x)
821 {
822         struct xfrm_state *x1;
823         int err;
824         int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY);
825
826         spin_lock_bh(&xfrm_state_lock);
827         x1 = __xfrm_state_locate(x, use_spi, x->props.family);
828
829         err = -ESRCH;
830         if (!x1)
831                 goto out;
832
833         if (xfrm_state_kern(x1)) {
834                 xfrm_state_put(x1);
835                 err = -EEXIST;
836                 goto out;
837         }
838
839         if (x1->km.state == XFRM_STATE_ACQ) {
840                 __xfrm_state_insert(x);
                    /* x == NULL below signals "x was inserted, x1 must go". */
841                 x = NULL;
842         }
843         err = 0;
844
845 out:
846         spin_unlock_bh(&xfrm_state_lock);
847
848         if (err)
849                 return err;
850
851         if (!x) {
852                 xfrm_state_delete(x1);
853                 xfrm_state_put(x1);
854                 return 0;
855         }
856
857         err = -EINVAL;
858         spin_lock_bh(&x1->lock);
859         if (likely(x1->km.state == XFRM_STATE_VALID)) {
                    /* Optional parts are copied only when both states
                     * already have the buffer; nothing is allocated here. */
860                 if (x->encap && x1->encap)
861                         memcpy(x1->encap, x->encap, sizeof(*x1->encap));
862                 if (x->coaddr && x1->coaddr) {
863                         memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr));
864                 }
865                 if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel)))
866                         memcpy(&x1->sel, &x->sel, sizeof(x1->sel));
867                 memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
868                 x1->km.dying = 0;
869
                    /* Re-run the lifetime timer soon so the new limits
                     * take effect. */
870                 mod_timer(&x1->timer, jiffies + HZ);
871                 if (x1->curlft.use_time)
872                         xfrm_state_check_expire(x1);
873
874                 err = 0;
875         }
876         spin_unlock_bh(&x1->lock);
877
            /* Drop the reference taken by the lookup. */
878         xfrm_state_put(x1);
879
880         return err;
881 }
882 EXPORT_SYMBOL(xfrm_state_update);
883
884 int xfrm_state_check_expire(struct xfrm_state *x)
885 {
886         if (!x->curlft.use_time)
887                 x->curlft.use_time = (unsigned long)xtime.tv_sec;
888
889         if (x->km.state != XFRM_STATE_VALID)
890                 return -EINVAL;
891
892         if (x->curlft.bytes >= x->lft.hard_byte_limit ||
893             x->curlft.packets >= x->lft.hard_packet_limit) {
894                 x->km.state = XFRM_STATE_EXPIRED;
895                 mod_timer(&x->timer, jiffies);
896                 return -EINVAL;
897         }
898
899         if (!x->km.dying &&
900             (x->curlft.bytes >= x->lft.soft_byte_limit ||
901              x->curlft.packets >= x->lft.soft_packet_limit)) {
902                 x->km.dying = 1;
903                 km_state_expired(x, 0, 0);
904         }
905         return 0;
906 }
907 EXPORT_SYMBOL(xfrm_state_check_expire);
908
909 static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
910 {
911         int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
912                 - skb_headroom(skb);
913
914         if (nhead > 0)
915                 return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
916
917         /* Check tail too... */
918         return 0;
919 }
920
/*
 * Run the expiry check on @x and, if it passes, the headroom check on
 * @skb.  Returns the first negative result, or the result of
 * xfrm_state_check_space().
 */
int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
{
	int err = xfrm_state_check_expire(x);

	if (err < 0)
		return err;

	return xfrm_state_check_space(x, skb);
}
EXPORT_SYMBOL(xfrm_state_check);
931
932 struct xfrm_state *
933 xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto,
934                   unsigned short family)
935 {
936         struct xfrm_state *x;
937
938         spin_lock_bh(&xfrm_state_lock);
939         x = __xfrm_state_lookup(daddr, spi, proto, family);
940         spin_unlock_bh(&xfrm_state_lock);
941         return x;
942 }
943 EXPORT_SYMBOL(xfrm_state_lookup);
944
945 struct xfrm_state *
946 xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr,
947                          u8 proto, unsigned short family)
948 {
949         struct xfrm_state *x;
950
951         spin_lock_bh(&xfrm_state_lock);
952         x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family);
953         spin_unlock_bh(&xfrm_state_lock);
954         return x;
955 }
956 EXPORT_SYMBOL(xfrm_state_lookup_byaddr);
957
958 struct xfrm_state *
959 xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
960               xfrm_address_t *daddr, xfrm_address_t *saddr, 
961               int create, unsigned short family)
962 {
963         struct xfrm_state *x;
964
965         spin_lock_bh(&xfrm_state_lock);
966         x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create);
967         spin_unlock_bh(&xfrm_state_lock);
968
969         return x;
970 }
971 EXPORT_SYMBOL(xfrm_find_acq);
972
973 #ifdef CONFIG_XFRM_SUB_POLICY
974 int
975 xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
976                unsigned short family)
977 {
978         int err = 0;
979         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
980         if (!afinfo)
981                 return -EAFNOSUPPORT;
982
983         spin_lock_bh(&xfrm_state_lock);
984         if (afinfo->tmpl_sort)
985                 err = afinfo->tmpl_sort(dst, src, n);
986         spin_unlock_bh(&xfrm_state_lock);
987         xfrm_state_put_afinfo(afinfo);
988         return err;
989 }
990 EXPORT_SYMBOL(xfrm_tmpl_sort);
991
992 int
993 xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
994                 unsigned short family)
995 {
996         int err = 0;
997         struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
998         if (!afinfo)
999                 return -EAFNOSUPPORT;
1000
1001         spin_lock_bh(&xfrm_state_lock);
1002         if (afinfo->state_sort)
1003                 err = afinfo->state_sort(dst, src, n);
1004         spin_unlock_bh(&xfrm_state_lock);
1005         xfrm_state_put_afinfo(afinfo);
1006         return err;
1007 }
1008 EXPORT_SYMBOL(xfrm_state_sort);
1009 #endif
1010
1011 /* Silly enough, but I'm lazy to build resolution list */
1012
1013 static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
1014 {
1015         int i;
1016
1017         for (i = 0; i <= xfrm_state_hmask; i++) {
1018                 struct hlist_node *entry;
1019                 struct xfrm_state *x;
1020
1021                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1022                         if (x->km.seq == seq &&
1023                             x->km.state == XFRM_STATE_ACQ) {
1024                                 xfrm_state_hold(x);
1025                                 return x;
1026                         }
1027                 }
1028         }
1029         return NULL;
1030 }
1031
1032 struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
1033 {
1034         struct xfrm_state *x;
1035
1036         spin_lock_bh(&xfrm_state_lock);
1037         x = __xfrm_find_acq_byseq(seq);
1038         spin_unlock_bh(&xfrm_state_lock);
1039         return x;
1040 }
1041 EXPORT_SYMBOL(xfrm_find_acq_byseq);
1042
1043 u32 xfrm_get_acqseq(void)
1044 {
1045         u32 res;
1046         static u32 acqseq;
1047         static DEFINE_SPINLOCK(acqseq_lock);
1048
1049         spin_lock_bh(&acqseq_lock);
1050         res = (++acqseq ? : ++acqseq);
1051         spin_unlock_bh(&acqseq_lock);
1052         return res;
1053 }
1054 EXPORT_SYMBOL(xfrm_get_acqseq);
1055
1056 void
1057 xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi)
1058 {
1059         unsigned int h;
1060         struct xfrm_state *x0;
1061
1062         if (x->id.spi)
1063                 return;
1064
1065         if (minspi == maxspi) {
1066                 x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
1067                 if (x0) {
1068                         xfrm_state_put(x0);
1069                         return;
1070                 }
1071                 x->id.spi = minspi;
1072         } else {
1073                 u32 spi = 0;
1074                 u32 low = ntohl(minspi);
1075                 u32 high = ntohl(maxspi);
1076                 for (h=0; h<high-low+1; h++) {
1077                         spi = low + net_random()%(high-low+1);
1078                         x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
1079                         if (x0 == NULL) {
1080                                 x->id.spi = htonl(spi);
1081                                 break;
1082                         }
1083                         xfrm_state_put(x0);
1084                 }
1085         }
1086         if (x->id.spi) {
1087                 spin_lock_bh(&xfrm_state_lock);
1088                 h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
1089                 hlist_add_head(&x->byspi, xfrm_state_byspi+h);
1090                 spin_unlock_bh(&xfrm_state_lock);
1091                 wake_up(&km_waitq);
1092         }
1093 }
1094 EXPORT_SYMBOL(xfrm_alloc_spi);
1095
1096 int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
1097                     void *data)
1098 {
1099         int i;
1100         struct xfrm_state *x;
1101         struct hlist_node *entry;
1102         int count = 0;
1103         int err = 0;
1104
1105         spin_lock_bh(&xfrm_state_lock);
1106         for (i = 0; i <= xfrm_state_hmask; i++) {
1107                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1108                         if (xfrm_id_proto_match(x->id.proto, proto))
1109                                 count++;
1110                 }
1111         }
1112         if (count == 0) {
1113                 err = -ENOENT;
1114                 goto out;
1115         }
1116
1117         for (i = 0; i <= xfrm_state_hmask; i++) {
1118                 hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) {
1119                         if (!xfrm_id_proto_match(x->id.proto, proto))
1120                                 continue;
1121                         err = func(x, --count, data);
1122                         if (err)
1123                                 goto out;
1124                 }
1125         }
1126 out:
1127         spin_unlock_bh(&xfrm_state_lock);
1128         return err;
1129 }
1130 EXPORT_SYMBOL(xfrm_state_walk);
1131
1132
1133 void xfrm_replay_notify(struct xfrm_state *x, int event)
1134 {
1135         struct km_event c;
1136         /* we send notify messages in case
1137          *  1. we updated on of the sequence numbers, and the seqno difference
1138          *     is at least x->replay_maxdiff, in this case we also update the
1139          *     timeout of our timer function
1140          *  2. if x->replay_maxage has elapsed since last update,
1141          *     and there were changes
1142          *
1143          *  The state structure must be locked!
1144          */
1145
1146         switch (event) {
1147         case XFRM_REPLAY_UPDATE:
1148                 if (x->replay_maxdiff &&
1149                     (x->replay.seq - x->preplay.seq < x->replay_maxdiff) &&
1150                     (x->replay.oseq - x->preplay.oseq < x->replay_maxdiff)) {
1151                         if (x->xflags & XFRM_TIME_DEFER)
1152                                 event = XFRM_REPLAY_TIMEOUT;
1153                         else
1154                                 return;
1155                 }
1156
1157                 break;
1158
1159         case XFRM_REPLAY_TIMEOUT:
1160                 if ((x->replay.seq == x->preplay.seq) &&
1161                     (x->replay.bitmap == x->preplay.bitmap) &&
1162                     (x->replay.oseq == x->preplay.oseq)) {
1163                         x->xflags |= XFRM_TIME_DEFER;
1164                         return;
1165                 }
1166
1167                 break;
1168         }
1169
1170         memcpy(&x->preplay, &x->replay, sizeof(struct xfrm_replay_state));
1171         c.event = XFRM_MSG_NEWAE;
1172         c.data.aevent = event;
1173         km_state_notify(x, &c);
1174
1175         if (x->replay_maxage &&
1176             !mod_timer(&x->rtimer, jiffies + x->replay_maxage))
1177                 x->xflags &= ~XFRM_TIME_DEFER;
1178 }
1179 EXPORT_SYMBOL(xfrm_replay_notify);
1180
1181 static void xfrm_replay_timer_handler(unsigned long data)
1182 {
1183         struct xfrm_state *x = (struct xfrm_state*)data;
1184
1185         spin_lock(&x->lock);
1186
1187         if (x->km.state == XFRM_STATE_VALID) {
1188                 if (xfrm_aevent_is_on())
1189                         xfrm_replay_notify(x, XFRM_REPLAY_TIMEOUT);
1190                 else
1191                         x->xflags |= XFRM_TIME_DEFER;
1192         }
1193
1194         spin_unlock(&x->lock);
1195 }
1196
1197 int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq)
1198 {
1199         u32 diff;
1200         u32 seq = ntohl(net_seq);
1201
1202         if (unlikely(seq == 0))
1203                 return -EINVAL;
1204
1205         if (likely(seq > x->replay.seq))
1206                 return 0;
1207
1208         diff = x->replay.seq - seq;
1209         if (diff >= x->props.replay_window) {
1210                 x->stats.replay_window++;
1211                 return -EINVAL;
1212         }
1213
1214         if (x->replay.bitmap & (1U << diff)) {
1215                 x->stats.replay++;
1216                 return -EINVAL;
1217         }
1218         return 0;
1219 }
1220 EXPORT_SYMBOL(xfrm_replay_check);
1221
1222 void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq)
1223 {
1224         u32 diff;
1225         u32 seq = ntohl(net_seq);
1226
1227         if (seq > x->replay.seq) {
1228                 diff = seq - x->replay.seq;
1229                 if (diff < x->props.replay_window)
1230                         x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
1231                 else
1232                         x->replay.bitmap = 1;
1233                 x->replay.seq = seq;
1234         } else {
1235                 diff = x->replay.seq - seq;
1236                 x->replay.bitmap |= (1U << diff);
1237         }
1238
1239         if (xfrm_aevent_is_on())
1240                 xfrm_replay_notify(x, XFRM_REPLAY_UPDATE);
1241 }
1242 EXPORT_SYMBOL(xfrm_replay_advance);
1243
/*
 * Registered key managers (struct xfrm_mgr, linked through ->list).
 * Traversals in this file take xfrm_km_lock for reading, registration
 * and unregistration take it for writing.
 */
static LIST_HEAD(xfrm_km_list);
static DEFINE_RWLOCK(xfrm_km_lock);
1246
1247 void km_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c)
1248 {
1249         struct xfrm_mgr *km;
1250
1251         read_lock(&xfrm_km_lock);
1252         list_for_each_entry(km, &xfrm_km_list, list)
1253                 if (km->notify_policy)
1254                         km->notify_policy(xp, dir, c);
1255         read_unlock(&xfrm_km_lock);
1256 }
1257
1258 void km_state_notify(struct xfrm_state *x, struct km_event *c)
1259 {
1260         struct xfrm_mgr *km;
1261         read_lock(&xfrm_km_lock);
1262         list_for_each_entry(km, &xfrm_km_list, list)
1263                 if (km->notify)
1264                         km->notify(x, c);
1265         read_unlock(&xfrm_km_lock);
1266 }
1267
1268 EXPORT_SYMBOL(km_policy_notify);
1269 EXPORT_SYMBOL(km_state_notify);
1270
1271 void km_state_expired(struct xfrm_state *x, int hard, u32 pid)
1272 {
1273         struct km_event c;
1274
1275         c.data.hard = hard;
1276         c.pid = pid;
1277         c.event = XFRM_MSG_EXPIRE;
1278         km_state_notify(x, &c);
1279
1280         if (hard)
1281                 wake_up(&km_waitq);
1282 }
1283
1284 EXPORT_SYMBOL(km_state_expired);
1285 /*
1286  * We send to all registered managers regardless of failure
1287  * We are happy with one success
1288 */
1289 int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
1290 {
1291         int err = -EINVAL, acqret;
1292         struct xfrm_mgr *km;
1293
1294         read_lock(&xfrm_km_lock);
1295         list_for_each_entry(km, &xfrm_km_list, list) {
1296                 acqret = km->acquire(x, t, pol, XFRM_POLICY_OUT);
1297                 if (!acqret)
1298                         err = acqret;
1299         }
1300         read_unlock(&xfrm_km_lock);
1301         return err;
1302 }
1303 EXPORT_SYMBOL(km_query);
1304
1305 int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
1306 {
1307         int err = -EINVAL;
1308         struct xfrm_mgr *km;
1309
1310         read_lock(&xfrm_km_lock);
1311         list_for_each_entry(km, &xfrm_km_list, list) {
1312                 if (km->new_mapping)
1313                         err = km->new_mapping(x, ipaddr, sport);
1314                 if (!err)
1315                         break;
1316         }
1317         read_unlock(&xfrm_km_lock);
1318         return err;
1319 }
1320 EXPORT_SYMBOL(km_new_mapping);
1321
1322 void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid)
1323 {
1324         struct km_event c;
1325
1326         c.data.hard = hard;
1327         c.pid = pid;
1328         c.event = XFRM_MSG_POLEXPIRE;
1329         km_policy_notify(pol, dir, &c);
1330
1331         if (hard)
1332                 wake_up(&km_waitq);
1333 }
1334 EXPORT_SYMBOL(km_policy_expired);
1335
1336 int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr)
1337 {
1338         int err = -EINVAL;
1339         int ret;
1340         struct xfrm_mgr *km;
1341
1342         read_lock(&xfrm_km_lock);
1343         list_for_each_entry(km, &xfrm_km_list, list) {
1344                 if (km->report) {
1345                         ret = km->report(proto, sel, addr);
1346                         if (!ret)
1347                                 err = ret;
1348                 }
1349         }
1350         read_unlock(&xfrm_km_lock);
1351         return err;
1352 }
1353 EXPORT_SYMBOL(km_report);
1354
1355 int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen)
1356 {
1357         int err;
1358         u8 *data;
1359         struct xfrm_mgr *km;
1360         struct xfrm_policy *pol = NULL;
1361
1362         if (optlen <= 0 || optlen > PAGE_SIZE)
1363                 return -EMSGSIZE;
1364
1365         data = kmalloc(optlen, GFP_KERNEL);
1366         if (!data)
1367                 return -ENOMEM;
1368
1369         err = -EFAULT;
1370         if (copy_from_user(data, optval, optlen))
1371                 goto out;
1372
1373         err = -EINVAL;
1374         read_lock(&xfrm_km_lock);
1375         list_for_each_entry(km, &xfrm_km_list, list) {
1376                 pol = km->compile_policy(sk, optname, data,
1377                                          optlen, &err);
1378                 if (err >= 0)
1379                         break;
1380         }
1381         read_unlock(&xfrm_km_lock);
1382
1383         if (err >= 0) {
1384                 xfrm_sk_policy_insert(sk, err, pol);
1385                 xfrm_pol_put(pol);
1386                 err = 0;
1387         }
1388
1389 out:
1390         kfree(data);
1391         return err;
1392 }
1393 EXPORT_SYMBOL(xfrm_user_policy);
1394
1395 int xfrm_register_km(struct xfrm_mgr *km)
1396 {
1397         write_lock_bh(&xfrm_km_lock);
1398         list_add_tail(&km->list, &xfrm_km_list);
1399         write_unlock_bh(&xfrm_km_lock);
1400         return 0;
1401 }
1402 EXPORT_SYMBOL(xfrm_register_km);
1403
1404 int xfrm_unregister_km(struct xfrm_mgr *km)
1405 {
1406         write_lock_bh(&xfrm_km_lock);
1407         list_del(&km->list);
1408         write_unlock_bh(&xfrm_km_lock);
1409         return 0;
1410 }
1411 EXPORT_SYMBOL(xfrm_unregister_km);
1412
1413 int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
1414 {
1415         int err = 0;
1416         if (unlikely(afinfo == NULL))
1417                 return -EINVAL;
1418         if (unlikely(afinfo->family >= NPROTO))
1419                 return -EAFNOSUPPORT;
1420         write_lock_bh(&xfrm_state_afinfo_lock);
1421         if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
1422                 err = -ENOBUFS;
1423         else
1424                 xfrm_state_afinfo[afinfo->family] = afinfo;
1425         write_unlock_bh(&xfrm_state_afinfo_lock);
1426         return err;
1427 }
1428 EXPORT_SYMBOL(xfrm_state_register_afinfo);
1429
1430 int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
1431 {
1432         int err = 0;
1433         if (unlikely(afinfo == NULL))
1434                 return -EINVAL;
1435         if (unlikely(afinfo->family >= NPROTO))
1436                 return -EAFNOSUPPORT;
1437         write_lock_bh(&xfrm_state_afinfo_lock);
1438         if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
1439                 if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
1440                         err = -EINVAL;
1441                 else
1442                         xfrm_state_afinfo[afinfo->family] = NULL;
1443         }
1444         write_unlock_bh(&xfrm_state_afinfo_lock);
1445         return err;
1446 }
1447 EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
1448
1449 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
1450 {
1451         struct xfrm_state_afinfo *afinfo;
1452         if (unlikely(family >= NPROTO))
1453                 return NULL;
1454         read_lock(&xfrm_state_afinfo_lock);
1455         afinfo = xfrm_state_afinfo[family];
1456         if (unlikely(!afinfo))
1457                 read_unlock(&xfrm_state_afinfo_lock);
1458         return afinfo;
1459 }
1460
1461 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
1462 {
1463         read_unlock(&xfrm_state_afinfo_lock);
1464 }
1465
1466 /* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
1467 void xfrm_state_delete_tunnel(struct xfrm_state *x)
1468 {
1469         if (x->tunnel) {
1470                 struct xfrm_state *t = x->tunnel;
1471
1472                 if (atomic_read(&t->tunnel_users) == 2)
1473                         xfrm_state_delete(t);
1474                 atomic_dec(&t->tunnel_users);
1475                 xfrm_state_put(t);
1476                 x->tunnel = NULL;
1477         }
1478 }
1479 EXPORT_SYMBOL(xfrm_state_delete_tunnel);
1480
1481 /*
1482  * This function is NOT optimal.  For example, with ESP it will give an
1483  * MTU that's usually two bytes short of being optimal.  However, it will
1484  * usually give an answer that's a multiple of 4 provided the input is
1485  * also a multiple of 4.
1486  */
1487 int xfrm_state_mtu(struct xfrm_state *x, int mtu)
1488 {
1489         int res = mtu;
1490
1491         res -= x->props.header_len;
1492
1493         for (;;) {
1494                 int m = res;
1495
1496                 if (m < 68)
1497                         return 68;
1498
1499                 spin_lock_bh(&x->lock);
1500                 if (x->km.state == XFRM_STATE_VALID &&
1501                     x->type && x->type->get_max_size)
1502                         m = x->type->get_max_size(x, m);
1503                 else
1504                         m += x->props.header_len;
1505                 spin_unlock_bh(&x->lock);
1506
1507                 if (m <= mtu)
1508                         break;
1509                 res -= (m - mtu);
1510         }
1511
1512         return res;
1513 }
1514
1515 int xfrm_init_state(struct xfrm_state *x)
1516 {
1517         struct xfrm_state_afinfo *afinfo;
1518         int family = x->props.family;
1519         int err;
1520
1521         err = -EAFNOSUPPORT;
1522         afinfo = xfrm_state_get_afinfo(family);
1523         if (!afinfo)
1524                 goto error;
1525
1526         err = 0;
1527         if (afinfo->init_flags)
1528                 err = afinfo->init_flags(x);
1529
1530         xfrm_state_put_afinfo(afinfo);
1531
1532         if (err)
1533                 goto error;
1534
1535         err = -EPROTONOSUPPORT;
1536         x->type = xfrm_get_type(x->id.proto, family);
1537         if (x->type == NULL)
1538                 goto error;
1539
1540         err = x->type->init_state(x);
1541         if (err)
1542                 goto error;
1543
1544         x->mode = xfrm_get_mode(x->props.mode, family);
1545         if (x->mode == NULL)
1546                 goto error;
1547
1548         x->km.state = XFRM_STATE_VALID;
1549
1550 error:
1551         return err;
1552 }
1553
1554 EXPORT_SYMBOL(xfrm_init_state);
1555  
1556 void __init xfrm_state_init(void)
1557 {
1558         unsigned int sz;
1559
1560         sz = sizeof(struct hlist_head) * 8;
1561
1562         xfrm_state_bydst = xfrm_hash_alloc(sz);
1563         xfrm_state_bysrc = xfrm_hash_alloc(sz);
1564         xfrm_state_byspi = xfrm_hash_alloc(sz);
1565         if (!xfrm_state_bydst || !xfrm_state_bysrc || !xfrm_state_byspi)
1566                 panic("XFRM: Cannot allocate bydst/bysrc/byspi hashes.");
1567         xfrm_state_hmask = ((sz / sizeof(struct hlist_head)) - 1);
1568
1569         INIT_WORK(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
1570 }
1571