net: Discard and warn about LRO'd skbs received for forwarding
[linux-2.6] / net / ipv4 / ipcomp.c
1 /*
2  * IP Payload Compression Protocol (IPComp) - RFC3173.
3  *
4  * Copyright (c) 2003 James Morris <jmorris@intercode.com.au>
5  *
6  * This program is free software; you can redistribute it and/or modify it
7  * under the terms of the GNU General Public License as published by the Free
8  * Software Foundation; either version 2 of the License, or (at your option)
9  * any later version.
10  *
11  * Todo:
12  *   - Tunable compression parameters.
13  *   - Compression stats.
14  *   - Adaptive compression.
15  */
16 #include <linux/module.h>
17 #include <linux/crypto.h>
18 #include <linux/err.h>
19 #include <linux/pfkeyv2.h>
20 #include <linux/percpu.h>
21 #include <linux/smp.h>
22 #include <linux/list.h>
23 #include <linux/vmalloc.h>
24 #include <linux/rtnetlink.h>
25 #include <linux/mutex.h>
26 #include <net/ip.h>
27 #include <net/xfrm.h>
28 #include <net/icmp.h>
29 #include <net/ipcomp.h>
30 #include <net/protocol.h>
31
32 struct ipcomp_tfms {
33         struct list_head list;
34         struct crypto_comp **tfms;
35         int users;
36 };
37
/*
 * Shared resources.  ipcomp_init_state() and ipcomp_destroy() take
 * ipcomp_resource_mutex around every update of the scratch buffers and
 * of the transform list.
 */
static DEFINE_MUTEX(ipcomp_resource_mutex);

/* Per-CPU scratch buffers and the number of users sharing them. */
static void **ipcomp_scratches;
static int ipcomp_scratch_users;

/* All per-CPU transform sets currently allocated (struct ipcomp_tfms). */
static LIST_HEAD(ipcomp_tfms_list);
42
43 static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
44 {
45         struct ipcomp_data *ipcd = x->data;
46         const int plen = skb->len;
47         int dlen = IPCOMP_SCRATCH_SIZE;
48         const u8 *start = skb->data;
49         const int cpu = get_cpu();
50         u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
51         struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
52         int err = crypto_comp_decompress(tfm, start, plen, scratch, &dlen);
53
54         if (err)
55                 goto out;
56
57         if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
58                 err = -EINVAL;
59                 goto out;
60         }
61
62         err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
63         if (err)
64                 goto out;
65
66         skb->truesize += dlen - plen;
67         __skb_put(skb, dlen - plen);
68         skb_copy_to_linear_data(skb, scratch, dlen);
69 out:
70         put_cpu();
71         return err;
72 }
73
74 static int ipcomp_input(struct xfrm_state *x, struct sk_buff *skb)
75 {
76         int nexthdr;
77         int err = -ENOMEM;
78         struct ip_comp_hdr *ipch;
79
80         if (skb_linearize_cow(skb))
81                 goto out;
82
83         skb->ip_summed = CHECKSUM_NONE;
84
85         /* Remove ipcomp header and decompress original payload */
86         ipch = (void *)skb->data;
87         nexthdr = ipch->nexthdr;
88
89         skb->transport_header = skb->network_header + sizeof(*ipch);
90         __skb_pull(skb, sizeof(*ipch));
91         err = ipcomp_decompress(x, skb);
92         if (err)
93                 goto out;
94
95         err = nexthdr;
96
97 out:
98         return err;
99 }
100
101 static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
102 {
103         struct ipcomp_data *ipcd = x->data;
104         const int plen = skb->len;
105         int dlen = IPCOMP_SCRATCH_SIZE;
106         u8 *start = skb->data;
107         const int cpu = get_cpu();
108         u8 *scratch = *per_cpu_ptr(ipcomp_scratches, cpu);
109         struct crypto_comp *tfm = *per_cpu_ptr(ipcd->tfms, cpu);
110         int err;
111
112         local_bh_disable();
113         err = crypto_comp_compress(tfm, start, plen, scratch, &dlen);
114         local_bh_enable();
115         if (err)
116                 goto out;
117
118         if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
119                 err = -EMSGSIZE;
120                 goto out;
121         }
122
123         memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
124         put_cpu();
125
126         pskb_trim(skb, dlen + sizeof(struct ip_comp_hdr));
127         return 0;
128
129 out:
130         put_cpu();
131         return err;
132 }
133
134 static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb)
135 {
136         int err;
137         struct ip_comp_hdr *ipch;
138         struct ipcomp_data *ipcd = x->data;
139
140         if (skb->len < ipcd->threshold) {
141                 /* Don't bother compressing */
142                 goto out_ok;
143         }
144
145         if (skb_linearize_cow(skb))
146                 goto out_ok;
147
148         err = ipcomp_compress(x, skb);
149
150         if (err) {
151                 goto out_ok;
152         }
153
154         /* Install ipcomp header, convert into ipcomp datagram. */
155         ipch = ip_comp_hdr(skb);
156         ipch->nexthdr = *skb_mac_header(skb);
157         ipch->flags = 0;
158         ipch->cpi = htons((u16 )ntohl(x->id.spi));
159         *skb_mac_header(skb) = IPPROTO_COMP;
160 out_ok:
161         skb_push(skb, -skb_network_offset(skb));
162         return 0;
163 }
164
165 static void ipcomp4_err(struct sk_buff *skb, u32 info)
166 {
167         __be32 spi;
168         struct iphdr *iph = (struct iphdr *)skb->data;
169         struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
170         struct xfrm_state *x;
171
172         if (icmp_hdr(skb)->type != ICMP_DEST_UNREACH ||
173             icmp_hdr(skb)->code != ICMP_FRAG_NEEDED)
174                 return;
175
176         spi = htonl(ntohs(ipch->cpi));
177         x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
178                               spi, IPPROTO_COMP, AF_INET);
179         if (!x)
180                 return;
181         NETDEBUG(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/" NIPQUAD_FMT "\n",
182                  spi, NIPQUAD(iph->daddr));
183         xfrm_state_put(x);
184 }
185
186 /* We always hold one tunnel user reference to indicate a tunnel */
187 static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
188 {
189         struct xfrm_state *t;
190
191         t = xfrm_state_alloc();
192         if (t == NULL)
193                 goto out;
194
195         t->id.proto = IPPROTO_IPIP;
196         t->id.spi = x->props.saddr.a4;
197         t->id.daddr.a4 = x->id.daddr.a4;
198         memcpy(&t->sel, &x->sel, sizeof(t->sel));
199         t->props.family = AF_INET;
200         t->props.mode = x->props.mode;
201         t->props.saddr.a4 = x->props.saddr.a4;
202         t->props.flags = x->props.flags;
203
204         if (xfrm_init_state(t))
205                 goto error;
206
207         atomic_set(&t->tunnel_users, 1);
208 out:
209         return t;
210
211 error:
212         t->km.state = XFRM_STATE_DEAD;
213         xfrm_state_put(t);
214         t = NULL;
215         goto out;
216 }
217
218 /*
219  * Must be protected by xfrm_cfg_mutex.  State and tunnel user references are
220  * always incremented on success.
221  */
222 static int ipcomp_tunnel_attach(struct xfrm_state *x)
223 {
224         int err = 0;
225         struct xfrm_state *t;
226
227         t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
228                               x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
229         if (!t) {
230                 t = ipcomp_tunnel_create(x);
231                 if (!t) {
232                         err = -EINVAL;
233                         goto out;
234                 }
235                 xfrm_state_insert(t);
236                 xfrm_state_hold(t);
237         }
238         x->tunnel = t;
239         atomic_inc(&t->tunnel_users);
240 out:
241         return err;
242 }
243
244 static void ipcomp_free_scratches(void)
245 {
246         int i;
247         void **scratches;
248
249         if (--ipcomp_scratch_users)
250                 return;
251
252         scratches = ipcomp_scratches;
253         if (!scratches)
254                 return;
255
256         for_each_possible_cpu(i)
257                 vfree(*per_cpu_ptr(scratches, i));
258
259         free_percpu(scratches);
260 }
261
262 static void **ipcomp_alloc_scratches(void)
263 {
264         int i;
265         void **scratches;
266
267         if (ipcomp_scratch_users++)
268                 return ipcomp_scratches;
269
270         scratches = alloc_percpu(void *);
271         if (!scratches)
272                 return NULL;
273
274         ipcomp_scratches = scratches;
275
276         for_each_possible_cpu(i) {
277                 void *scratch = vmalloc(IPCOMP_SCRATCH_SIZE);
278                 if (!scratch)
279                         return NULL;
280                 *per_cpu_ptr(scratches, i) = scratch;
281         }
282
283         return scratches;
284 }
285
286 static void ipcomp_free_tfms(struct crypto_comp **tfms)
287 {
288         struct ipcomp_tfms *pos;
289         int cpu;
290
291         list_for_each_entry(pos, &ipcomp_tfms_list, list) {
292                 if (pos->tfms == tfms)
293                         break;
294         }
295
296         BUG_TRAP(pos);
297
298         if (--pos->users)
299                 return;
300
301         list_del(&pos->list);
302         kfree(pos);
303
304         if (!tfms)
305                 return;
306
307         for_each_possible_cpu(cpu) {
308                 struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu);
309                 crypto_free_comp(tfm);
310         }
311         free_percpu(tfms);
312 }
313
314 static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name)
315 {
316         struct ipcomp_tfms *pos;
317         struct crypto_comp **tfms;
318         int cpu;
319
320         /* This can be any valid CPU ID so we don't need locking. */
321         cpu = raw_smp_processor_id();
322
323         list_for_each_entry(pos, &ipcomp_tfms_list, list) {
324                 struct crypto_comp *tfm;
325
326                 tfms = pos->tfms;
327                 tfm = *per_cpu_ptr(tfms, cpu);
328
329                 if (!strcmp(crypto_comp_name(tfm), alg_name)) {
330                         pos->users++;
331                         return tfms;
332                 }
333         }
334
335         pos = kmalloc(sizeof(*pos), GFP_KERNEL);
336         if (!pos)
337                 return NULL;
338
339         pos->users = 1;
340         INIT_LIST_HEAD(&pos->list);
341         list_add(&pos->list, &ipcomp_tfms_list);
342
343         pos->tfms = tfms = alloc_percpu(struct crypto_comp *);
344         if (!tfms)
345                 goto error;
346
347         for_each_possible_cpu(cpu) {
348                 struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0,
349                                                             CRYPTO_ALG_ASYNC);
350                 if (IS_ERR(tfm))
351                         goto error;
352                 *per_cpu_ptr(tfms, cpu) = tfm;
353         }
354
355         return tfms;
356
357 error:
358         ipcomp_free_tfms(tfms);
359         return NULL;
360 }
361
362 static void ipcomp_free_data(struct ipcomp_data *ipcd)
363 {
364         if (ipcd->tfms)
365                 ipcomp_free_tfms(ipcd->tfms);
366         ipcomp_free_scratches();
367 }
368
369 static void ipcomp_destroy(struct xfrm_state *x)
370 {
371         struct ipcomp_data *ipcd = x->data;
372         if (!ipcd)
373                 return;
374         xfrm_state_delete_tunnel(x);
375         mutex_lock(&ipcomp_resource_mutex);
376         ipcomp_free_data(ipcd);
377         mutex_unlock(&ipcomp_resource_mutex);
378         kfree(ipcd);
379 }
380
381 static int ipcomp_init_state(struct xfrm_state *x)
382 {
383         int err;
384         struct ipcomp_data *ipcd;
385         struct xfrm_algo_desc *calg_desc;
386
387         err = -EINVAL;
388         if (!x->calg)
389                 goto out;
390
391         if (x->encap)
392                 goto out;
393
394         x->props.header_len = 0;
395         switch (x->props.mode) {
396         case XFRM_MODE_TRANSPORT:
397                 break;
398         case XFRM_MODE_TUNNEL:
399                 x->props.header_len += sizeof(struct iphdr);
400                 break;
401         default:
402                 goto out;
403         }
404
405         err = -ENOMEM;
406         ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL);
407         if (!ipcd)
408                 goto out;
409
410         mutex_lock(&ipcomp_resource_mutex);
411         if (!ipcomp_alloc_scratches())
412                 goto error;
413
414         ipcd->tfms = ipcomp_alloc_tfms(x->calg->alg_name);
415         if (!ipcd->tfms)
416                 goto error;
417         mutex_unlock(&ipcomp_resource_mutex);
418
419         if (x->props.mode == XFRM_MODE_TUNNEL) {
420                 err = ipcomp_tunnel_attach(x);
421                 if (err)
422                         goto error_tunnel;
423         }
424
425         calg_desc = xfrm_calg_get_byname(x->calg->alg_name, 0);
426         BUG_ON(!calg_desc);
427         ipcd->threshold = calg_desc->uinfo.comp.threshold;
428         x->data = ipcd;
429         err = 0;
430 out:
431         return err;
432
433 error_tunnel:
434         mutex_lock(&ipcomp_resource_mutex);
435 error:
436         ipcomp_free_data(ipcd);
437         mutex_unlock(&ipcomp_resource_mutex);
438         kfree(ipcd);
439         goto out;
440 }
441
442 static const struct xfrm_type ipcomp_type = {
443         .description    = "IPCOMP4",
444         .owner          = THIS_MODULE,
445         .proto          = IPPROTO_COMP,
446         .init_state     = ipcomp_init_state,
447         .destructor     = ipcomp_destroy,
448         .input          = ipcomp_input,
449         .output         = ipcomp_output
450 };
451
452 static struct net_protocol ipcomp4_protocol = {
453         .handler        =       xfrm4_rcv,
454         .err_handler    =       ipcomp4_err,
455         .no_policy      =       1,
456 };
457
458 static int __init ipcomp4_init(void)
459 {
460         if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) {
461                 printk(KERN_INFO "ipcomp init: can't add xfrm type\n");
462                 return -EAGAIN;
463         }
464         if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
465                 printk(KERN_INFO "ipcomp init: can't add protocol\n");
466                 xfrm_unregister_type(&ipcomp_type, AF_INET);
467                 return -EAGAIN;
468         }
469         return 0;
470 }
471
472 static void __exit ipcomp4_fini(void)
473 {
474         if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
475                 printk(KERN_INFO "ip ipcomp close: can't remove protocol\n");
476         if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
477                 printk(KERN_INFO "ip ipcomp close: can't remove xfrm type\n");
478 }
479
480 module_init(ipcomp4_init);
481 module_exit(ipcomp4_fini);
482
483 MODULE_LICENSE("GPL");
484 MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
485 MODULE_AUTHOR("James Morris <jmorris@intercode.com.au>");
486
487 MODULE_ALIAS_XFRM_TYPE(AF_INET, XFRM_PROTO_COMP);