IB/mthca: Optimize large messages on Sinai HCAs
[linux-2.6] / net / ipv4 / xfrm4_output.c
1 /*
2  * xfrm4_output.c - Common IPsec encapsulation code for IPv4.
3  * Copyright (c) 2004 Herbert Xu <herbert@gondor.apana.org.au>
4  * 
5  * This program is free software; you can redistribute it and/or
6  * modify it under the terms of the GNU General Public License
7  * as published by the Free Software Foundation; either version
8  * 2 of the License, or (at your option) any later version.
9  */
10
11 #include <linux/compiler.h>
12 #include <linux/skbuff.h>
13 #include <linux/spinlock.h>
14 #include <linux/netfilter_ipv4.h>
15 #include <net/inet_ecn.h>
16 #include <net/ip.h>
17 #include <net/xfrm.h>
18 #include <net/icmp.h>
19
20 /* Add encapsulation header.
21  *
22  * In transport mode, the IP header will be moved forward to make space
23  * for the encapsulation header.
24  *
25  * In tunnel mode, the top IP header will be constructed per RFC 2401.
26  * The following fields in it shall be filled in by x->type->output:
27  *      tot_len
28  *      check
29  *
30  * On exit, skb->h will be set to the start of the payload to be processed
31  * by x->type->output and skb->nh will be set to the top IP header.
32  */
33 static void xfrm4_encap(struct sk_buff *skb)
34 {
35         struct dst_entry *dst = skb->dst;
36         struct xfrm_state *x = dst->xfrm;
37         struct iphdr *iph, *top_iph;
38         int flags;
39
40         iph = skb->nh.iph;
41         skb->h.ipiph = iph;
42
43         skb->nh.raw = skb_push(skb, x->props.header_len);
44         top_iph = skb->nh.iph;
45
46         if (!x->props.mode) {
47                 skb->h.raw += iph->ihl*4;
48                 memmove(top_iph, iph, iph->ihl*4);
49                 return;
50         }
51
52         top_iph->ihl = 5;
53         top_iph->version = 4;
54
55         /* DS disclosed */
56         top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
57
58         flags = x->props.flags;
59         if (flags & XFRM_STATE_NOECN)
60                 IP_ECN_clear(top_iph);
61
62         top_iph->frag_off = (flags & XFRM_STATE_NOPMTUDISC) ?
63                 0 : (iph->frag_off & htons(IP_DF));
64         if (!top_iph->frag_off)
65                 __ip_select_ident(top_iph, dst, 0);
66
67         top_iph->ttl = dst_metric(dst->child, RTAX_HOPLIMIT);
68
69         top_iph->saddr = x->props.saddr.a4;
70         top_iph->daddr = x->id.daddr.a4;
71         top_iph->protocol = IPPROTO_IPIP;
72
73         memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
74 }
75
76 static int xfrm4_tunnel_check_size(struct sk_buff *skb)
77 {
78         int mtu, ret = 0;
79         struct dst_entry *dst;
80         struct iphdr *iph = skb->nh.iph;
81
82         if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
83                 goto out;
84
85         IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
86         
87         if (!(iph->frag_off & htons(IP_DF)) || skb->local_df)
88                 goto out;
89
90         dst = skb->dst;
91         mtu = dst_mtu(dst);
92         if (skb->len > mtu) {
93                 icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
94                 ret = -EMSGSIZE;
95         }
96 out:
97         return ret;
98 }
99
100 static int xfrm4_output_one(struct sk_buff *skb)
101 {
102         struct dst_entry *dst = skb->dst;
103         struct xfrm_state *x = dst->xfrm;
104         int err;
105         
106         if (skb->ip_summed == CHECKSUM_HW) {
107                 err = skb_checksum_help(skb, 0);
108                 if (err)
109                         goto error_nolock;
110         }
111
112         if (x->props.mode) {
113                 err = xfrm4_tunnel_check_size(skb);
114                 if (err)
115                         goto error_nolock;
116         }
117
118         do {
119                 spin_lock_bh(&x->lock);
120                 err = xfrm_state_check(x, skb);
121                 if (err)
122                         goto error;
123
124                 xfrm4_encap(skb);
125
126                 err = x->type->output(x, skb);
127                 if (err)
128                         goto error;
129
130                 x->curlft.bytes += skb->len;
131                 x->curlft.packets++;
132
133                 spin_unlock_bh(&x->lock);
134         
135                 if (!(skb->dst = dst_pop(dst))) {
136                         err = -EHOSTUNREACH;
137                         goto error_nolock;
138                 }
139                 dst = skb->dst;
140                 x = dst->xfrm;
141         } while (x && !x->props.mode);
142
143         IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
144         err = 0;
145
146 out_exit:
147         return err;
148 error:
149         spin_unlock_bh(&x->lock);
150 error_nolock:
151         kfree_skb(skb);
152         goto out_exit;
153 }
154
155 static int xfrm4_output_finish(struct sk_buff *skb)
156 {
157         int err;
158
159 #ifdef CONFIG_NETFILTER
160         if (!skb->dst->xfrm) {
161                 IPCB(skb)->flags |= IPSKB_REROUTED;
162                 return dst_output(skb);
163         }
164 #endif
165         while (likely((err = xfrm4_output_one(skb)) == 0)) {
166                 nf_reset(skb);
167
168                 err = nf_hook(PF_INET, NF_IP_LOCAL_OUT, &skb, NULL,
169                               skb->dst->dev, dst_output);
170                 if (unlikely(err != 1))
171                         break;
172
173                 if (!skb->dst->xfrm)
174                         return dst_output(skb);
175
176                 err = nf_hook(PF_INET, NF_IP_POST_ROUTING, &skb, NULL,
177                               skb->dst->dev, xfrm4_output_finish);
178                 if (unlikely(err != 1))
179                         break;
180         }
181
182         return err;
183 }
184
185 int xfrm4_output(struct sk_buff *skb)
186 {
187         return NF_HOOK_COND(PF_INET, NF_IP_POST_ROUTING, skb, NULL, skb->dst->dev,
188                             xfrm4_output_finish,
189                             !(IPCB(skb)->flags & IPSKB_REROUTED));
190 }