Merge commit 'upstream/master'
[linux-2.6] / net / netfilter / xt_TCPMSS.c
1 /*
2  * This is a module which is used for setting the MSS option in TCP packets.
3  *
4  * Copyright (C) 2000 Marc Boucher <marc@mbsi.ca>
5  *
6  * This program is free software; you can redistribute it and/or modify
7  * it under the terms of the GNU General Public License version 2 as
8  * published by the Free Software Foundation.
9  */
10
11 #include <linux/module.h>
12 #include <linux/skbuff.h>
13 #include <linux/ip.h>
14 #include <linux/ipv6.h>
15 #include <linux/tcp.h>
16 #include <net/dst.h>
17 #include <net/flow.h>
18 #include <net/ipv6.h>
19 #include <net/route.h>
20 #include <net/tcp.h>
21
22 #include <linux/netfilter_ipv4/ip_tables.h>
23 #include <linux/netfilter_ipv6/ip6_tables.h>
24 #include <linux/netfilter/x_tables.h>
25 #include <linux/netfilter/xt_tcpudp.h>
26 #include <linux/netfilter/xt_TCPMSS.h>
27
28 MODULE_LICENSE("GPL");
29 MODULE_AUTHOR("Marc Boucher <marc@mbsi.ca>");
30 MODULE_DESCRIPTION("Xtables: TCP Maximum Segment Size (MSS) adjustment");
31 MODULE_ALIAS("ipt_TCPMSS");
32 MODULE_ALIAS("ip6t_TCPMSS");
33
34 static inline unsigned int
35 optlen(const u_int8_t *opt, unsigned int offset)
36 {
37         /* Beware zero-length options: make finite progress */
38         if (opt[offset] <= TCPOPT_NOP || opt[offset+1] == 0)
39                 return 1;
40         else
41                 return opt[offset+1];
42 }
43
44 static int
45 tcpmss_mangle_packet(struct sk_buff *skb,
46                      const struct xt_tcpmss_info *info,
47                      unsigned int in_mtu,
48                      unsigned int tcphoff,
49                      unsigned int minlen)
50 {
51         struct tcphdr *tcph;
52         unsigned int tcplen, i;
53         __be16 oldval;
54         u16 newmss;
55         u8 *opt;
56
57         if (!skb_make_writable(skb, skb->len))
58                 return -1;
59
60         tcplen = skb->len - tcphoff;
61         tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
62
63         /* Since it passed flags test in tcp match, we know it is is
64            not a fragment, and has data >= tcp header length.  SYN
65            packets should not contain data: if they did, then we risk
66            running over MTU, sending Frag Needed and breaking things
67            badly. --RR */
68         if (tcplen != tcph->doff*4) {
69                 if (net_ratelimit())
70                         printk(KERN_ERR "xt_TCPMSS: bad length (%u bytes)\n",
71                                skb->len);
72                 return -1;
73         }
74
75         if (info->mss == XT_TCPMSS_CLAMP_PMTU) {
76                 if (dst_mtu(skb->dst) <= minlen) {
77                         if (net_ratelimit())
78                                 printk(KERN_ERR "xt_TCPMSS: "
79                                        "unknown or invalid path-MTU (%u)\n",
80                                        dst_mtu(skb->dst));
81                         return -1;
82                 }
83                 if (in_mtu <= minlen) {
84                         if (net_ratelimit())
85                                 printk(KERN_ERR "xt_TCPMSS: unknown or "
86                                        "invalid path-MTU (%u)\n", in_mtu);
87                         return -1;
88                 }
89                 newmss = min(dst_mtu(skb->dst), in_mtu) - minlen;
90         } else
91                 newmss = info->mss;
92
93         opt = (u_int8_t *)tcph;
94         for (i = sizeof(struct tcphdr); i < tcph->doff*4; i += optlen(opt, i)) {
95                 if (opt[i] == TCPOPT_MSS && tcph->doff*4 - i >= TCPOLEN_MSS &&
96                     opt[i+1] == TCPOLEN_MSS) {
97                         u_int16_t oldmss;
98
99                         oldmss = (opt[i+2] << 8) | opt[i+3];
100
101                         /* Never increase MSS, even when setting it, as
102                          * doing so results in problems for hosts that rely
103                          * on MSS being set correctly.
104                          */
105                         if (oldmss <= newmss)
106                                 return 0;
107
108                         opt[i+2] = (newmss & 0xff00) >> 8;
109                         opt[i+3] = newmss & 0x00ff;
110
111                         inet_proto_csum_replace2(&tcph->check, skb,
112                                                  htons(oldmss), htons(newmss),
113                                                  0);
114                         return 0;
115                 }
116         }
117
118         /*
119          * MSS Option not found ?! add it..
120          */
121         if (skb_tailroom(skb) < TCPOLEN_MSS) {
122                 if (pskb_expand_head(skb, 0,
123                                      TCPOLEN_MSS - skb_tailroom(skb),
124                                      GFP_ATOMIC))
125                         return -1;
126                 tcph = (struct tcphdr *)(skb_network_header(skb) + tcphoff);
127         }
128
129         skb_put(skb, TCPOLEN_MSS);
130
131         opt = (u_int8_t *)tcph + sizeof(struct tcphdr);
132         memmove(opt + TCPOLEN_MSS, opt, tcplen - sizeof(struct tcphdr));
133
134         inet_proto_csum_replace2(&tcph->check, skb,
135                                  htons(tcplen), htons(tcplen + TCPOLEN_MSS), 1);
136         opt[0] = TCPOPT_MSS;
137         opt[1] = TCPOLEN_MSS;
138         opt[2] = (newmss & 0xff00) >> 8;
139         opt[3] = newmss & 0x00ff;
140
141         inet_proto_csum_replace4(&tcph->check, skb, 0, *((__be32 *)opt), 0);
142
143         oldval = ((__be16 *)tcph)[6];
144         tcph->doff += TCPOLEN_MSS/4;
145         inet_proto_csum_replace2(&tcph->check, skb,
146                                  oldval, ((__be16 *)tcph)[6], 0);
147         return TCPOLEN_MSS;
148 }
149
150 static u_int32_t tcpmss_reverse_mtu(const struct sk_buff *skb,
151                                     unsigned int family)
152 {
153         struct flowi fl = {};
154         const struct nf_afinfo *ai;
155         struct rtable *rt = NULL;
156         u_int32_t mtu     = ~0U;
157
158         if (family == PF_INET)
159                 fl.fl4_dst = ip_hdr(skb)->saddr;
160         else
161                 fl.fl6_dst = ipv6_hdr(skb)->saddr;
162
163         rcu_read_lock();
164         ai = nf_get_afinfo(family);
165         if (ai != NULL)
166                 ai->route((struct dst_entry **)&rt, &fl);
167         rcu_read_unlock();
168
169         if (rt != NULL) {
170                 mtu = dst_mtu(&rt->u.dst);
171                 dst_release(&rt->u.dst);
172         }
173         return mtu;
174 }
175
176 static unsigned int
177 tcpmss_tg4(struct sk_buff *skb, const struct net_device *in,
178            const struct net_device *out, unsigned int hooknum,
179            const struct xt_target *target, const void *targinfo)
180 {
181         struct iphdr *iph = ip_hdr(skb);
182         __be16 newlen;
183         int ret;
184
185         ret = tcpmss_mangle_packet(skb, targinfo,
186                                    tcpmss_reverse_mtu(skb, PF_INET),
187                                    iph->ihl * 4,
188                                    sizeof(*iph) + sizeof(struct tcphdr));
189         if (ret < 0)
190                 return NF_DROP;
191         if (ret > 0) {
192                 iph = ip_hdr(skb);
193                 newlen = htons(ntohs(iph->tot_len) + ret);
194                 csum_replace2(&iph->check, iph->tot_len, newlen);
195                 iph->tot_len = newlen;
196         }
197         return XT_CONTINUE;
198 }
199
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
/*
 * IPv6 target handler: skip the extension headers, then adjust the MSS
 * option of the TCP header in @skb.
 *
 * Returns NF_DROP when the extension headers cannot be skipped or when
 * tcpmss_mangle_packet() reports an error, XT_CONTINUE otherwise.  When
 * the packet grew, the IPv6 payload length is increased accordingly.
 */
static unsigned int
tcpmss_tg6(struct sk_buff *skb, const struct net_device *in,
	   const struct net_device *out, unsigned int hooknum,
	   const struct xt_target *target, const void *targinfo)
{
	struct ipv6hdr *ipv6h = ipv6_hdr(skb);
	u8 nexthdr = ipv6h->nexthdr;
	int tcphoff;
	int grown;

	tcphoff = ipv6_skip_exthdr(skb, sizeof(*ipv6h), &nexthdr);
	if (tcphoff < 0)
		return NF_DROP;

	grown = tcpmss_mangle_packet(skb, targinfo,
				     tcpmss_reverse_mtu(skb, PF_INET6),
				     tcphoff,
				     sizeof(*ipv6h) + sizeof(struct tcphdr));
	if (grown < 0)
		return NF_DROP;
	if (grown == 0)
		return XT_CONTINUE;

	/* Fetch the header again before touching it after the mangle. */
	ipv6h = ipv6_hdr(skb);
	ipv6h->payload_len = htons(ntohs(ipv6h->payload_len) + grown);

	return XT_CONTINUE;
}
#endif
228
229 #define TH_SYN 0x02
230
231 /* Must specify -p tcp --syn */
232 static inline bool find_syn_match(const struct xt_entry_match *m)
233 {
234         const struct xt_tcp *tcpinfo = (const struct xt_tcp *)m->data;
235
236         if (strcmp(m->u.kernel.match->name, "tcp") == 0 &&
237             tcpinfo->flg_cmp & TH_SYN &&
238             !(tcpinfo->invflags & XT_TCP_INV_FLAGS))
239                 return true;
240
241         return false;
242 }
243
244 static bool
245 tcpmss_tg4_check(const char *tablename, const void *entry,
246                  const struct xt_target *target, void *targinfo,
247                  unsigned int hook_mask)
248 {
249         const struct xt_tcpmss_info *info = targinfo;
250         const struct ipt_entry *e = entry;
251
252         if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
253             (hook_mask & ~((1 << NF_INET_FORWARD) |
254                            (1 << NF_INET_LOCAL_OUT) |
255                            (1 << NF_INET_POST_ROUTING))) != 0) {
256                 printk("xt_TCPMSS: path-MTU clamping only supported in "
257                        "FORWARD, OUTPUT and POSTROUTING hooks\n");
258                 return false;
259         }
260         if (IPT_MATCH_ITERATE(e, find_syn_match))
261                 return true;
262         printk("xt_TCPMSS: Only works on TCP SYN packets\n");
263         return false;
264 }
265
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
/*
 * Validate an IPv6 rule that uses the TCPMSS target.
 *
 * Path-MTU clamping is only accepted when the rule is reachable solely
 * from the FORWARD, OUTPUT and POSTROUTING hooks, and the rule must carry
 * a tcp match that tests for SYN (see find_syn_match()).
 *
 * Returns true if the rule is acceptable, false (after logging the
 * reason) otherwise.
 */
static bool
tcpmss_tg6_check(const char *tablename, const void *entry,
		 const struct xt_target *target, void *targinfo,
		 unsigned int hook_mask)
{
	const struct xt_tcpmss_info *info = targinfo;
	const struct ip6t_entry *e = entry;
	const unsigned int pmtu_hooks = (1 << NF_INET_FORWARD) |
					(1 << NF_INET_LOCAL_OUT) |
					(1 << NF_INET_POST_ROUTING);

	if (info->mss == XT_TCPMSS_CLAMP_PMTU &&
	    (hook_mask & ~pmtu_hooks) != 0) {
		/* Explicit level: do not depend on the default loglevel. */
		printk(KERN_WARNING
		       "xt_TCPMSS: path-MTU clamping only supported in "
		       "FORWARD, OUTPUT and POSTROUTING hooks\n");
		return false;
	}
	if (IP6T_MATCH_ITERATE(e, find_syn_match))
		return true;
	printk(KERN_WARNING "xt_TCPMSS: Only works on TCP SYN packets\n");
	return false;
}
#endif
289
290 static struct xt_target tcpmss_tg_reg[] __read_mostly = {
291         {
292                 .family         = AF_INET,
293                 .name           = "TCPMSS",
294                 .checkentry     = tcpmss_tg4_check,
295                 .target         = tcpmss_tg4,
296                 .targetsize     = sizeof(struct xt_tcpmss_info),
297                 .proto          = IPPROTO_TCP,
298                 .me             = THIS_MODULE,
299         },
300 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
301         {
302                 .family         = AF_INET6,
303                 .name           = "TCPMSS",
304                 .checkentry     = tcpmss_tg6_check,
305                 .target         = tcpmss_tg6,
306                 .targetsize     = sizeof(struct xt_tcpmss_info),
307                 .proto          = IPPROTO_TCP,
308                 .me             = THIS_MODULE,
309         },
310 #endif
311 };
312
313 static int __init tcpmss_tg_init(void)
314 {
315         return xt_register_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
316 }
317
318 static void __exit tcpmss_tg_exit(void)
319 {
320         xt_unregister_targets(tcpmss_tg_reg, ARRAY_SIZE(tcpmss_tg_reg));
321 }
322
323 module_init(tcpmss_tg_init);
324 module_exit(tcpmss_tg_exit);