/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

LIST_HEAD(nf_conntrack_expect_list);
EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);

struct kmem_cache *nf_conntrack_expect_cachep __read_mostly;
static unsigned int nf_conntrack_expect_next_id;

/* nf_conntrack_expect helper functions */
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	NF_CT_ASSERT(master_help);
	NF_CT_ASSERT(!timer_pending(&exp->timeout));

	list_del(&exp->list);
	NF_CT_STAT_INC(expect_delete);
	master_help->expecting--;
	nf_conntrack_expect_put(exp);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

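/* Timer callback: the expectation timed out without being matched; unlink it
 * from the global list and drop the timer's reference. */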
static void expectation_timed_out(unsigned long ul_expect)
{
	struct nf_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&nf_conntrack_lock);
	nf_ct_unlink_expect(exp);
	write_unlock_bh(&nf_conntrack_lock);
	nf_conntrack_expect_put(exp);
}

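/* Find an expectation whose tuple matches under the stored mask. The caller
 * must hold nf_conntrack_lock; no reference is taken on the result. */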
struct nf_conntrack_expect *
__nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
			return i;
	}
	return NULL;
}
EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *i;

	read_lock_bh(&nf_conntrack_lock);
	i = __nf_conntrack_expect_find(tuple);
	if (i)
		atomic_inc(&i->use);
	read_unlock_bh(&nf_conntrack_lock);

	return i;
}
EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);

/* If an expectation for this connection is found, it is deleted from the
 * global list and returned. */
struct nf_conntrack_expect *
find_expectation(const struct nf_conntrack_tuple *tuple)
{
	struct nf_conntrack_expect *exp;

	exp = __nf_conntrack_expect_find(tuple);
	if (!exp)
		return NULL;

	/* If the master is not in the hash table yet (i.e. the packet hasn't
	   left this machine yet), how can the other end know about the
	   expected connection?  Hence these are not the droids you are
	   looking for (if the master ct never got confirmed, we'd hold a
	   reference to it and weird things would happen to future packets). */
	if (!nf_ct_is_confirmed(exp->master))
		return NULL;

	if (exp->flags & NF_CT_EXPECT_PERMANENT) {
		atomic_inc(&exp->use);
		return exp;
	} else if (del_timer(&exp->timeout)) {
		nf_ct_unlink_expect(exp);
		return exp;
	}

	return NULL;
}

/* Delete all expectations for this conntrack. */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
	struct nf_conntrack_expect *i, *tmp;
	struct nf_conn_help *help = nfct_help(ct);

	/* Optimization: most connections never expect any others. */
	if (!help || help->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
		if (i->master == ct && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			nf_conntrack_expect_put(i);
		}
	}
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
			       const struct nf_conntrack_expect *b)
{
	/* The parts covered by the intersection of the masks must be
	   unequal, otherwise they clash. */
	struct nf_conntrack_tuple intersect_mask;
	int count;

	intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
	intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
	intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
	intersect_mask.dst.protonum = a->mask.dst.protonum
					& b->mask.dst.protonum;

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.src.u3.all[count] =
			a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
	}

	for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
		intersect_mask.dst.u3.all[count] =
			a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
	}

	return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
				 const struct nf_conntrack_expect *b)
{
	return a->master == b->master
		&& nf_ct_tuple_equal(&a->tuple, &b->tuple)
		&& nf_ct_tuple_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
{
	struct nf_conntrack_expect *i;

	write_lock_bh(&nf_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			nf_ct_unlink_expect(i);
			write_unlock_bh(&nf_conntrack_lock);
			nf_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
{
	struct nf_conntrack_expect *new;

	new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new)
		return NULL;

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}
EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);

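/* Fill in the expected tuple and mask. A NULL saddr, daddr, src or dst
 * argument leaves the corresponding field as a wildcard (zero mask). */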
void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
			      union nf_conntrack_address *saddr,
			      union nf_conntrack_address *daddr,
			      u_int8_t proto, __be16 *src, __be16 *dst)
{
	int len;

	if (family == AF_INET)
		len = 4;
	else
		len = 16;

	exp->flags = 0;
	exp->expectfn = NULL;
	exp->helper = NULL;
	exp->tuple.src.l3num = family;
	exp->tuple.dst.protonum = proto;
	exp->mask.src.l3num = 0xFFFF;
	exp->mask.dst.protonum = 0xFF;

	if (saddr) {
		memcpy(&exp->tuple.src.u3, saddr, len);
		if (sizeof(exp->tuple.src.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.src.u3 + len, 0x00,
			       sizeof(exp->tuple.src.u3) - len);
		memset(&exp->mask.src.u3, 0xFF, len);
		if (sizeof(exp->mask.src.u3) > len)
			memset((void *)&exp->mask.src.u3 + len, 0x00,
			       sizeof(exp->mask.src.u3) - len);
	} else {
		memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
		memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
	}

	if (daddr) {
		memcpy(&exp->tuple.dst.u3, daddr, len);
		if (sizeof(exp->tuple.dst.u3) > len)
			/* address needs to be cleared for nf_ct_tuple_equal */
			memset((void *)&exp->tuple.dst.u3 + len, 0x00,
			       sizeof(exp->tuple.dst.u3) - len);
		memset(&exp->mask.dst.u3, 0xFF, len);
		if (sizeof(exp->mask.dst.u3) > len)
			memset((void *)&exp->mask.dst.u3 + len, 0x00,
			       sizeof(exp->mask.dst.u3) - len);
	} else {
		memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
		memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
	}

	if (src) {
		exp->tuple.src.u.all = (__force u16)*src;
		exp->mask.src.u.all = 0xFFFF;
	} else {
		exp->tuple.src.u.all = 0;
		exp->mask.src.u.all = 0;
	}

	if (dst) {
		exp->tuple.dst.u.all = (__force u16)*dst;
		exp->mask.dst.u.all = 0xFFFF;
	} else {
		exp->tuple.dst.u.all = 0;
		exp->mask.dst.u.all = 0;
	}
}
EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);

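/* Release one reference; the expectation is freed when the last one is gone. */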
void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(nf_conntrack_expect_cachep, exp);
}
EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);

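/* Add the expectation to the global list and arm its timeout timer.
 * The caller must hold nf_conntrack_lock for writing. */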
static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
{
	struct nf_conn_help *master_help = nfct_help(exp->master);

	atomic_inc(&exp->use);
	master_help->expecting++;
	list_add(&exp->list, &nf_conntrack_expect_list);

	setup_timer(&exp->timeout, expectation_timed_out, (unsigned long)exp);
	exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++nf_conntrack_expect_next_id;
	atomic_inc(&exp->use);
	NF_CT_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master)
{
	struct nf_conntrack_expect *i;

	list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
		if (i->master == master) {
			if (del_timer(&i->timeout)) {
				nf_ct_unlink_expect(i);
				nf_conntrack_expect_put(i);
			}
			break;
		}
	}
}

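/* Restart the expectation's timeout. Returns 0 if the timer had already
 * expired (the expectation is dying), 1 if it was successfully refreshed. */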
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
	struct nf_conn_help *master_help = nfct_help(i->master);

	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + master_help->helper->timeout * HZ;
	add_timer(&i->timeout);
	return 1;
}

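/* Register a new expectation for the master connection. Returns 0 on
 * success, -ESHUTDOWN if the master's helper has gone away, or -EBUSY if
 * the expectation clashes with an existing one.
 *
 * Sketch of the usual helper calling sequence (local names are illustrative
 * only):
 *
 *	exp = nf_conntrack_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	nf_conntrack_expect_init(exp, family, NULL, &daddr,
 *				 IPPROTO_TCP, NULL, &port);
 *	if (nf_conntrack_expect_related(exp) != 0)
 *		ret = NF_DROP;
 *	nf_conntrack_expect_put(exp);
 */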
int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
{
	struct nf_conntrack_expect *i;
	struct nf_conn *master = expect->master;
	struct nf_conn_help *master_help = nfct_help(master);
	int ret;

	NF_CT_ASSERT(master_help);

	write_lock_bh(&nf_conntrack_lock);
	if (!master_help->helper) {
		ret = -ESHUTDOWN;
		goto out;
	}
	list_for_each_entry(i, &nf_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}
	/* Would we go over the limit? */
	if (master_help->helper->max_expected &&
	    master_help->expecting >= master_help->helper->max_expected)
		evict_oldest_expect(master);

	nf_conntrack_expect_insert(expect);
	nf_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&nf_conntrack_lock);
	return ret;
}
EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);

#ifdef CONFIG_PROC_FS
static void *exp_seq_start(struct seq_file *s, loff_t *pos)
{
	struct list_head *e = &nf_conntrack_expect_list;
	loff_t i;

	/* The seq_file API calls stop even if start fails, so we need to
	 * grab the lock here since stop is where we unlock. */
	read_lock_bh(&nf_conntrack_lock);

	if (list_empty(e))
		return NULL;

	for (i = 0; i <= *pos; i++) {
		e = e->next;
		if (e == &nf_conntrack_expect_list)
			return NULL;
	}
	return e;
}

static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
	struct list_head *e = v;

	++*pos;
	e = e->next;

	if (e == &nf_conntrack_expect_list)
		return NULL;

	return e;
}

static void exp_seq_stop(struct seq_file *s, void *v)
{
	read_unlock_bh(&nf_conntrack_lock);
}

static int exp_seq_show(struct seq_file *s, void *v)
{
	struct nf_conntrack_expect *expect = v;

	if (expect->timeout.function)
		seq_printf(s, "%ld ", timer_pending(&expect->timeout)
			   ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
	else
		seq_printf(s, "- ");
	seq_printf(s, "l3proto = %u proto=%u ",
		   expect->tuple.src.l3num,
		   expect->tuple.dst.protonum);
	print_tuple(s, &expect->tuple,
		    __nf_ct_l3proto_find(expect->tuple.src.l3num),
		    __nf_ct_l4proto_find(expect->tuple.src.l3num,
					 expect->tuple.dst.protonum));
	return seq_putc(s, '\n');
}

static struct seq_operations exp_seq_ops = {
	.start = exp_seq_start,
	.next = exp_seq_next,
	.stop = exp_seq_stop,
	.show = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
	return seq_open(file, &exp_seq_ops);
}

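/* file_operations for the expectation listing under /proc; registration is
 * assumed to be done by the conntrack proc setup code elsewhere. */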
const struct file_operations exp_file_ops = {
	.owner   = THIS_MODULE,
	.open    = exp_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = seq_release
};
#endif /* CONFIG_PROC_FS */