Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-2.6] / net / netfilter / nf_conntrack_expect.c
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22
23 #include <net/netfilter/nf_conntrack.h>
24 #include <net/netfilter/nf_conntrack_core.h>
25 #include <net/netfilter/nf_conntrack_expect.h>
26 #include <net/netfilter/nf_conntrack_helper.h>
27 #include <net/netfilter/nf_conntrack_tuple.h>
28
29 LIST_HEAD(nf_conntrack_expect_list);
30 EXPORT_SYMBOL_GPL(nf_conntrack_expect_list);
31
32 kmem_cache_t *nf_conntrack_expect_cachep __read_mostly;
33 static unsigned int nf_conntrack_expect_next_id;
34
35 /* nf_conntrack_expect helper functions */
36 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
37 {
38         struct nf_conn_help *master_help = nfct_help(exp->master);
39
40         NF_CT_ASSERT(master_help);
41         NF_CT_ASSERT(!timer_pending(&exp->timeout));
42
43         list_del(&exp->list);
44         NF_CT_STAT_INC(expect_delete);
45         master_help->expecting--;
46         nf_conntrack_expect_put(exp);
47 }
48 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
49
50 static void expectation_timed_out(unsigned long ul_expect)
51 {
52         struct nf_conntrack_expect *exp = (void *)ul_expect;
53
54         write_lock_bh(&nf_conntrack_lock);
55         nf_ct_unlink_expect(exp);
56         write_unlock_bh(&nf_conntrack_lock);
57         nf_conntrack_expect_put(exp);
58 }
59
60 struct nf_conntrack_expect *
61 __nf_conntrack_expect_find(const struct nf_conntrack_tuple *tuple)
62 {
63         struct nf_conntrack_expect *i;
64
65         list_for_each_entry(i, &nf_conntrack_expect_list, list) {
66                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
67                         return i;
68         }
69         return NULL;
70 }
71 EXPORT_SYMBOL_GPL(__nf_conntrack_expect_find);
72
73 /* Just find a expectation corresponding to a tuple. */
74 struct nf_conntrack_expect *
75 nf_conntrack_expect_find_get(const struct nf_conntrack_tuple *tuple)
76 {
77         struct nf_conntrack_expect *i;
78
79         read_lock_bh(&nf_conntrack_lock);
80         i = __nf_conntrack_expect_find(tuple);
81         if (i)
82                 atomic_inc(&i->use);
83         read_unlock_bh(&nf_conntrack_lock);
84
85         return i;
86 }
87 EXPORT_SYMBOL_GPL(nf_conntrack_expect_find_get);
88
89 /* If an expectation for this connection is found, it gets delete from
90  * global list then returned. */
91 struct nf_conntrack_expect *
92 find_expectation(const struct nf_conntrack_tuple *tuple)
93 {
94         struct nf_conntrack_expect *i;
95
96         list_for_each_entry(i, &nf_conntrack_expect_list, list) {
97         /* If master is not in hash table yet (ie. packet hasn't left
98            this machine yet), how can other end know about expected?
99            Hence these are not the droids you are looking for (if
100            master ct never got confirmed, we'd hold a reference to it
101            and weird things would happen to future packets). */
102                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
103                     && nf_ct_is_confirmed(i->master)) {
104                         if (i->flags & NF_CT_EXPECT_PERMANENT) {
105                                 atomic_inc(&i->use);
106                                 return i;
107                         } else if (del_timer(&i->timeout)) {
108                                 nf_ct_unlink_expect(i);
109                                 return i;
110                         }
111                 }
112         }
113         return NULL;
114 }
115
116 /* delete all expectations for this conntrack */
117 void nf_ct_remove_expectations(struct nf_conn *ct)
118 {
119         struct nf_conntrack_expect *i, *tmp;
120         struct nf_conn_help *help = nfct_help(ct);
121
122         /* Optimization: most connection never expect any others. */
123         if (!help || help->expecting == 0)
124                 return;
125
126         list_for_each_entry_safe(i, tmp, &nf_conntrack_expect_list, list) {
127                 if (i->master == ct && del_timer(&i->timeout)) {
128                         nf_ct_unlink_expect(i);
129                         nf_conntrack_expect_put(i);
130                 }
131         }
132 }
133 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
134
135 /* Would two expected things clash? */
136 static inline int expect_clash(const struct nf_conntrack_expect *a,
137                                const struct nf_conntrack_expect *b)
138 {
139         /* Part covered by intersection of masks must be unequal,
140            otherwise they clash */
141         struct nf_conntrack_tuple intersect_mask;
142         int count;
143
144         intersect_mask.src.l3num = a->mask.src.l3num & b->mask.src.l3num;
145         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
146         intersect_mask.dst.u.all = a->mask.dst.u.all & b->mask.dst.u.all;
147         intersect_mask.dst.protonum = a->mask.dst.protonum
148                                         & b->mask.dst.protonum;
149
150         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
151                 intersect_mask.src.u3.all[count] =
152                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
153         }
154
155         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
156                 intersect_mask.dst.u3.all[count] =
157                         a->mask.dst.u3.all[count] & b->mask.dst.u3.all[count];
158         }
159
160         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
161 }
162
163 static inline int expect_matches(const struct nf_conntrack_expect *a,
164                                  const struct nf_conntrack_expect *b)
165 {
166         return a->master == b->master
167                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
168                 && nf_ct_tuple_equal(&a->mask, &b->mask);
169 }
170
171 /* Generally a bad idea to call this: could have matched already. */
172 void nf_conntrack_unexpect_related(struct nf_conntrack_expect *exp)
173 {
174         struct nf_conntrack_expect *i;
175
176         write_lock_bh(&nf_conntrack_lock);
177         /* choose the the oldest expectation to evict */
178         list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
179                 if (expect_matches(i, exp) && del_timer(&i->timeout)) {
180                         nf_ct_unlink_expect(i);
181                         write_unlock_bh(&nf_conntrack_lock);
182                         nf_conntrack_expect_put(i);
183                         return;
184                 }
185         }
186         write_unlock_bh(&nf_conntrack_lock);
187 }
188 EXPORT_SYMBOL_GPL(nf_conntrack_unexpect_related);
189
190 /* We don't increase the master conntrack refcount for non-fulfilled
191  * conntracks. During the conntrack destruction, the expectations are
192  * always killed before the conntrack itself */
193 struct nf_conntrack_expect *nf_conntrack_expect_alloc(struct nf_conn *me)
194 {
195         struct nf_conntrack_expect *new;
196
197         new = kmem_cache_alloc(nf_conntrack_expect_cachep, GFP_ATOMIC);
198         if (!new)
199                 return NULL;
200
201         new->master = me;
202         atomic_set(&new->use, 1);
203         return new;
204 }
205 EXPORT_SYMBOL_GPL(nf_conntrack_expect_alloc);
206
207 void nf_conntrack_expect_init(struct nf_conntrack_expect *exp, int family,
208                               union nf_conntrack_address *saddr,
209                               union nf_conntrack_address *daddr,
210                               u_int8_t proto, __be16 *src, __be16 *dst)
211 {
212         int len;
213
214         if (family == AF_INET)
215                 len = 4;
216         else
217                 len = 16;
218
219         exp->flags = 0;
220         exp->expectfn = NULL;
221         exp->helper = NULL;
222         exp->tuple.src.l3num = family;
223         exp->tuple.dst.protonum = proto;
224         exp->mask.src.l3num = 0xFFFF;
225         exp->mask.dst.protonum = 0xFF;
226
227         if (saddr) {
228                 memcpy(&exp->tuple.src.u3, saddr, len);
229                 if (sizeof(exp->tuple.src.u3) > len)
230                         /* address needs to be cleared for nf_ct_tuple_equal */
231                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
232                                sizeof(exp->tuple.src.u3) - len);
233                 memset(&exp->mask.src.u3, 0xFF, len);
234                 if (sizeof(exp->mask.src.u3) > len)
235                         memset((void *)&exp->mask.src.u3 + len, 0x00,
236                                sizeof(exp->mask.src.u3) - len);
237         } else {
238                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
239                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
240         }
241
242         if (daddr) {
243                 memcpy(&exp->tuple.dst.u3, daddr, len);
244                 if (sizeof(exp->tuple.dst.u3) > len)
245                         /* address needs to be cleared for nf_ct_tuple_equal */
246                         memset((void *)&exp->tuple.dst.u3 + len, 0x00,
247                                sizeof(exp->tuple.dst.u3) - len);
248                 memset(&exp->mask.dst.u3, 0xFF, len);
249                 if (sizeof(exp->mask.dst.u3) > len)
250                         memset((void *)&exp->mask.dst.u3 + len, 0x00,
251                                sizeof(exp->mask.dst.u3) - len);
252         } else {
253                 memset(&exp->tuple.dst.u3, 0x00, sizeof(exp->tuple.dst.u3));
254                 memset(&exp->mask.dst.u3, 0x00, sizeof(exp->mask.dst.u3));
255         }
256
257         if (src) {
258                 exp->tuple.src.u.all = (__force u16)*src;
259                 exp->mask.src.u.all = 0xFFFF;
260         } else {
261                 exp->tuple.src.u.all = 0;
262                 exp->mask.src.u.all = 0;
263         }
264
265         if (dst) {
266                 exp->tuple.dst.u.all = (__force u16)*dst;
267                 exp->mask.dst.u.all = 0xFFFF;
268         } else {
269                 exp->tuple.dst.u.all = 0;
270                 exp->mask.dst.u.all = 0;
271         }
272 }
273 EXPORT_SYMBOL_GPL(nf_conntrack_expect_init);
274
275 void nf_conntrack_expect_put(struct nf_conntrack_expect *exp)
276 {
277         if (atomic_dec_and_test(&exp->use))
278                 kmem_cache_free(nf_conntrack_expect_cachep, exp);
279 }
280 EXPORT_SYMBOL_GPL(nf_conntrack_expect_put);
281
282 static void nf_conntrack_expect_insert(struct nf_conntrack_expect *exp)
283 {
284         struct nf_conn_help *master_help = nfct_help(exp->master);
285
286         atomic_inc(&exp->use);
287         master_help->expecting++;
288         list_add(&exp->list, &nf_conntrack_expect_list);
289
290         init_timer(&exp->timeout);
291         exp->timeout.data = (unsigned long)exp;
292         exp->timeout.function = expectation_timed_out;
293         exp->timeout.expires = jiffies + master_help->helper->timeout * HZ;
294         add_timer(&exp->timeout);
295
296         exp->id = ++nf_conntrack_expect_next_id;
297         atomic_inc(&exp->use);
298         NF_CT_STAT_INC(expect_create);
299 }
300
301 /* Race with expectations being used means we could have none to find; OK. */
302 static void evict_oldest_expect(struct nf_conn *master)
303 {
304         struct nf_conntrack_expect *i;
305
306         list_for_each_entry_reverse(i, &nf_conntrack_expect_list, list) {
307                 if (i->master == master) {
308                         if (del_timer(&i->timeout)) {
309                                 nf_ct_unlink_expect(i);
310                                 nf_conntrack_expect_put(i);
311                         }
312                         break;
313                 }
314         }
315 }
316
317 static inline int refresh_timer(struct nf_conntrack_expect *i)
318 {
319         struct nf_conn_help *master_help = nfct_help(i->master);
320
321         if (!del_timer(&i->timeout))
322                 return 0;
323
324         i->timeout.expires = jiffies + master_help->helper->timeout*HZ;
325         add_timer(&i->timeout);
326         return 1;
327 }
328
329 int nf_conntrack_expect_related(struct nf_conntrack_expect *expect)
330 {
331         struct nf_conntrack_expect *i;
332         struct nf_conn *master = expect->master;
333         struct nf_conn_help *master_help = nfct_help(master);
334         int ret;
335
336         NF_CT_ASSERT(master_help);
337
338         write_lock_bh(&nf_conntrack_lock);
339         list_for_each_entry(i, &nf_conntrack_expect_list, list) {
340                 if (expect_matches(i, expect)) {
341                         /* Refresh timer: if it's dying, ignore.. */
342                         if (refresh_timer(i)) {
343                                 ret = 0;
344                                 goto out;
345                         }
346                 } else if (expect_clash(i, expect)) {
347                         ret = -EBUSY;
348                         goto out;
349                 }
350         }
351         /* Will be over limit? */
352         if (master_help->helper->max_expected &&
353             master_help->expecting >= master_help->helper->max_expected)
354                 evict_oldest_expect(master);
355
356         nf_conntrack_expect_insert(expect);
357         nf_conntrack_expect_event(IPEXP_NEW, expect);
358         ret = 0;
359 out:
360         write_unlock_bh(&nf_conntrack_lock);
361         return ret;
362 }
363 EXPORT_SYMBOL_GPL(nf_conntrack_expect_related);
364
365 #ifdef CONFIG_PROC_FS
366 static void *exp_seq_start(struct seq_file *s, loff_t *pos)
367 {
368         struct list_head *e = &nf_conntrack_expect_list;
369         loff_t i;
370
371         /* strange seq_file api calls stop even if we fail,
372          * thus we need to grab lock since stop unlocks */
373         read_lock_bh(&nf_conntrack_lock);
374
375         if (list_empty(e))
376                 return NULL;
377
378         for (i = 0; i <= *pos; i++) {
379                 e = e->next;
380                 if (e == &nf_conntrack_expect_list)
381                         return NULL;
382         }
383         return e;
384 }
385
386 static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
387 {
388         struct list_head *e = v;
389
390         ++*pos;
391         e = e->next;
392
393         if (e == &nf_conntrack_expect_list)
394                 return NULL;
395
396         return e;
397 }
398
399 static void exp_seq_stop(struct seq_file *s, void *v)
400 {
401         read_unlock_bh(&nf_conntrack_lock);
402 }
403
404 static int exp_seq_show(struct seq_file *s, void *v)
405 {
406         struct nf_conntrack_expect *expect = v;
407
408         if (expect->timeout.function)
409                 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
410                            ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
411         else
412                 seq_printf(s, "- ");
413         seq_printf(s, "l3proto = %u proto=%u ",
414                    expect->tuple.src.l3num,
415                    expect->tuple.dst.protonum);
416         print_tuple(s, &expect->tuple,
417                     __nf_ct_l3proto_find(expect->tuple.src.l3num),
418                     __nf_ct_l4proto_find(expect->tuple.src.l3num,
419                                        expect->tuple.dst.protonum));
420         return seq_putc(s, '\n');
421 }
422
423 static struct seq_operations exp_seq_ops = {
424         .start = exp_seq_start,
425         .next = exp_seq_next,
426         .stop = exp_seq_stop,
427         .show = exp_seq_show
428 };
429
430 static int exp_open(struct inode *inode, struct file *file)
431 {
432         return seq_open(file, &exp_seq_ops);
433 }
434
435 struct file_operations exp_file_ops = {
436         .owner   = THIS_MODULE,
437         .open    = exp_open,
438         .read    = seq_read,
439         .llseek  = seq_lseek,
440         .release = seq_release
441 };
442 #endif /* CONFIG_PROC_FS */