/* Expectation handling for nf_conntrack. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#include <linux/types.h>
#include <linux/netfilter.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/stddef.h>
#include <linux/slab.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/kernel.h>
#include <linux/jhash.h>
#include <linux/moduleparam.h>
#include <linux/random.h>
#include <net/net_namespace.h>

#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <net/netfilter/nf_conntrack_helper.h>
#include <net/netfilter/nf_conntrack_tuple.h>

struct hlist_head *nf_ct_expect_hash __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hash);

unsigned int nf_ct_expect_hsize __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);

static unsigned int nf_ct_expect_hash_rnd __read_mostly;
static unsigned int nf_ct_expect_count;
unsigned int nf_ct_expect_max __read_mostly;
static int nf_ct_expect_hash_rnd_initted __read_mostly;
static int nf_ct_expect_vmalloc;

static struct kmem_cache *nf_ct_expect_cachep __read_mostly;

/* nf_conntrack_expect helper functions */
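/* Unlink @exp from the global expectation hash and from its master's
 * per-conntrack list, dropping one reference. The caller must hold
 * nf_conntrack_lock and must already have stopped the timeout timer,
 * as the asserts below verify.
 */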
void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);

        NF_CT_ASSERT(master_help);
        NF_CT_ASSERT(!timer_pending(&exp->timeout));

        hlist_del_rcu(&exp->hnode);
        nf_ct_expect_count--;

        hlist_del(&exp->lnode);
        master_help->expecting[exp->class]--;
        nf_ct_expect_put(exp);

        NF_CT_STAT_INC(expect_delete);
}
EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);

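/* Timeout handler: the expectation was never matched by an incoming
 * connection, so unlink it and drop the reference held by the timer.
 */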
static void nf_ct_expectation_timed_out(unsigned long ul_expect)
{
        struct nf_conntrack_expect *exp = (void *)ul_expect;

        spin_lock_bh(&nf_conntrack_lock);
        nf_ct_unlink_expect(exp);
        spin_unlock_bh(&nf_conntrack_lock);
        nf_ct_expect_put(exp);
}

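/* Hash an expected tuple by destination address, destination port and
 * the layer 3/4 protocol numbers. The random seed is initialized
 * lazily when the first expectation is hashed.
 */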
static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
{
        unsigned int hash;

        if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
                get_random_bytes(&nf_ct_expect_hash_rnd,
                                 sizeof(nf_ct_expect_hash_rnd));
                nf_ct_expect_hash_rnd_initted = 1;
        }

        hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
                      (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
                       (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
        return ((u64)hash * nf_ct_expect_hsize) >> 32;
}

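/* Lockless lookup; the caller must hold rcu_read_lock(). No reference
 * is taken on the result; use nf_ct_expect_find_get() below when the
 * expectation has to outlive the RCU read-side critical section.
 */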
struct nf_conntrack_expect *
__nf_ct_expect_find(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;
        struct hlist_node *n;
        unsigned int h;

        if (!nf_ct_expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry_rcu(i, n, &nf_ct_expect_hash[h], hnode) {
                if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
                        return i;
        }
        return NULL;
}
EXPORT_SYMBOL_GPL(__nf_ct_expect_find);

/* Just find an expectation corresponding to a tuple. */
struct nf_conntrack_expect *
nf_ct_expect_find_get(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i;

        rcu_read_lock();
        i = __nf_ct_expect_find(tuple);
        if (i && !atomic_inc_not_zero(&i->use))
                i = NULL;
        rcu_read_unlock();

        return i;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);

/* If an expectation for this connection is found, it is deleted from
 * the global list and returned. */
struct nf_conntrack_expect *
nf_ct_find_expectation(const struct nf_conntrack_tuple *tuple)
{
        struct nf_conntrack_expect *i, *exp = NULL;
        struct hlist_node *n;
        unsigned int h;

        if (!nf_ct_expect_count)
                return NULL;

        h = nf_ct_expect_dst_hash(tuple);
        hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
                if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
                    nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
                        exp = i;
                        break;
                }
        }
        if (!exp)
                return NULL;

        /* If master is not in hash table yet (ie. packet hasn't left
           this machine yet), how can other end know about expected?
           Hence these are not the droids you are looking for (if
           master ct never got confirmed, we'd hold a reference to it
           and weird things would happen to future packets). */
        if (!nf_ct_is_confirmed(exp->master))
                return NULL;

        if (exp->flags & NF_CT_EXPECT_PERMANENT) {
                atomic_inc(&exp->use);
                return exp;
        } else if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                return exp;
        }

        return NULL;
}

/* Delete all expectations for this conntrack. */
void nf_ct_remove_expectations(struct nf_conn *ct)
{
        struct nf_conn_help *help = nfct_help(ct);
        struct nf_conntrack_expect *exp;
        struct hlist_node *n, *next;

        /* Optimization: most connections never expect any others. */
        if (!help)
                return;

        hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
                if (del_timer(&exp->timeout)) {
                        nf_ct_unlink_expect(exp);
                        nf_ct_expect_put(exp);
                }
        }
}
EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);

/* Would two expected things clash? */
static inline int expect_clash(const struct nf_conntrack_expect *a,
                               const struct nf_conntrack_expect *b)
{
        /* The parts covered by the intersection of the two masks must
           differ, otherwise the expectations clash. */
        struct nf_conntrack_tuple_mask intersect_mask;
        int count;

        intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;

        for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++) {
                intersect_mask.src.u3.all[count] =
                        a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
        }

        return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}

static inline int expect_matches(const struct nf_conntrack_expect *a,
                                 const struct nf_conntrack_expect *b)
{
        return a->master == b->master && a->class == b->class
                && nf_ct_tuple_equal(&a->tuple, &b->tuple)
                && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
}

/* Generally a bad idea to call this: could have matched already. */
void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
{
        spin_lock_bh(&nf_conntrack_lock);
        if (del_timer(&exp->timeout)) {
                nf_ct_unlink_expect(exp);
                nf_ct_expect_put(exp);
        }
        spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);

/* We don't increase the master conntrack refcount for non-fulfilled
 * expectations. During conntrack destruction, the expectations are
 * always killed before the conntrack itself. */
struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
{
        struct nf_conntrack_expect *new;

        new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
        if (!new)
                return NULL;

        new->master = me;
        atomic_set(&new->use, 1);
        INIT_RCU_HEAD(&new->rcu);
        return new;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);

void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
                       int family,
                       const union nf_inet_addr *saddr,
                       const union nf_inet_addr *daddr,
                       u_int8_t proto, const __be16 *src, const __be16 *dst)
{
        int len;

        if (family == AF_INET)
                len = 4;
        else
                len = 16;

        exp->flags = 0;
        exp->class = class;
        exp->expectfn = NULL;
        exp->helper = NULL;
        exp->tuple.src.l3num = family;
        exp->tuple.dst.protonum = proto;

        if (saddr) {
                memcpy(&exp->tuple.src.u3, saddr, len);
                if (sizeof(exp->tuple.src.u3) > len)
                        /* address needs to be cleared for nf_ct_tuple_equal */
                        memset((void *)&exp->tuple.src.u3 + len, 0x00,
                               sizeof(exp->tuple.src.u3) - len);
                memset(&exp->mask.src.u3, 0xFF, len);
                if (sizeof(exp->mask.src.u3) > len)
                        memset((void *)&exp->mask.src.u3 + len, 0x00,
                               sizeof(exp->mask.src.u3) - len);
        } else {
                memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
                memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
        }

        if (src) {
                exp->tuple.src.u.all = *src;
                exp->mask.src.u.all = htons(0xFFFF);
        } else {
                exp->tuple.src.u.all = 0;
                exp->mask.src.u.all = 0;
        }

        memcpy(&exp->tuple.dst.u3, daddr, len);
        if (sizeof(exp->tuple.dst.u3) > len)
                /* address needs to be cleared for nf_ct_tuple_equal */
                memset((void *)&exp->tuple.dst.u3 + len, 0x00,
                       sizeof(exp->tuple.dst.u3) - len);

        exp->tuple.dst.u.all = *dst;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_init);
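
/* A minimal usage sketch of the expectation API above, roughly as a
 * connection tracking helper would call it. The identifiers ct, port
 * (a __be16 in network byte order), the class value 0 and the verdict
 * handling are illustrative only, not taken from a real helper:
 *
 *	struct nf_conntrack_expect *exp;
 *	int ret = NF_ACCEPT;
 *
 *	exp = nf_ct_expect_alloc(ct);
 *	if (exp == NULL)
 *		return NF_DROP;
 *	nf_ct_expect_init(exp, 0,
 *			  ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.l3num,
 *			  &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple.src.u3,
 *			  &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3,
 *			  IPPROTO_TCP, NULL, &port);
 *	if (nf_ct_expect_related(exp) != 0)
 *		ret = NF_DROP;
 *	nf_ct_expect_put(exp);
 *	return ret;
 */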

static void nf_ct_expect_free_rcu(struct rcu_head *head)
{
        struct nf_conntrack_expect *exp;

        exp = container_of(head, struct nf_conntrack_expect, rcu);
        kmem_cache_free(nf_ct_expect_cachep, exp);
}

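/* Drop a reference; the final put frees the expectation after an RCU
 * grace period, so the lockless readers in __nf_ct_expect_find() can
 * never see a freed entry.
 */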
void nf_ct_expect_put(struct nf_conntrack_expect *exp)
{
        if (atomic_dec_and_test(&exp->use))
                call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_put);

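/* Caller must hold nf_conntrack_lock. Takes two references on @exp:
 * one for the hash table and master list, one for the timeout timer.
 */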
static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
{
        struct nf_conn_help *master_help = nfct_help(exp->master);
        const struct nf_conntrack_expect_policy *p;
        unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);

        atomic_inc(&exp->use);

        hlist_add_head(&exp->lnode, &master_help->expectations);
        master_help->expecting[exp->class]++;

        hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]);
        nf_ct_expect_count++;

        setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
                    (unsigned long)exp);
        p = &master_help->helper->expect_policy[exp->class];
        exp->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&exp->timeout);

        atomic_inc(&exp->use);
        NF_CT_STAT_INC(expect_create);
}

/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct nf_conn *master,
                                struct nf_conntrack_expect *new)
{
        struct nf_conn_help *master_help = nfct_help(master);
        struct nf_conntrack_expect *exp, *last = NULL;
        struct hlist_node *n;

        hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
                if (exp->class == new->class)
                        last = exp;
        }

        if (last && del_timer(&last->timeout)) {
                nf_ct_unlink_expect(last);
                nf_ct_expect_put(last);
        }
}

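/* Restart an expectation's timeout using its helper's policy. Returns
 * 0 if the timer had already expired, in which case the expectation
 * is on its way out and must not be reused.
 */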
static inline int refresh_timer(struct nf_conntrack_expect *i)
{
        struct nf_conn_help *master_help = nfct_help(i->master);
        const struct nf_conntrack_expect_policy *p;

        if (!del_timer(&i->timeout))
                return 0;

        p = &master_help->helper->expect_policy[i->class];
        i->timeout.expires = jiffies + p->timeout * HZ;
        add_timer(&i->timeout);
        return 1;
}

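/* Register a new expectation. An identical existing expectation only
 * gets its timer refreshed; a clashing one fails with -EBUSY. The
 * per-helper, per-class limit is enforced by evicting the oldest
 * expectation of the same class; the global nf_ct_expect_max limit
 * by rejecting the new expectation with -EMFILE.
 */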
int nf_ct_expect_related(struct nf_conntrack_expect *expect)
{
        const struct nf_conntrack_expect_policy *p;
        struct nf_conntrack_expect *i;
        struct nf_conn *master = expect->master;
        struct nf_conn_help *master_help = nfct_help(master);
        struct hlist_node *n;
        unsigned int h;
        int ret;

        NF_CT_ASSERT(master_help);

        spin_lock_bh(&nf_conntrack_lock);
        if (!master_help->helper) {
                ret = -ESHUTDOWN;
                goto out;
        }
        h = nf_ct_expect_dst_hash(&expect->tuple);
        hlist_for_each_entry(i, n, &nf_ct_expect_hash[h], hnode) {
                if (expect_matches(i, expect)) {
                        /* Refresh timer: if it's dying, ignore it. */
                        if (refresh_timer(i)) {
                                ret = 0;
                                goto out;
                        }
                } else if (expect_clash(i, expect)) {
                        ret = -EBUSY;
                        goto out;
                }
        }
        /* Will we be over the limit? */
        p = &master_help->helper->expect_policy[expect->class];
        if (p->max_expected &&
            master_help->expecting[expect->class] >= p->max_expected) {
                evict_oldest_expect(master, expect);
                if (master_help->expecting[expect->class] >= p->max_expected) {
                        ret = -EMFILE;
                        goto out;
                }
        }

        if (nf_ct_expect_count >= nf_ct_expect_max) {
                if (net_ratelimit())
                        printk(KERN_WARNING
                               "nf_conntrack: expectation table full\n");
                ret = -EMFILE;
                goto out;
        }

        nf_ct_expect_insert(expect);
        nf_ct_expect_event(IPEXP_NEW, expect);
        ret = 0;
out:
        spin_unlock_bh(&nf_conntrack_lock);
        return ret;
}
EXPORT_SYMBOL_GPL(nf_ct_expect_related);

#ifdef CONFIG_PROC_FS
struct ct_expect_iter_state {
        unsigned int bucket;
};

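/* The /proc/net/nf_conntrack_expect iterator walks the expectation
 * hash bucket by bucket under rcu_read_lock(), remembering only the
 * current bucket between seq_file callbacks.
 */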
static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
{
        struct ct_expect_iter_state *st = seq->private;
        struct hlist_node *n;

        for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
                n = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
                if (n)
                        return n;
        }
        return NULL;
}

static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
                                             struct hlist_node *head)
{
        struct ct_expect_iter_state *st = seq->private;

        head = rcu_dereference(head->next);
        while (head == NULL) {
                if (++st->bucket >= nf_ct_expect_hsize)
                        return NULL;
                head = rcu_dereference(nf_ct_expect_hash[st->bucket].first);
        }
        return head;
}

static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
{
        struct hlist_node *head = ct_expect_get_first(seq);

        if (head)
                while (pos && (head = ct_expect_get_next(seq, head)))
                        pos--;
        return pos ? NULL : head;
}

static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
        __acquires(RCU)
{
        rcu_read_lock();
        return ct_expect_get_idx(seq, *pos);
}

static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
        (*pos)++;
        return ct_expect_get_next(seq, v);
}

static void exp_seq_stop(struct seq_file *seq, void *v)
        __releases(RCU)
{
        rcu_read_unlock();
}

static int exp_seq_show(struct seq_file *s, void *v)
{
        struct nf_conntrack_expect *expect;
        struct hlist_node *n = v;
        char *delim = "";

        expect = hlist_entry(n, struct nf_conntrack_expect, hnode);

        if (expect->timeout.function)
                seq_printf(s, "%ld ", timer_pending(&expect->timeout)
                           ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
        else
                seq_printf(s, "- ");
        seq_printf(s, "l3proto = %u proto=%u ",
                   expect->tuple.src.l3num,
                   expect->tuple.dst.protonum);
        print_tuple(s, &expect->tuple,
                    __nf_ct_l3proto_find(expect->tuple.src.l3num),
                    __nf_ct_l4proto_find(expect->tuple.src.l3num,
                                         expect->tuple.dst.protonum));

        if (expect->flags & NF_CT_EXPECT_PERMANENT) {
                seq_printf(s, "PERMANENT");
                delim = ",";
        }
        if (expect->flags & NF_CT_EXPECT_INACTIVE)
                seq_printf(s, "%sINACTIVE", delim);

        return seq_putc(s, '\n');
}

static const struct seq_operations exp_seq_ops = {
        .start = exp_seq_start,
        .next  = exp_seq_next,
        .stop  = exp_seq_stop,
        .show  = exp_seq_show
};

static int exp_open(struct inode *inode, struct file *file)
{
        return seq_open_private(file, &exp_seq_ops,
                                sizeof(struct ct_expect_iter_state));
}

static const struct file_operations exp_file_ops = {
        .owner   = THIS_MODULE,
        .open    = exp_open,
        .read    = seq_read,
        .llseek  = seq_lseek,
        .release = seq_release_private,
};
#endif /* CONFIG_PROC_FS */

static int __init exp_proc_init(void)
{
#ifdef CONFIG_PROC_FS
        struct proc_dir_entry *proc;

        proc = proc_net_fops_create(&init_net, "nf_conntrack_expect",
                                    0440, &exp_file_ops);
        if (!proc)
                return -ENOMEM;
#endif /* CONFIG_PROC_FS */
        return 0;
}

static void exp_proc_remove(void)
{
#ifdef CONFIG_PROC_FS
        proc_net_remove(&init_net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}

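/* The expectation hash size is tunable at module load time via the
 * expect_hashsize parameter; if left at zero it is derived from the
 * main conntrack hash size in nf_conntrack_expect_init() below.
 */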
module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);

int __init nf_conntrack_expect_init(void)
{
        int err = -ENOMEM;

        if (!nf_ct_expect_hsize) {
                nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
                if (!nf_ct_expect_hsize)
                        nf_ct_expect_hsize = 1;
        }
        nf_ct_expect_max = nf_ct_expect_hsize * 4;

        nf_ct_expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
                                                  &nf_ct_expect_vmalloc);
        if (nf_ct_expect_hash == NULL)
                goto err1;

        nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
                                        sizeof(struct nf_conntrack_expect),
                                        0, 0, NULL);
        if (!nf_ct_expect_cachep)
                goto err2;

        err = exp_proc_init();
        if (err < 0)
                goto err3;

        return 0;

err3:
        kmem_cache_destroy(nf_ct_expect_cachep);
err2:
        nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
                             nf_ct_expect_hsize);
err1:
        return err;
}

void nf_conntrack_expect_fini(void)
{
        exp_proc_remove();
        kmem_cache_destroy(nf_ct_expect_cachep);
        nf_ct_free_hashtable(nf_ct_expect_hash, nf_ct_expect_vmalloc,
                             nf_ct_expect_hsize);
}