Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/bart/ide-2.6
[linux-2.6] / net / netfilter / nf_conntrack_expect.c
1 /* Expectation handling for nf_conntrack. */
2
3 /* (C) 1999-2001 Paul `Rusty' Russell
4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11
12 #include <linux/types.h>
13 #include <linux/netfilter.h>
14 #include <linux/skbuff.h>
15 #include <linux/proc_fs.h>
16 #include <linux/seq_file.h>
17 #include <linux/stddef.h>
18 #include <linux/slab.h>
19 #include <linux/err.h>
20 #include <linux/percpu.h>
21 #include <linux/kernel.h>
22 #include <linux/jhash.h>
23 #include <net/net_namespace.h>
24
25 #include <net/netfilter/nf_conntrack.h>
26 #include <net/netfilter/nf_conntrack_core.h>
27 #include <net/netfilter/nf_conntrack_expect.h>
28 #include <net/netfilter/nf_conntrack_helper.h>
29 #include <net/netfilter/nf_conntrack_tuple.h>
30
31 unsigned int nf_ct_expect_hsize __read_mostly;
32 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
33
34 static unsigned int nf_ct_expect_hash_rnd __read_mostly;
35 unsigned int nf_ct_expect_max __read_mostly;
36 static int nf_ct_expect_hash_rnd_initted __read_mostly;
37
38 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
39
40 /* nf_conntrack_expect helper functions */
41 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
42 {
43         struct nf_conn_help *master_help = nfct_help(exp->master);
44         struct net *net = nf_ct_exp_net(exp);
45
46         NF_CT_ASSERT(master_help);
47         NF_CT_ASSERT(!timer_pending(&exp->timeout));
48
49         hlist_del_rcu(&exp->hnode);
50         net->ct.expect_count--;
51
52         hlist_del(&exp->lnode);
53         master_help->expecting[exp->class]--;
54         nf_ct_expect_put(exp);
55
56         NF_CT_STAT_INC(net, expect_delete);
57 }
58 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
59
60 static void nf_ct_expectation_timed_out(unsigned long ul_expect)
61 {
62         struct nf_conntrack_expect *exp = (void *)ul_expect;
63
64         spin_lock_bh(&nf_conntrack_lock);
65         nf_ct_unlink_expect(exp);
66         spin_unlock_bh(&nf_conntrack_lock);
67         nf_ct_expect_put(exp);
68 }
69
70 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
71 {
72         unsigned int hash;
73
74         if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
75                 get_random_bytes(&nf_ct_expect_hash_rnd,
76                                  sizeof(nf_ct_expect_hash_rnd));
77                 nf_ct_expect_hash_rnd_initted = 1;
78         }
79
80         hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
81                       (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
82                        (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
83         return ((u64)hash * nf_ct_expect_hsize) >> 32;
84 }
85
86 struct nf_conntrack_expect *
87 __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
88 {
89         struct nf_conntrack_expect *i;
90         struct hlist_node *n;
91         unsigned int h;
92
93         if (!net->ct.expect_count)
94                 return NULL;
95
96         h = nf_ct_expect_dst_hash(tuple);
97         hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
98                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
99                         return i;
100         }
101         return NULL;
102 }
103 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
104
105 /* Just find a expectation corresponding to a tuple. */
106 struct nf_conntrack_expect *
107 nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
108 {
109         struct nf_conntrack_expect *i;
110
111         rcu_read_lock();
112         i = __nf_ct_expect_find(net, tuple);
113         if (i && !atomic_inc_not_zero(&i->use))
114                 i = NULL;
115         rcu_read_unlock();
116
117         return i;
118 }
119 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
120
121 /* If an expectation for this connection is found, it gets delete from
122  * global list then returned. */
123 struct nf_conntrack_expect *
124 nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
125 {
126         struct nf_conntrack_expect *i, *exp = NULL;
127         struct hlist_node *n;
128         unsigned int h;
129
130         if (!net->ct.expect_count)
131                 return NULL;
132
133         h = nf_ct_expect_dst_hash(tuple);
134         hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
135                 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
136                     nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
137                         exp = i;
138                         break;
139                 }
140         }
141         if (!exp)
142                 return NULL;
143
144         /* If master is not in hash table yet (ie. packet hasn't left
145            this machine yet), how can other end know about expected?
146            Hence these are not the droids you are looking for (if
147            master ct never got confirmed, we'd hold a reference to it
148            and weird things would happen to future packets). */
149         if (!nf_ct_is_confirmed(exp->master))
150                 return NULL;
151
152         if (exp->flags & NF_CT_EXPECT_PERMANENT) {
153                 atomic_inc(&exp->use);
154                 return exp;
155         } else if (del_timer(&exp->timeout)) {
156                 nf_ct_unlink_expect(exp);
157                 return exp;
158         }
159
160         return NULL;
161 }
162
163 /* delete all expectations for this conntrack */
164 void nf_ct_remove_expectations(struct nf_conn *ct)
165 {
166         struct nf_conn_help *help = nfct_help(ct);
167         struct nf_conntrack_expect *exp;
168         struct hlist_node *n, *next;
169
170         /* Optimization: most connection never expect any others. */
171         if (!help)
172                 return;
173
174         hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
175                 if (del_timer(&exp->timeout)) {
176                         nf_ct_unlink_expect(exp);
177                         nf_ct_expect_put(exp);
178                 }
179         }
180 }
181 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
182
183 /* Would two expected things clash? */
184 static inline int expect_clash(const struct nf_conntrack_expect *a,
185                                const struct nf_conntrack_expect *b)
186 {
187         /* Part covered by intersection of masks must be unequal,
188            otherwise they clash */
189         struct nf_conntrack_tuple_mask intersect_mask;
190         int count;
191
192         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
193
194         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
195                 intersect_mask.src.u3.all[count] =
196                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
197         }
198
199         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
200 }
201
202 static inline int expect_matches(const struct nf_conntrack_expect *a,
203                                  const struct nf_conntrack_expect *b)
204 {
205         return a->master == b->master && a->class == b->class
206                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
207                 && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
208 }
209
210 /* Generally a bad idea to call this: could have matched already. */
211 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
212 {
213         spin_lock_bh(&nf_conntrack_lock);
214         if (del_timer(&exp->timeout)) {
215                 nf_ct_unlink_expect(exp);
216                 nf_ct_expect_put(exp);
217         }
218         spin_unlock_bh(&nf_conntrack_lock);
219 }
220 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
221
222 /* We don't increase the master conntrack refcount for non-fulfilled
223  * conntracks. During the conntrack destruction, the expectations are
224  * always killed before the conntrack itself */
225 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
226 {
227         struct nf_conntrack_expect *new;
228
229         new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
230         if (!new)
231                 return NULL;
232
233         new->master = me;
234         atomic_set(&new->use, 1);
235         INIT_RCU_HEAD(&new->rcu);
236         return new;
237 }
238 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
239
240 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
241                        u_int8_t family,
242                        const union nf_inet_addr *saddr,
243                        const union nf_inet_addr *daddr,
244                        u_int8_t proto, const __be16 *src, const __be16 *dst)
245 {
246         int len;
247
248         if (family == AF_INET)
249                 len = 4;
250         else
251                 len = 16;
252
253         exp->flags = 0;
254         exp->class = class;
255         exp->expectfn = NULL;
256         exp->helper = NULL;
257         exp->tuple.src.l3num = family;
258         exp->tuple.dst.protonum = proto;
259
260         if (saddr) {
261                 memcpy(&exp->tuple.src.u3, saddr, len);
262                 if (sizeof(exp->tuple.src.u3) > len)
263                         /* address needs to be cleared for nf_ct_tuple_equal */
264                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
265                                sizeof(exp->tuple.src.u3) - len);
266                 memset(&exp->mask.src.u3, 0xFF, len);
267                 if (sizeof(exp->mask.src.u3) > len)
268                         memset((void *)&exp->mask.src.u3 + len, 0x00,
269                                sizeof(exp->mask.src.u3) - len);
270         } else {
271                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
272                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
273         }
274
275         if (src) {
276                 exp->tuple.src.u.all = *src;
277                 exp->mask.src.u.all = htons(0xFFFF);
278         } else {
279                 exp->tuple.src.u.all = 0;
280                 exp->mask.src.u.all = 0;
281         }
282
283         memcpy(&exp->tuple.dst.u3, daddr, len);
284         if (sizeof(exp->tuple.dst.u3) > len)
285                 /* address needs to be cleared for nf_ct_tuple_equal */
286                 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
287                        sizeof(exp->tuple.dst.u3) - len);
288
289         exp->tuple.dst.u.all = *dst;
290 }
291 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
292
293 static void nf_ct_expect_free_rcu(struct rcu_head *head)
294 {
295         struct nf_conntrack_expect *exp;
296
297         exp = container_of(head, struct nf_conntrack_expect, rcu);
298         kmem_cache_free(nf_ct_expect_cachep, exp);
299 }
300
301 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
302 {
303         if (atomic_dec_and_test(&exp->use))
304                 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
305 }
306 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
307
308 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
309 {
310         struct nf_conn_help *master_help = nfct_help(exp->master);
311         struct net *net = nf_ct_exp_net(exp);
312         const struct nf_conntrack_expect_policy *p;
313         unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
314
315         atomic_inc(&exp->use);
316
317         hlist_add_head(&exp->lnode, &master_help->expectations);
318         master_help->expecting[exp->class]++;
319
320         hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
321         net->ct.expect_count++;
322
323         setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
324                     (unsigned long)exp);
325         p = &master_help->helper->expect_policy[exp->class];
326         exp->timeout.expires = jiffies + p->timeout * HZ;
327         add_timer(&exp->timeout);
328
329         atomic_inc(&exp->use);
330         NF_CT_STAT_INC(net, expect_create);
331 }
332
333 /* Race with expectations being used means we could have none to find; OK. */
334 static void evict_oldest_expect(struct nf_conn *master,
335                                 struct nf_conntrack_expect *new)
336 {
337         struct nf_conn_help *master_help = nfct_help(master);
338         struct nf_conntrack_expect *exp, *last = NULL;
339         struct hlist_node *n;
340
341         hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
342                 if (exp->class == new->class)
343                         last = exp;
344         }
345
346         if (last && del_timer(&last->timeout)) {
347                 nf_ct_unlink_expect(last);
348                 nf_ct_expect_put(last);
349         }
350 }
351
352 static inline int refresh_timer(struct nf_conntrack_expect *i)
353 {
354         struct nf_conn_help *master_help = nfct_help(i->master);
355         const struct nf_conntrack_expect_policy *p;
356
357         if (!del_timer(&i->timeout))
358                 return 0;
359
360         p = &master_help->helper->expect_policy[i->class];
361         i->timeout.expires = jiffies + p->timeout * HZ;
362         add_timer(&i->timeout);
363         return 1;
364 }
365
366 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
367 {
368         const struct nf_conntrack_expect_policy *p;
369         struct nf_conntrack_expect *i;
370         struct nf_conn *master = expect->master;
371         struct nf_conn_help *master_help = nfct_help(master);
372         struct net *net = nf_ct_exp_net(expect);
373         struct hlist_node *n;
374         unsigned int h;
375         int ret = 1;
376
377         if (!master_help->helper) {
378                 ret = -ESHUTDOWN;
379                 goto out;
380         }
381         h = nf_ct_expect_dst_hash(&expect->tuple);
382         hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
383                 if (expect_matches(i, expect)) {
384                         /* Refresh timer: if it's dying, ignore.. */
385                         if (refresh_timer(i)) {
386                                 ret = 0;
387                                 goto out;
388                         }
389                 } else if (expect_clash(i, expect)) {
390                         ret = -EBUSY;
391                         goto out;
392                 }
393         }
394         /* Will be over limit? */
395         p = &master_help->helper->expect_policy[expect->class];
396         if (p->max_expected &&
397             master_help->expecting[expect->class] >= p->max_expected) {
398                 evict_oldest_expect(master, expect);
399                 if (master_help->expecting[expect->class] >= p->max_expected) {
400                         ret = -EMFILE;
401                         goto out;
402                 }
403         }
404
405         if (net->ct.expect_count >= nf_ct_expect_max) {
406                 if (net_ratelimit())
407                         printk(KERN_WARNING
408                                "nf_conntrack: expectation table full\n");
409                 ret = -EMFILE;
410         }
411 out:
412         return ret;
413 }
414
415 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
416                                 u32 pid, int report)
417 {
418         int ret;
419
420         spin_lock_bh(&nf_conntrack_lock);
421         ret = __nf_ct_expect_check(expect);
422         if (ret <= 0)
423                 goto out;
424
425         ret = 0;
426         nf_ct_expect_insert(expect);
427         spin_unlock_bh(&nf_conntrack_lock);
428         nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
429         return ret;
430 out:
431         spin_unlock_bh(&nf_conntrack_lock);
432         return ret;
433 }
434 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
435
436 #ifdef CONFIG_PROC_FS
437 struct ct_expect_iter_state {
438         struct seq_net_private p;
439         unsigned int bucket;
440 };
441
442 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
443 {
444         struct net *net = seq_file_net(seq);
445         struct ct_expect_iter_state *st = seq->private;
446         struct hlist_node *n;
447
448         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
449                 n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
450                 if (n)
451                         return n;
452         }
453         return NULL;
454 }
455
456 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
457                                              struct hlist_node *head)
458 {
459         struct net *net = seq_file_net(seq);
460         struct ct_expect_iter_state *st = seq->private;
461
462         head = rcu_dereference(head->next);
463         while (head == NULL) {
464                 if (++st->bucket >= nf_ct_expect_hsize)
465                         return NULL;
466                 head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
467         }
468         return head;
469 }
470
471 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
472 {
473         struct hlist_node *head = ct_expect_get_first(seq);
474
475         if (head)
476                 while (pos && (head = ct_expect_get_next(seq, head)))
477                         pos--;
478         return pos ? NULL : head;
479 }
480
481 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
482         __acquires(RCU)
483 {
484         rcu_read_lock();
485         return ct_expect_get_idx(seq, *pos);
486 }
487
488 static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
489 {
490         (*pos)++;
491         return ct_expect_get_next(seq, v);
492 }
493
494 static void exp_seq_stop(struct seq_file *seq, void *v)
495         __releases(RCU)
496 {
497         rcu_read_unlock();
498 }
499
500 static int exp_seq_show(struct seq_file *s, void *v)
501 {
502         struct nf_conntrack_expect *expect;
503         struct hlist_node *n = v;
504         char *delim = "";
505
506         expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
507
508         if (expect->timeout.function)
509                 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
510                            ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
511         else
512                 seq_printf(s, "- ");
513         seq_printf(s, "l3proto = %u proto=%u ",
514                    expect->tuple.src.l3num,
515                    expect->tuple.dst.protonum);
516         print_tuple(s, &expect->tuple,
517                     __nf_ct_l3proto_find(expect->tuple.src.l3num),
518                     __nf_ct_l4proto_find(expect->tuple.src.l3num,
519                                        expect->tuple.dst.protonum));
520
521         if (expect->flags & NF_CT_EXPECT_PERMANENT) {
522                 seq_printf(s, "PERMANENT");
523                 delim = ",";
524         }
525         if (expect->flags & NF_CT_EXPECT_INACTIVE)
526                 seq_printf(s, "%sINACTIVE", delim);
527
528         return seq_putc(s, '\n');
529 }
530
531 static const struct seq_operations exp_seq_ops = {
532         .start = exp_seq_start,
533         .next = exp_seq_next,
534         .stop = exp_seq_stop,
535         .show = exp_seq_show
536 };
537
538 static int exp_open(struct inode *inode, struct file *file)
539 {
540         return seq_open_net(inode, file, &exp_seq_ops,
541                         sizeof(struct ct_expect_iter_state));
542 }
543
544 static const struct file_operations exp_file_ops = {
545         .owner   = THIS_MODULE,
546         .open    = exp_open,
547         .read    = seq_read,
548         .llseek  = seq_lseek,
549         .release = seq_release_net,
550 };
551 #endif /* CONFIG_PROC_FS */
552
/*
 * Create the "nf_conntrack_expect" proc entry (mode 0440) for @net.
 *
 * Returns 0 on success, or when CONFIG_PROC_FS is disabled; -ENOMEM if
 * the entry could not be created.
 */
static int exp_proc_init(struct net *net)
{
#ifdef CONFIG_PROC_FS
	if (!proc_net_fops_create(net, "nf_conntrack_expect", 0440,
				  &exp_file_ops))
		return -ENOMEM;
#endif /* CONFIG_PROC_FS */
	return 0;
}
564
/*
 * Remove the "nf_conntrack_expect" proc entry of @net.  Does nothing
 * when CONFIG_PROC_FS is disabled.
 */
static void exp_proc_remove(struct net *net)
{
#ifdef CONFIG_PROC_FS
	proc_net_remove(net, "nf_conntrack_expect");
#endif /* CONFIG_PROC_FS */
}
571
572 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
573
574 int nf_conntrack_expect_init(struct net *net)
575 {
576         int err = -ENOMEM;
577
578         if (net_eq(net, &init_net)) {
579                 if (!nf_ct_expect_hsize) {
580                         nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
581                         if (!nf_ct_expect_hsize)
582                                 nf_ct_expect_hsize = 1;
583                 }
584                 nf_ct_expect_max = nf_ct_expect_hsize * 4;
585         }
586
587         net->ct.expect_count = 0;
588         net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
589                                                   &net->ct.expect_vmalloc, 0);
590         if (net->ct.expect_hash == NULL)
591                 goto err1;
592
593         if (net_eq(net, &init_net)) {
594                 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
595                                         sizeof(struct nf_conntrack_expect),
596                                         0, 0, NULL);
597                 if (!nf_ct_expect_cachep)
598                         goto err2;
599         }
600
601         err = exp_proc_init(net);
602         if (err < 0)
603                 goto err3;
604
605         return 0;
606
607 err3:
608         if (net_eq(net, &init_net))
609                 kmem_cache_destroy(nf_ct_expect_cachep);
610 err2:
611         nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
612                              nf_ct_expect_hsize);
613 err1:
614         return err;
615 }
616
617 void nf_conntrack_expect_fini(struct net *net)
618 {
619         exp_proc_remove(net);
620         if (net_eq(net, &init_net))
621                 kmem_cache_destroy(nf_ct_expect_cachep);
622         nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
623                              nf_ct_expect_hsize);
624 }