1 /* Expectation handling for nf_conntrack. */
 
   3 /* (C) 1999-2001 Paul `Rusty' Russell
 
   4  * (C) 2002-2006 Netfilter Core Team <coreteam@netfilter.org>
 
   5  * (C) 2003,2004 USAGI/WIDE Project <http://www.linux-ipv6.org>
 
   7  * This program is free software; you can redistribute it and/or modify
 
   8  * it under the terms of the GNU General Public License version 2 as
 
   9  * published by the Free Software Foundation.
 
  12 #include <linux/types.h>
 
  13 #include <linux/netfilter.h>
 
  14 #include <linux/skbuff.h>
 
  15 #include <linux/proc_fs.h>
 
  16 #include <linux/seq_file.h>
 
  17 #include <linux/stddef.h>
 
  18 #include <linux/slab.h>
 
  19 #include <linux/err.h>
 
  20 #include <linux/percpu.h>
 
  21 #include <linux/kernel.h>
 
  22 #include <linux/jhash.h>
 
  23 #include <net/net_namespace.h>
 
  25 #include <net/netfilter/nf_conntrack.h>
 
  26 #include <net/netfilter/nf_conntrack_core.h>
 
  27 #include <net/netfilter/nf_conntrack_expect.h>
 
  28 #include <net/netfilter/nf_conntrack_helper.h>
 
  29 #include <net/netfilter/nf_conntrack_tuple.h>
 
  31 unsigned int nf_ct_expect_hsize __read_mostly;
 
  32 EXPORT_SYMBOL_GPL(nf_ct_expect_hsize);
 
  34 static unsigned int nf_ct_expect_hash_rnd __read_mostly;
 
  35 unsigned int nf_ct_expect_max __read_mostly;
 
  36 static int nf_ct_expect_hash_rnd_initted __read_mostly;
 
  38 static struct kmem_cache *nf_ct_expect_cachep __read_mostly;
 
  40 /* nf_conntrack_expect helper functions */
 
  /*
   * Detach @exp from the two lists it is linked on -- the per-netns hash
   * chain (hnode, removed with the RCU variant of hlist_del) and the
   * master helper's list (lnode) -- and decrement the matching per-net
   * and per-class counters.  One reference is dropped and the
   * expect_delete statistic is bumped.
   *
   * The expectation's timer must already be stopped (asserted below).
   * NOTE(review): the callers in this file that take a lock use
   * nf_conntrack_lock; presumably it must be held here -- confirm.
   */
  41 void nf_ct_unlink_expect(struct nf_conntrack_expect *exp)
 
  43         struct nf_conn_help *master_help = nfct_help(exp->master);
 
  44         struct net *net = nf_ct_exp_net(exp);
 
  46         NF_CT_ASSERT(master_help);
 
  47         NF_CT_ASSERT(!timer_pending(&exp->timeout));
 
  49         hlist_del_rcu(&exp->hnode);
 
  50         net->ct.expect_count--;
 
  52         hlist_del(&exp->lnode);
 
  53         master_help->expecting[exp->class]--;
 
  54         nf_ct_expect_put(exp);
 
  56         NF_CT_STAT_INC(net, expect_delete);
 
  58 EXPORT_SYMBOL_GPL(nf_ct_unlink_expect);
 
  /*
   * Timer callback installed by nf_ct_expect_insert().  @ul_expect is the
   * expectation pointer carried as an unsigned long.  The expectation is
   * unlinked under nf_conntrack_lock; a further reference is dropped once
   * the lock has been released.
   */
  60 static void nf_ct_expectation_timed_out(unsigned long ul_expect)
 
  62         struct nf_conntrack_expect *exp = (void *)ul_expect;
 
  64         spin_lock_bh(&nf_conntrack_lock);
 
  65         nf_ct_unlink_expect(exp);
 
  66         spin_unlock_bh(&nf_conntrack_lock);
 
  67         nf_ct_expect_put(exp);
 
  /*
   * Compute the expectation hash bucket for @tuple.
   *
   * Only the destination address words, the destination port/id
   * (dst.u.all), the protocol number and the l3 family feed the hash;
   * the source address and source port are not used.  The random seed is
   * drawn lazily on the first call.
   */
  70 static unsigned int nf_ct_expect_dst_hash(const struct nf_conntrack_tuple *tuple)
 
  74         if (unlikely(!nf_ct_expect_hash_rnd_initted)) {
 
  75                 get_random_bytes(&nf_ct_expect_hash_rnd, 4);
 
  76                 nf_ct_expect_hash_rnd_initted = 1;
 
  79         hash = jhash2(tuple->dst.u3.all, ARRAY_SIZE(tuple->dst.u3.all),
 
  80                       (((tuple->dst.protonum ^ tuple->src.l3num) << 16) |
 
  81                        (__force __u16)tuple->dst.u.all) ^ nf_ct_expect_hash_rnd);
 
             /*
              * Multiply-and-shift scales the hash onto the table size
              * without a modulo.  Assumes hash is 32 bits wide (its
              * declaration is not visible here) -- TODO confirm.
              */
  82         return ((u64)hash * nf_ct_expect_hsize) >> 32;
 
  /*
   * Look up an expectation matching @tuple in @net's expectation hash.
   * No reference is taken in the lines visible here.
   *
   * The per-net expect_count is tested first, before any hashing.  The
   * bucket is walked with the RCU list iterator, and an entry matches
   * when @tuple equals the entry's tuple under the entry's own mask.
   * NOTE(review): the RCU iterator suggests callers must be inside an
   * RCU read-side section or hold the conntrack lock -- confirm.
   */
  85 struct nf_conntrack_expect *
 
  86 __nf_ct_expect_find(struct net *net, const struct nf_conntrack_tuple *tuple)
 
  88         struct nf_conntrack_expect *i;
 
  92         if (!net->ct.expect_count)
 
  95         h = nf_ct_expect_dst_hash(tuple);
 
  96         hlist_for_each_entry_rcu(i, n, &net->ct.expect_hash[h], hnode) {
 
  97                 if (nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
 
 102 EXPORT_SYMBOL_GPL(__nf_ct_expect_find);
 
 104 /* Just find an expectation corresponding to a tuple. */
 
 /*
  * Reference-taking wrapper around __nf_ct_expect_find().
  *
  * atomic_inc_not_zero() is used so that an entry whose use count has
  * already dropped to zero is not revived; what happens on that failure
  * branch is not visible here.
  */
 105 struct nf_conntrack_expect *
 
 106 nf_ct_expect_find_get(struct net *net, const struct nf_conntrack_tuple *tuple)
 
 108         struct nf_conntrack_expect *i;
 
 111         i = __nf_ct_expect_find(net, tuple);
 
 112         if (i && !atomic_inc_not_zero(&i->use))
 
 118 EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
 
 120 /* If an expectation for this connection is found, it gets deleted from
 
 121  * global list then returned. */
 
 /*
  * Find the expectation that a new connection with @tuple fulfils.
  *
  * The bucket is scanned with the plain (non-RCU) list iterator, and
  * entries flagged NF_CT_EXPECT_INACTIVE are skipped.  After the scan:
  *   - an expectation whose master is not yet confirmed is rejected;
  *   - a PERMANENT expectation stays linked and gains a reference;
  *   - any other expectation is unlinked, but only if del_timer()
  *     succeeded, i.e. its timer had not already fired.
  */
 122 struct nf_conntrack_expect *
 
 123 nf_ct_find_expectation(struct net *net, const struct nf_conntrack_tuple *tuple)
 
 125         struct nf_conntrack_expect *i, *exp = NULL;
 
 126         struct hlist_node *n;
 
 129         if (!net->ct.expect_count)
 
 132         h = nf_ct_expect_dst_hash(tuple);
 
 133         hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 
 134                 if (!(i->flags & NF_CT_EXPECT_INACTIVE) &&
 
 135                     nf_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)) {
 
 143         /* If master is not in hash table yet (ie. packet hasn't left
 
 144            this machine yet), how can other end know about expected?
 
 145            Hence these are not the droids you are looking for (if
 
 146            master ct never got confirmed, we'd hold a reference to it
 
 147            and weird things would happen to future packets). */
 
 148         if (!nf_ct_is_confirmed(exp->master))
 
 151         if (exp->flags & NF_CT_EXPECT_PERMANENT) {
 
 152                 atomic_inc(&exp->use);
 
 154         } else if (del_timer(&exp->timeout)) {
 
 155                 nf_ct_unlink_expect(exp);
 
 162 /* delete all expectations for this conntrack */
 
 /*
  * Walk @ct's helper expectation list and tear down every entry whose
  * timer could still be cancelled.  Entries for which del_timer() fails
  * are left untouched by this loop.
  */
 163 void nf_ct_remove_expectations(struct nf_conn *ct)
 
 165         struct nf_conn_help *help = nfct_help(ct);
 
 166         struct nf_conntrack_expect *exp;
 
 167         struct hlist_node *n, *next;
 
 169         /* Optimization: most connections never expect any others. */
 
             /* _safe iterator: entries are unlinked while walking. */
 173         hlist_for_each_entry_safe(exp, n, next, &help->expectations, lnode) {
 
 174                 if (del_timer(&exp->timeout)) {
 
 175                         nf_ct_unlink_expect(exp);
 
 176                         nf_ct_expect_put(exp);
 
 180 EXPORT_SYMBOL_GPL(nf_ct_remove_expectations);
 
 182 /* Would two expected things clash? */
 
 /*
  * Compare the tuples of @a and @b under the intersection of their
  * source masks.  The intersection is the bitwise AND of the two masks'
  * source port/id and of each source address word.  Returns the result
  * of nf_ct_tuple_mask_cmp() on that intersected mask.
  */
 183 static inline int expect_clash(const struct nf_conntrack_expect *a,
 
 184                                const struct nf_conntrack_expect *b)
 
 186         /* Part covered by intersection of masks must be unequal,
 
 187            otherwise they clash */
 
 188         struct nf_conntrack_tuple_mask intersect_mask;
 
 191         intersect_mask.src.u.all = a->mask.src.u.all & b->mask.src.u.all;
 
 193         for (count = 0; count < NF_CT_TUPLE_L3SIZE; count++){
 
 194                 intersect_mask.src.u3.all[count] =
 
 195                         a->mask.src.u3.all[count] & b->mask.src.u3.all[count];
 
 198         return nf_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
 
 /*
  * Two expectations match when they share the same master conntrack and
  * class and have equal tuples and equal masks.
  */
 201 static inline int expect_matches(const struct nf_conntrack_expect *a,
 
 202                                  const struct nf_conntrack_expect *b)
 
 204         return a->master == b->master && a->class == b->class
 
 205                 && nf_ct_tuple_equal(&a->tuple, &b->tuple)
 
 206                 && nf_ct_tuple_mask_equal(&a->mask, &b->mask);
 
 209 /* Generally a bad idea to call this: could have matched already. */
 
 /*
  * Withdraw @exp.  Under nf_conntrack_lock, the expectation is unlinked
  * and a reference dropped only if del_timer() succeeded; otherwise
  * nothing is done here.
  */
 210 void nf_ct_unexpect_related(struct nf_conntrack_expect *exp)
 
 212         spin_lock_bh(&nf_conntrack_lock);
 
 213         if (del_timer(&exp->timeout)) {
 
 214                 nf_ct_unlink_expect(exp);
 
 215                 nf_ct_expect_put(exp);
 
 217         spin_unlock_bh(&nf_conntrack_lock);
 
 219 EXPORT_SYMBOL_GPL(nf_ct_unexpect_related);
 
 221 /* We don't increase the master conntrack refcount for non-fulfilled
 
 222  * conntracks. During the conntrack destruction, the expectations are
 
 223  * always killed before the conntrack itself */
 
 /*
  * Allocate an expectation from the expectation slab cache using
  * GFP_ATOMIC.  The new object starts with a use count of 1 and an
  * initialised RCU head.
  * NOTE(review): the allocation-failure check and the use of @me are on
  * lines not visible here -- confirm before relying on them.
  */
 224 struct nf_conntrack_expect *nf_ct_expect_alloc(struct nf_conn *me)
 
 226         struct nf_conntrack_expect *new;
 
 228         new = kmem_cache_alloc(nf_ct_expect_cachep, GFP_ATOMIC);
 
 233         atomic_set(&new->use, 1);
 
 234         INIT_RCU_HEAD(&new->rcu);
 
 237 EXPORT_SYMBOL_GPL(nf_ct_expect_alloc);
 
 /*
  * Fill in the tuple and mask of @exp.
  *
  * Source address and source port each have two branches: either the
  * value is copied with an all-ones mask, or both the tuple field and
  * its mask are zeroed.  The destination address and destination port
  * are always copied from @daddr and @dst.  Address bytes beyond the
  * family's address length are zeroed so that whole-union comparisons
  * stay meaningful.
  * NOTE(review): the branch conditions are on lines not visible here;
  * presumably they test @saddr and @src for NULL -- confirm.
  */
 239 void nf_ct_expect_init(struct nf_conntrack_expect *exp, unsigned int class,
 
 241                        const union nf_inet_addr *saddr,
 
 242                        const union nf_inet_addr *daddr,
 
 243                        u_int8_t proto, const __be16 *src, const __be16 *dst)
 
 247         if (family == AF_INET)
 
 254         exp->expectfn = NULL;
 
 256         exp->tuple.src.l3num = family;
 
 257         exp->tuple.dst.protonum = proto;
 
 260                 memcpy(&exp->tuple.src.u3, saddr, len);
 
 261                 if (sizeof(exp->tuple.src.u3) > len)
 
 262                         /* address needs to be cleared for nf_ct_tuple_equal */
 
 263                         memset((void *)&exp->tuple.src.u3 + len, 0x00,
 
 264                                sizeof(exp->tuple.src.u3) - len);
 
 265                 memset(&exp->mask.src.u3, 0xFF, len);
 
 266                 if (sizeof(exp->mask.src.u3) > len)
 
 267                         memset((void *)&exp->mask.src.u3 + len, 0x00,
 
 268                                sizeof(exp->mask.src.u3) - len);
 
 270                 memset(&exp->tuple.src.u3, 0x00, sizeof(exp->tuple.src.u3));
 
 271                 memset(&exp->mask.src.u3, 0x00, sizeof(exp->mask.src.u3));
 
 275                 exp->tuple.src.u.all = *src;
 
 276                 exp->mask.src.u.all = htons(0xFFFF);
 
 278                 exp->tuple.src.u.all = 0;
 
 279                 exp->mask.src.u.all = 0;
 
 282         memcpy(&exp->tuple.dst.u3, daddr, len);
 
 283         if (sizeof(exp->tuple.dst.u3) > len)
 
 284                 /* address needs to be cleared for nf_ct_tuple_equal */
 
 285                 memset((void *)&exp->tuple.dst.u3 + len, 0x00,
 
 286                        sizeof(exp->tuple.dst.u3) - len);
 
 288         exp->tuple.dst.u.all = *dst;
 
 290 EXPORT_SYMBOL_GPL(nf_ct_expect_init);
 
 /*
  * RCU callback queued by nf_ct_expect_put(): recover the expectation
  * from its embedded rcu_head and return it to the slab cache.
  */
 292 static void nf_ct_expect_free_rcu(struct rcu_head *head)
 
 294         struct nf_conntrack_expect *exp;
 
 296         exp = container_of(head, struct nf_conntrack_expect, rcu);
 
 297         kmem_cache_free(nf_ct_expect_cachep, exp);
 
 /*
  * Drop one reference on @exp.  When the count reaches zero the object
  * is not freed directly; the free is deferred through call_rcu().
  */
 300 void nf_ct_expect_put(struct nf_conntrack_expect *exp)
 
 302         if (atomic_dec_and_test(&exp->use))
 
 303                 call_rcu(&exp->rcu, nf_ct_expect_free_rcu);
 
 305 EXPORT_SYMBOL_GPL(nf_ct_expect_put);
 
 /*
  * Link @exp onto its master helper's list and the per-netns hash, bump
  * the per-class and per-net counters, and start its timeout timer.
  *
  * The use count is incremented twice in this function.  The expiry is
  * the policy timeout for the expectation's class, taken in seconds and
  * converted to jiffies.
  */
 307 static void nf_ct_expect_insert(struct nf_conntrack_expect *exp)
 
 309         struct nf_conn_help *master_help = nfct_help(exp->master);
 
 310         struct net *net = nf_ct_exp_net(exp);
 
 311         const struct nf_conntrack_expect_policy *p;
 
 312         unsigned int h = nf_ct_expect_dst_hash(&exp->tuple);
 
 314         atomic_inc(&exp->use);
 
 316         hlist_add_head(&exp->lnode, &master_help->expectations);
 
 317         master_help->expecting[exp->class]++;
 
 319         hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]);
 
 320         net->ct.expect_count++;
 
 322         setup_timer(&exp->timeout, nf_ct_expectation_timed_out,
 
 324         p = &master_help->helper->expect_policy[exp->class];
 
 325         exp->timeout.expires = jiffies + p->timeout * HZ;
 
 326         add_timer(&exp->timeout);
 
 328         atomic_inc(&exp->use);
 
 329         NF_CT_STAT_INC(net, expect_create);
 
 332 /* Race with expectations being used means we could have none to find; OK. */
 
 /*
  * Evict one of @master's expectations that has the same class as @new.
  *
  * nf_ct_expect_insert() adds entries at the head of the list, so the
  * last same-class entry reached by the walk is the oldest.
  * NOTE(review): the statement that records the candidate in 'last' is
  * on a line not visible here -- confirm.  The eviction only happens if
  * del_timer() succeeds.
  */
 333 static void evict_oldest_expect(struct nf_conn *master,
 
 334                                 struct nf_conntrack_expect *new)
 
 336         struct nf_conn_help *master_help = nfct_help(master);
 
 337         struct nf_conntrack_expect *exp, *last = NULL;
 
 338         struct hlist_node *n;
 
 340         hlist_for_each_entry(exp, n, &master_help->expectations, lnode) {
 
 341                 if (exp->class == new->class)
 
 345         if (last && del_timer(&last->timeout)) {
 
 346                 nf_ct_unlink_expect(last);
 
 347                 nf_ct_expect_put(last);
 
 /*
  * Restart the timeout of expectation @i using its class policy.
  *
  * The timer is re-armed only if del_timer() managed to cancel it.  The
  * return values are on lines not visible here.
  */
 351 static inline int refresh_timer(struct nf_conntrack_expect *i)
 
 353         struct nf_conn_help *master_help = nfct_help(i->master);
 
 354         const struct nf_conntrack_expect_policy *p;
 
 356         if (!del_timer(&i->timeout))
 
 359         p = &master_help->helper->expect_policy[i->class];
 
 360         i->timeout.expires = jiffies + p->timeout * HZ;
 
 361         add_timer(&i->timeout);
 
 /*
  * Decide whether @expect may be inserted.  Visible steps, in order:
  *   1. test whether the master still has a helper;
  *   2. scan the hash bucket: an identical expectation gets its timer
  *      refreshed, and a clashing one is handled on a separate branch;
  *   3. if the class policy has a non-zero max_expected and the master
  *      is at the limit, evict the oldest one and re-check;
  *   4. compare the per-netns count against nf_ct_expect_max.
  * NOTE(review): the return values and error codes for each step are on
  * lines not visible here.
  */
 365 static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect)
 
 367         const struct nf_conntrack_expect_policy *p;
 
 368         struct nf_conntrack_expect *i;
 
 369         struct nf_conn *master = expect->master;
 
 370         struct nf_conn_help *master_help = nfct_help(master);
 
 371         struct net *net = nf_ct_exp_net(expect);
 
 372         struct hlist_node *n;
 
 376         if (!master_help->helper) {
 
 380         h = nf_ct_expect_dst_hash(&expect->tuple);
 
 381         hlist_for_each_entry(i, n, &net->ct.expect_hash[h], hnode) {
 
 382                 if (expect_matches(i, expect)) {
 
 383                         /* Refresh timer: if it's dying, ignore.. */
 
 384                         if (refresh_timer(i)) {
 
 388                 } else if (expect_clash(i, expect)) {
 
 393         /* Will be over limit? */
 
 394         p = &master_help->helper->expect_policy[expect->class];
 
 395         if (p->max_expected &&
 
 396             master_help->expecting[expect->class] >= p->max_expected) {
 
 397                 evict_oldest_expect(master, expect);
 
 398                 if (master_help->expecting[expect->class] >= p->max_expected) {
 
 404         if (net->ct.expect_count >= nf_ct_expect_max) {
 
 407                                "nf_conntrack: expectation table full\n");
 
 /*
  * Register @expect: check and insert it under nf_conntrack_lock, then
  * raise an IPEXP_NEW event.
  *
  * An extra reference is taken before the lock is released and dropped
  * after the event call, so the event is delivered outside the lock
  * while that reference is held.  A second unlock appears further down;
  * the control flow that reaches it is not visible here.
  */
 414 int nf_ct_expect_related(struct nf_conntrack_expect *expect)
 
 418         spin_lock_bh(&nf_conntrack_lock);
 
 419         ret = __nf_ct_expect_check(expect);
 
 423         nf_ct_expect_insert(expect);
 
 424         atomic_inc(&expect->use);
 
 425         spin_unlock_bh(&nf_conntrack_lock);
 
 426         nf_ct_expect_event(IPEXP_NEW, expect);
 
 427         nf_ct_expect_put(expect);
 
 430         spin_unlock_bh(&nf_conntrack_lock);
 
 433 EXPORT_SYMBOL_GPL(nf_ct_expect_related);
 
 /*
  * Variant of nf_ct_expect_related() that reports the IPEXP_NEW event
  * through nf_ct_expect_event_report(), passing pid and report along.
  * The check and insert run under nf_conntrack_lock and the event call
  * comes after the unlock.  The remaining parameters and the condition
  * guarding the event call are on lines not visible here.
  */
 435 int nf_ct_expect_related_report(struct nf_conntrack_expect *expect, 
 
 440         spin_lock_bh(&nf_conntrack_lock);
 
 441         ret = __nf_ct_expect_check(expect);
 
 444         nf_ct_expect_insert(expect);
 
 446         spin_unlock_bh(&nf_conntrack_lock);
 
 448                 nf_ct_expect_event_report(IPEXP_NEW, expect, pid, report);
 
 451 EXPORT_SYMBOL_GPL(nf_ct_expect_related_report);
 
 453 #ifdef CONFIG_PROC_FS
 
 /*
  * Private seq_file iteration state for the expectation proc file.  The
  * iterator functions also use a 'bucket' member, whose declaration is
  * on a line not visible here.
  */
 454 struct ct_expect_iter_state {
 
 455         struct seq_net_private p;
 
 /*
  * Start an iteration: scan the buckets from index 0 upward, loading
  * each chain head with rcu_dereference().  st->bucket records the
  * bucket being examined.
  * NOTE(review): the non-empty test and the return statements are on
  * lines not visible here.
  */
 459 static struct hlist_node *ct_expect_get_first(struct seq_file *seq)
 
 461         struct net *net = seq_file_net(seq);
 
 462         struct ct_expect_iter_state *st = seq->private;
 
 463         struct hlist_node *n;
 
 465         for (st->bucket = 0; st->bucket < nf_ct_expect_hsize; st->bucket++) {
 
 466                 n = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 
 /*
  * Advance from @head to the next node.  When the current chain is
  * exhausted, move on to the following buckets until a non-empty one is
  * found or the bucket index reaches nf_ct_expect_hsize.
  */
 473 static struct hlist_node *ct_expect_get_next(struct seq_file *seq,
 
 474                                              struct hlist_node *head)
 
 476         struct net *net = seq_file_net(seq);
 
 477         struct ct_expect_iter_state *st = seq->private;
 
 479         head = rcu_dereference(head->next);
 
 480         while (head == NULL) {
 
 481                 if (++st->bucket >= nf_ct_expect_hsize)
 
 483                 head = rcu_dereference(net->ct.expect_hash[st->bucket].first);
 
 /*
  * Return the node at position @pos, starting from the first node and
  * stepping forward.  NULL is returned when the iteration ends while
  * pos is still non-zero.
  * NOTE(review): the loop body that decrements pos is not visible here.
  */
 488 static struct hlist_node *ct_expect_get_idx(struct seq_file *seq, loff_t pos)
 
 490         struct hlist_node *head = ct_expect_get_first(seq);
 
 493                 while (pos && (head = ct_expect_get_next(seq, head)))
 
 495         return pos ? NULL : head;
 
 /*
  * seq_file start hook: return the node at position *pos.  Any locking
  * done here is on lines not visible.
  */
 498 static void *exp_seq_start(struct seq_file *seq, loff_t *pos)
 
 502         return ct_expect_get_idx(seq, *pos);
 
 /*
  * seq_file next hook: return the node following @v.  The update of
  * *pos is on a line not visible here.
  */
 505 static void *exp_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 
 508         return ct_expect_get_next(seq, v);
 
 511 static void exp_seq_stop(struct seq_file *seq, void *v)
 
 /*
  * seq_file show hook: print one expectation on a single line.
  *
  * Output order: remaining timeout in seconds (0 when the timer is not
  * pending, and printed only if a timer function is set), the l3 and
  * l4 protocol numbers, the tuple via print_tuple(), then the PERMANENT
  * and INACTIVE flag names, and a newline.
  */
 517 static int exp_seq_show(struct seq_file *s, void *v)
 
 519         struct nf_conntrack_expect *expect;
 
 520         struct hlist_node *n = v;
 
 523         expect = hlist_entry(n, struct nf_conntrack_expect, hnode);
 
 525         if (expect->timeout.function)
 
 526                 seq_printf(s, "%ld ", timer_pending(&expect->timeout)
 
 527                            ? (long)(expect->timeout.expires - jiffies)/HZ : 0);
 
 530         seq_printf(s, "l3proto = %u proto=%u ",
 
 531                    expect->tuple.src.l3num,
 
 532                    expect->tuple.dst.protonum);
 
 533         print_tuple(s, &expect->tuple,
 
 534                     __nf_ct_l3proto_find(expect->tuple.src.l3num),
 
 535                     __nf_ct_l4proto_find(expect->tuple.src.l3num,
 
 536                                        expect->tuple.dst.protonum));
 
 538         if (expect->flags & NF_CT_EXPECT_PERMANENT) {
 
 539                 seq_printf(s, "PERMANENT");
 
 542         if (expect->flags & NF_CT_EXPECT_INACTIVE)
 
 543                 seq_printf(s, "%sINACTIVE", delim);
 
 545         return seq_putc(s, '\n');
 
 /*
  * seq_file iterator callbacks for the expectation proc file.  Any
  * further members are on lines not visible here.
  */
 548 static const struct seq_operations exp_seq_ops = {
 
 549         .start = exp_seq_start,
 
 550         .next = exp_seq_next,
 
 551         .stop = exp_seq_stop,
 
 /*
  * Open hook: set up a per-netns seq_file using exp_seq_ops, with
  * private state sized for struct ct_expect_iter_state.
  */
 555 static int exp_open(struct inode *inode, struct file *file)
 
 557         return seq_open_net(inode, file, &exp_seq_ops,
 
 558                         sizeof(struct ct_expect_iter_state));
 
 /*
  * File operations for the expectation proc file.  Release goes through
  * seq_release_net; the other members are on lines not visible here.
  */
 561 static const struct file_operations exp_file_ops = {
 
 562         .owner   = THIS_MODULE,
 
 566         .release = seq_release_net,
 
 568 #endif /* CONFIG_PROC_FS */
 
 /*
  * Create the "nf_conntrack_expect" proc entry for @net with mode 0440,
  * backed by exp_file_ops.  The creation is compiled only when
  * CONFIG_PROC_FS is set.  Return values are on lines not visible here.
  */
 570 static int exp_proc_init(struct net *net)
 
 572 #ifdef CONFIG_PROC_FS
 
 573         struct proc_dir_entry *proc;
 
 575         proc = proc_net_fops_create(net, "nf_conntrack_expect", 0440, &exp_file_ops);
 
 578 #endif /* CONFIG_PROC_FS */
 
 /*
  * Remove the "nf_conntrack_expect" proc entry from @net.  The removal
  * is compiled only when CONFIG_PROC_FS is set.
  */
 582 static void exp_proc_remove(struct net *net)
 
 584 #ifdef CONFIG_PROC_FS
 
 585         proc_net_remove(net, "nf_conntrack_expect");
 
 586 #endif /* CONFIG_PROC_FS */
 
 589 module_param_named(expect_hashsize, nf_ct_expect_hsize, uint, 0600);
 
 /*
  * Per-netns initialisation of expectation tracking.
  *
  * For init_net only: if no hash size was set, it defaults to
  * nf_conntrack_htable_size / 256 with a minimum of 1;
  * nf_ct_expect_max is set to four times the hash size; and the shared
  * slab cache is created.  For every netns: the count is reset, the
  * hash table allocated and the proc entry registered.
  *
  * The trailing statements destroy the cache (init_net only) and free
  * the hash table.  NOTE(review): presumably these are the error-unwind
  * labels; the gotos and labels are on lines not visible here.
  */
 591 int nf_conntrack_expect_init(struct net *net)
 
 595         if (net_eq(net, &init_net)) {
 
 596                 if (!nf_ct_expect_hsize) {
 
 597                         nf_ct_expect_hsize = nf_conntrack_htable_size / 256;
 
 598                         if (!nf_ct_expect_hsize)
 
 599                                 nf_ct_expect_hsize = 1;
 
 601                 nf_ct_expect_max = nf_ct_expect_hsize * 4;
 
 604         net->ct.expect_count = 0;
 
 605         net->ct.expect_hash = nf_ct_alloc_hashtable(&nf_ct_expect_hsize,
 
 606                                                   &net->ct.expect_vmalloc);
 
 607         if (net->ct.expect_hash == NULL)
 
 610         if (net_eq(net, &init_net)) {
 
 611                 nf_ct_expect_cachep = kmem_cache_create("nf_conntrack_expect",
 
 612                                         sizeof(struct nf_conntrack_expect),
 
 614                 if (!nf_ct_expect_cachep)
 
 618         err = exp_proc_init(net);
 
 625         if (net_eq(net, &init_net))
 
 626                 kmem_cache_destroy(nf_ct_expect_cachep);
 
 628         nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,
 
 634 void nf_conntrack_expect_fini(struct net *net)
 
 636         exp_proc_remove(net);
 
 637         if (net_eq(net, &init_net))
 
 638                 kmem_cache_destroy(nf_ct_expect_cachep);
 
 639         nf_ct_free_hashtable(net->ct.expect_hash, net->ct.expect_vmalloc,