/* Connection state tracking for netfilter.  This is separated from,
   but required by, the NAT layer; it can also be used by an iptables
   connection tracking module. */

/* (C) 1999-2001 Paul `Rusty' Russell
 * (C) 2002-2004 Netfilter Core Team <coreteam@netfilter.org>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * 23 Apr 2001: Harald Welte <laforge@gnumonks.org>
 *	- new API and handling of conntrack/nat helpers
 *	- now capable of multiple expectations for one master
 * 16 Jul 2002: Harald Welte <laforge@gnumonks.org>
 *	- add usage/reference counts to ip_conntrack_expect
 *	- export ip_conntrack[_expect]_{find_get,put} functions
 */
#include <linux/types.h>
#include <linux/icmp.h>
#include <linux/ip.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/vmalloc.h>
#include <net/checksum.h>
#include <net/ip.h>
#include <linux/stddef.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/random.h>
#include <linux/jhash.h>
#include <linux/err.h>
#include <linux/percpu.h>
#include <linux/moduleparam.h>
#include <linux/notifier.h>
/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
   registrations, conntrack timers */

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
#define IP_CONNTRACK_VERSION	"2.4"

#if 0
#define DEBUGP printk
#else
#define DEBUGP(format, args...)
#endif
DEFINE_RWLOCK(ip_conntrack_lock);

/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);

void (*ip_conntrack_destroyed)(struct ip_conntrack *conntrack) = NULL;
LIST_HEAD(ip_conntrack_expect_list);
struct ip_conntrack_protocol *ip_ct_protos[MAX_IP_CT_PROTO] __read_mostly;
static LIST_HEAD(helpers);
unsigned int ip_conntrack_htable_size __read_mostly = 0;
int ip_conntrack_max __read_mostly;
struct list_head *ip_conntrack_hash __read_mostly;
static struct kmem_cache *ip_conntrack_cachep __read_mostly;
static struct kmem_cache *ip_conntrack_expect_cachep __read_mostly;
struct ip_conntrack ip_conntrack_untracked;
unsigned int ip_ct_log_invalid __read_mostly;
static LIST_HEAD(unconfirmed);
static int ip_conntrack_vmalloc __read_mostly;

static unsigned int ip_conntrack_next_id;
static unsigned int ip_conntrack_expect_next_id;
#ifdef CONFIG_IP_NF_CONNTRACK_EVENTS
ATOMIC_NOTIFIER_HEAD(ip_conntrack_chain);
ATOMIC_NOTIFIER_HEAD(ip_conntrack_expect_chain);

DEFINE_PER_CPU(struct ip_conntrack_ecache, ip_conntrack_ecache);

/* deliver cached events and clear cache entry - must be called with locally
 * disabled softirqs */
static inline void
__ip_ct_deliver_cached_events(struct ip_conntrack_ecache *ecache)
{
	DEBUGP("ecache: delivering events for %p\n", ecache->ct);
	if (is_confirmed(ecache->ct) && !is_dying(ecache->ct) && ecache->events)
		atomic_notifier_call_chain(&ip_conntrack_chain, ecache->events,
					   ecache->ct);
	ecache->events = 0;
	ip_conntrack_put(ecache->ct);
	ecache->ct = NULL;
}
/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling or freeing the skb */
void ip_ct_deliver_cached_events(const struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	local_bh_disable();
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	if (ecache->ct == ct)
		__ip_ct_deliver_cached_events(ecache);
	local_bh_enable();
}
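/* Start caching events for a new conntrack on this CPU: any events still
 * cached for a previous conntrack are delivered first.  Takes a reference
 * on ct, dropped when the cache entry is delivered or flushed. */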
void __ip_ct_event_cache_init(struct ip_conntrack *ct)
{
	struct ip_conntrack_ecache *ecache;

	/* take care of delivering potentially old events */
	ecache = &__get_cpu_var(ip_conntrack_ecache);
	BUG_ON(ecache->ct == ct);
	if (ecache->ct)
		__ip_ct_deliver_cached_events(ecache);
	/* initialize for this conntrack/packet */
	ecache->ct = ct;
	nf_conntrack_get(&ct->ct_general);
}
/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code */
static void ip_ct_event_cache_flush(void)
{
	struct ip_conntrack_ecache *ecache;
	int cpu;

	for_each_possible_cpu(cpu) {
		ecache = &per_cpu(ip_conntrack_ecache, cpu);
		if (ecache->ct)
			ip_conntrack_put(ecache->ct);
	}
}
#else
static inline void ip_ct_event_cache_flush(void) {}
#endif /* CONFIG_IP_NF_CONNTRACK_EVENTS */
DEFINE_PER_CPU(struct ip_conntrack_stat, ip_conntrack_stat);

static int ip_conntrack_hash_rnd_initted;
static unsigned int ip_conntrack_hash_rnd;
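/* Hash a tuple into the table: jhash over source/destination addresses,
 * ports and protocol number, seeded with a boot-time random value so the
 * bucket layout is not predictable from outside. */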
static u_int32_t __hash_conntrack(const struct ip_conntrack_tuple *tuple,
				  unsigned int size, unsigned int rnd)
{
	return (jhash_3words((__force u32)tuple->src.ip,
			     ((__force u32)tuple->dst.ip ^ tuple->dst.protonum),
			     (tuple->src.u.all | (tuple->dst.u.all << 16)),
			     rnd) % size);
}

static u_int32_t
hash_conntrack(const struct ip_conntrack_tuple *tuple)
{
	return __hash_conntrack(tuple, ip_conntrack_htable_size,
				ip_conntrack_hash_rnd);
}
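/* Fill in the original-direction tuple from the IP header; the transport
 * part (ports, ICMP ids, ...) is delegated to the protocol's
 * pkt_to_tuple().  Returns 0 on failure. */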
int
ip_ct_get_tuple(const struct iphdr *iph,
		const struct sk_buff *skb,
		unsigned int dataoff,
		struct ip_conntrack_tuple *tuple,
		const struct ip_conntrack_protocol *protocol)
{
	/* Never happen */
	if (iph->frag_off & htons(IP_OFFSET)) {
		printk("ip_conntrack_core: Frag of proto %u.\n",
		       iph->protocol);
		return 0;
	}

	tuple->src.ip = iph->saddr;
	tuple->dst.ip = iph->daddr;
	tuple->dst.protonum = iph->protocol;
	tuple->dst.dir = IP_CT_DIR_ORIGINAL;

	return protocol->pkt_to_tuple(skb, dataoff, tuple);
}
int
ip_ct_invert_tuple(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig,
		   const struct ip_conntrack_protocol *protocol)
{
	inverse->src.ip = orig->dst.ip;
	inverse->dst.ip = orig->src.ip;
	inverse->dst.protonum = orig->dst.protonum;
	inverse->dst.dir = !orig->dst.dir;

	return protocol->invert_tuple(inverse, orig);
}
/* ip_conntrack_expect helper functions */
void ip_ct_unlink_expect(struct ip_conntrack_expect *exp)
{
	IP_NF_ASSERT(!timer_pending(&exp->timeout));
	list_del(&exp->list);
	CONNTRACK_STAT_INC(expect_delete);
	exp->master->expecting--;
	ip_conntrack_expect_put(exp);
}
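/* Timer callback: the expectation was never fulfilled; unlink it and drop
 * both the list's reference (inside ip_ct_unlink_expect) and the timer's. */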
static void expectation_timed_out(unsigned long ul_expect)
{
	struct ip_conntrack_expect *exp = (void *)ul_expect;

	write_lock_bh(&ip_conntrack_lock);
	ip_ct_unlink_expect(exp);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_expect_put(exp);
}
struct ip_conntrack_expect *
__ip_conntrack_expect_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask))
			return i;
	}
	return NULL;
}
/* Just find an expectation corresponding to a tuple. */
struct ip_conntrack_expect *
ip_conntrack_expect_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	read_lock_bh(&ip_conntrack_lock);
	i = __ip_conntrack_expect_find(tuple);
	if (i)
		atomic_inc(&i->use);
	read_unlock_bh(&ip_conntrack_lock);

	return i;
}
/* If an expectation for this connection is found, it gets deleted from
 * the global list then returned. */
static struct ip_conntrack_expect *
find_expectation(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		/* If master is not in hash table yet (ie. packet hasn't left
		   this machine yet), how can other end know about expected?
		   Hence these are not the droids you are looking for (if
		   master ct never got confirmed, we'd hold a reference to it
		   and weird things would happen to future packets). */
		if (ip_ct_tuple_mask_cmp(tuple, &i->tuple, &i->mask)
		    && is_confirmed(i->master)) {
			if (i->flags & IP_CT_EXPECT_PERMANENT) {
				atomic_inc(&i->use);
				return i;
			} else if (del_timer(&i->timeout)) {
				ip_ct_unlink_expect(i);
				return i;
			}
		}
	}
	return NULL;
}
/* delete all expectations for this conntrack */
void ip_ct_remove_expectations(struct ip_conntrack *ct)
{
	struct ip_conntrack_expect *i, *tmp;

	/* Optimization: most connections never expect any others. */
	if (ct->expecting == 0)
		return;

	list_for_each_entry_safe(i, tmp, &ip_conntrack_expect_list, list) {
		if (i->master == ct && del_timer(&i->timeout)) {
			ip_ct_unlink_expect(i);
			ip_conntrack_expect_put(i);
		}
	}
}
static void
clean_from_lists(struct ip_conntrack *ct)
{
	DEBUGP("clean_from_lists(%p)\n", ct);
	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	list_del(&ct->tuplehash[IP_CT_DIR_REPLY].list);

	/* Destroy all pending expectations */
	ip_ct_remove_expectations(ct);
}
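/* Final destructor, invoked via ct_general.destroy when the last reference
 * to the conntrack is dropped. */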
static void
destroy_conntrack(struct nf_conntrack *nfct)
{
	struct ip_conntrack *ct = (struct ip_conntrack *)nfct;
	struct ip_conntrack_protocol *proto;
	struct ip_conntrack_helper *helper;

	DEBUGP("destroy_conntrack(%p)\n", ct);
	IP_NF_ASSERT(atomic_read(&nfct->use) == 0);
	IP_NF_ASSERT(!timer_pending(&ct->timeout));

	ip_conntrack_event(IPCT_DESTROY, ct);
	set_bit(IPS_DYING_BIT, &ct->status);

	helper = ct->helper;
	if (helper && helper->destroy)
		helper->destroy(ct);

	/* To make sure we don't get any weird locking issues here:
	 * destroy_conntrack() MUST NOT be called with a write lock
	 * to ip_conntrack_lock!!! -HW */
	rcu_read_lock();
	proto = __ip_conntrack_proto_find(ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.protonum);
	if (proto && proto->destroy)
		proto->destroy(ct);
	rcu_read_unlock();

	if (ip_conntrack_destroyed)
		ip_conntrack_destroyed(ct);

	write_lock_bh(&ip_conntrack_lock);
	/* Expectations will have been removed in clean_from_lists,
	 * except TFTP can create an expectation on the first packet,
	 * before connection is in the list, so we need to clean here,
	 * too. */
	ip_ct_remove_expectations(ct);

	/* We overload first tuple to link into unconfirmed list. */
	if (!is_confirmed(ct)) {
		BUG_ON(list_empty(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list));
		list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);
	}

	CONNTRACK_STAT_INC(delete);
	write_unlock_bh(&ip_conntrack_lock);

	if (ct->master)
		ip_conntrack_put(ct->master);

	DEBUGP("destroy_conntrack: returning ct=%p to slab\n", ct);
	ip_conntrack_free(ct);
}
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct ip_conntrack *ct = (void *)ul_conntrack;

	write_lock_bh(&ip_conntrack_lock);
	/* Inside lock so preempt is disabled on module removal path.
	 * Otherwise we can get spurious warnings. */
	CONNTRACK_STAT_INC(delete_list);
	clean_from_lists(ct);
	write_unlock_bh(&ip_conntrack_lock);
	ip_conntrack_put(ct);
}
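/* Look up a tuple in the hash without taking a reference; caller must hold
 * ip_conntrack_lock.  ignored_conntrack (may be NULL) is excluded from the
 * search, which lets callers such as ip_conntrack_tuple_taken() skip the
 * conntrack currently being set up. */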
struct ip_conntrack_tuple_hash *
__ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
		    const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;
	unsigned int hash = hash_conntrack(tuple);

	list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
		if (tuplehash_to_ctrack(h) != ignored_conntrack &&
		    ip_ct_tuple_equal(tuple, &h->tuple)) {
			CONNTRACK_STAT_INC(found);
			return h;
		}
		CONNTRACK_STAT_INC(searched);
	}

	return NULL;
}
/* Find a connection corresponding to a tuple. */
struct ip_conntrack_tuple_hash *
ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
		      const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	if (h)
		atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
	read_unlock_bh(&ip_conntrack_lock);

	return h;
}
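/* Link both directions of the conntrack into their hash chains; the caller
 * holds the write lock. */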
static void __ip_conntrack_hash_insert(struct ip_conntrack *ct,
				       unsigned int hash,
				       unsigned int repl_hash)
{
	ct->id = ++ip_conntrack_next_id;
	list_add(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list,
		 &ip_conntrack_hash[hash]);
	list_add(&ct->tuplehash[IP_CT_DIR_REPLY].list,
		 &ip_conntrack_hash[repl_hash]);
}
void ip_conntrack_hash_insert(struct ip_conntrack *ct)
{
	unsigned int hash, repl_hash;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	write_lock_bh(&ip_conntrack_lock);
	__ip_conntrack_hash_insert(ct, hash, repl_hash);
	write_unlock_bh(&ip_conntrack_lock);
}
/* Confirm a connection given skb; places it in hash table */
int
__ip_conntrack_confirm(struct sk_buff **pskb)
{
	unsigned int hash, repl_hash;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	ct = ip_conntrack_get(*pskb, &ctinfo);

	/* ipt_REJECT uses ip_conntrack_attach to attach related
	   ICMP/TCP RST packets in other direction.  Actual packet
	   which created connection will be IP_CT_NEW or for an
	   expected connection, IP_CT_RELATED. */
	if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL)
		return NF_ACCEPT;

	hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
	repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);

	/* We're not in hash table, and we refuse to set up related
	   connections for unconfirmed conns.  But packet copies and
	   REJECT will give spurious warnings here. */
	/* IP_NF_ASSERT(atomic_read(&ct->ct_general.use) == 1); */

	/* No external references means no one else could have
	   confirmed us. */
	IP_NF_ASSERT(!is_confirmed(ct));
	DEBUGP("Confirming conntrack %p\n", ct);

	write_lock_bh(&ip_conntrack_lock);

	/* See if there's one in the list already, including reverse:
	   NAT could have grabbed it without realizing, since we're
	   not in the hash.  If there is, we lost race. */
	list_for_each_entry(h, &ip_conntrack_hash[hash], list)
		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple,
				      &h->tuple))
			goto out;
	list_for_each_entry(h, &ip_conntrack_hash[repl_hash], list)
		if (ip_ct_tuple_equal(&ct->tuplehash[IP_CT_DIR_REPLY].tuple,
				      &h->tuple))
			goto out;

	/* Remove from unconfirmed list */
	list_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].list);

	__ip_conntrack_hash_insert(ct, hash, repl_hash);
	/* Timer relative to confirmation time, not original
	   setting time, otherwise we'd get timer wrap in
	   weird delay cases. */
	ct->timeout.expires += jiffies;
	add_timer(&ct->timeout);
	atomic_inc(&ct->ct_general.use);
	set_bit(IPS_CONFIRMED_BIT, &ct->status);
	CONNTRACK_STAT_INC(insert);
	write_unlock_bh(&ip_conntrack_lock);

	if (ct->helper)
		ip_conntrack_event_cache(IPCT_HELPER, *pskb);
#ifdef CONFIG_IP_NF_NAT_NEEDED
	if (test_bit(IPS_SRC_NAT_DONE_BIT, &ct->status) ||
	    test_bit(IPS_DST_NAT_DONE_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_NATINFO, *pskb);
#endif
	ip_conntrack_event_cache(master_ct(ct) ?
				 IPCT_RELATED : IPCT_NEW, *pskb);

	return NF_ACCEPT;

out:
	CONNTRACK_STAT_INC(insert_failed);
	write_unlock_bh(&ip_conntrack_lock);
	return NF_DROP;
}
/* Returns true if a connection corresponds to the tuple (required
   for NAT). */
int
ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
			 const struct ip_conntrack *ignored_conntrack)
{
	struct ip_conntrack_tuple_hash *h;

	read_lock_bh(&ip_conntrack_lock);
	h = __ip_conntrack_find(tuple, ignored_conntrack);
	read_unlock_bh(&ip_conntrack_lock);

	return h != NULL;
}
/* There's a small race here where we may free a just-assured
   connection.  Too bad: we're in trouble anyway. */
static int early_drop(struct list_head *chain)
{
	/* Traverse backwards: gives us oldest, which is roughly LRU */
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct = NULL, *tmp;
	int dropped = 0;

	read_lock_bh(&ip_conntrack_lock);
	list_for_each_entry_reverse(h, chain, list) {
		tmp = tuplehash_to_ctrack(h);
		if (!test_bit(IPS_ASSURED_BIT, &tmp->status)) {
			ct = tmp;
			atomic_inc(&ct->ct_general.use);
			break;
		}
	}
	read_unlock_bh(&ip_conntrack_lock);

	if (!ct)
		return dropped;

	if (del_timer(&ct->timeout)) {
		death_by_timeout((unsigned long)ct);
		dropped = 1;
		CONNTRACK_STAT_INC(early_drop);
	}
	ip_conntrack_put(ct);
	return dropped;
}
static struct ip_conntrack_helper *
__ip_conntrack_helper_find(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (ip_ct_tuple_mask_cmp(tuple, &h->tuple, &h->mask))
			return h;
	}
	return NULL;
}
struct ip_conntrack_helper *
ip_conntrack_helper_find_get(const struct ip_conntrack_tuple *tuple)
{
	struct ip_conntrack_helper *helper;

	/* need ip_conntrack_lock to assure that helper exists until
	 * try_module_get() is called */
	read_lock_bh(&ip_conntrack_lock);

	helper = __ip_conntrack_helper_find(tuple);
	if (helper) {
		/* need to increase module usage count to assure helper will
		 * not go away while the caller is e.g. busy putting a
		 * conntrack in the hash that uses the helper */
		if (!try_module_get(helper->me))
			helper = NULL;
	}

	read_unlock_bh(&ip_conntrack_lock);

	return helper;
}
void ip_conntrack_helper_put(struct ip_conntrack_helper *helper)
{
	module_put(helper->me);
}
struct ip_conntrack_protocol *
__ip_conntrack_proto_find(u_int8_t protocol)
{
	return ip_ct_protos[protocol];
}
/* this is guaranteed to always return a valid protocol helper, since
 * it falls back to generic_protocol */
struct ip_conntrack_protocol *
ip_conntrack_proto_find_get(u_int8_t protocol)
{
	struct ip_conntrack_protocol *p;

	rcu_read_lock();
	p = __ip_conntrack_proto_find(protocol);
	if (p) {
		if (!try_module_get(p->me))
			p = &ip_conntrack_generic_protocol;
	}
	rcu_read_unlock();

	return p;
}

void ip_conntrack_proto_put(struct ip_conntrack_protocol *p)
{
	module_put(p->me);
}
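/* Allocate a conntrack for the given tuple pair.  If the table is full,
 * try to early-drop an unassured entry from the target hash chain; on
 * failure return ERR_PTR(-ENOMEM). */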
struct ip_conntrack *ip_conntrack_alloc(struct ip_conntrack_tuple *orig,
					struct ip_conntrack_tuple *repl)
{
	struct ip_conntrack *conntrack;

	if (!ip_conntrack_hash_rnd_initted) {
		get_random_bytes(&ip_conntrack_hash_rnd, 4);
		ip_conntrack_hash_rnd_initted = 1;
	}

	/* We don't want any race condition at early drop stage */
	atomic_inc(&ip_conntrack_count);

	if (ip_conntrack_max
	    && atomic_read(&ip_conntrack_count) > ip_conntrack_max) {
		unsigned int hash = hash_conntrack(orig);
		/* Try dropping from this hash chain. */
		if (!early_drop(&ip_conntrack_hash[hash])) {
			atomic_dec(&ip_conntrack_count);
			if (net_ratelimit())
				printk(KERN_WARNING
				       "ip_conntrack: table full, dropping"
				       " packet.\n");
			return ERR_PTR(-ENOMEM);
		}
	}

	conntrack = kmem_cache_zalloc(ip_conntrack_cachep, GFP_ATOMIC);
	if (!conntrack) {
		DEBUGP("Can't allocate conntrack.\n");
		atomic_dec(&ip_conntrack_count);
		return ERR_PTR(-ENOMEM);
	}

	atomic_set(&conntrack->ct_general.use, 1);
	conntrack->ct_general.destroy = destroy_conntrack;
	conntrack->tuplehash[IP_CT_DIR_ORIGINAL].tuple = *orig;
	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *repl;
	/* Don't set timer yet: wait for confirmation */
	init_timer(&conntrack->timeout);
	conntrack->timeout.data = (unsigned long)conntrack;
	conntrack->timeout.function = death_by_timeout;

	return conntrack;
}
void
ip_conntrack_free(struct ip_conntrack *conntrack)
{
	atomic_dec(&ip_conntrack_count);
	kmem_cache_free(ip_conntrack_cachep, conntrack);
}
/* Allocate a new conntrack: we return -ENOMEM if classification
 * failed due to stress.  Otherwise it really is unclassifiable */
static struct ip_conntrack_tuple_hash *
init_conntrack(struct ip_conntrack_tuple *tuple,
	       struct ip_conntrack_protocol *protocol,
	       struct sk_buff *skb)
{
	struct ip_conntrack *conntrack;
	struct ip_conntrack_tuple repl_tuple;
	struct ip_conntrack_expect *exp;

	if (!ip_ct_invert_tuple(&repl_tuple, tuple, protocol)) {
		DEBUGP("Can't invert tuple.\n");
		return NULL;
	}

	conntrack = ip_conntrack_alloc(tuple, &repl_tuple);
	if (conntrack == NULL || IS_ERR(conntrack))
		return (struct ip_conntrack_tuple_hash *)conntrack;

	if (!protocol->new(conntrack, skb)) {
		ip_conntrack_free(conntrack);
		return NULL;
	}

	write_lock_bh(&ip_conntrack_lock);
	exp = find_expectation(tuple);

	if (exp) {
		DEBUGP("conntrack: expectation arrives ct=%p exp=%p\n",
			conntrack, exp);
		/* Welcome, Mr. Bond.  We've been expecting you... */
		__set_bit(IPS_EXPECTED_BIT, &conntrack->status);
		conntrack->master = exp->master;
#ifdef CONFIG_IP_NF_CONNTRACK_MARK
		conntrack->mark = exp->master->mark;
#endif
#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
    defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
		/* this is ugly, but there is no other place where to put it */
		conntrack->nat.masq_index = exp->master->nat.masq_index;
#endif
#ifdef CONFIG_IP_NF_CONNTRACK_SECMARK
		conntrack->secmark = exp->master->secmark;
#endif
		nf_conntrack_get(&conntrack->master->ct_general);
		CONNTRACK_STAT_INC(expect_new);
	} else {
		conntrack->helper = __ip_conntrack_helper_find(&repl_tuple);

		CONNTRACK_STAT_INC(new);
	}

	/* Overload tuple linked list to put us in unconfirmed list. */
	list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);

	write_unlock_bh(&ip_conntrack_lock);

	if (exp) {
		if (exp->expectfn)
			exp->expectfn(conntrack, exp);
		ip_conntrack_expect_put(exp);
	}

	return &conntrack->tuplehash[IP_CT_DIR_ORIGINAL];
}
/* On success, returns conntrack ptr, sets skb->nfct and ctinfo */
static inline struct ip_conntrack *
resolve_normal_ct(struct sk_buff *skb,
		  struct ip_conntrack_protocol *proto,
		  int *set_reply,
		  unsigned int hooknum,
		  enum ip_conntrack_info *ctinfo)
{
	struct ip_conntrack_tuple tuple;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;

	IP_NF_ASSERT((skb->nh.iph->frag_off & htons(IP_OFFSET)) == 0);

	if (!ip_ct_get_tuple(skb->nh.iph, skb, skb->nh.iph->ihl*4,
			     &tuple, proto))
		return NULL;

	/* look for tuple match */
	h = ip_conntrack_find_get(&tuple, NULL);
	if (!h) {
		h = init_conntrack(&tuple, proto, skb);
		if (!h)
			return NULL;
		if (IS_ERR(h))
			return (void *)h;
	}
	ct = tuplehash_to_ctrack(h);

	/* It exists; we have (non-exclusive) reference. */
	if (DIRECTION(h) == IP_CT_DIR_REPLY) {
		*ctinfo = IP_CT_ESTABLISHED + IP_CT_IS_REPLY;
		/* Please set reply bit if this packet OK */
		*set_reply = 1;
	} else {
		/* Once we've had two way comms, always ESTABLISHED. */
		if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: normal packet for %p\n", ct);
			*ctinfo = IP_CT_ESTABLISHED;
		} else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
			DEBUGP("ip_conntrack_in: related packet for %p\n", ct);
			*ctinfo = IP_CT_RELATED;
		} else {
			DEBUGP("ip_conntrack_in: new packet for %p\n", ct);
			*ctinfo = IP_CT_NEW;
		}
		*set_reply = 0;
	}
	skb->nfct = &ct->ct_general;
	skb->nfctinfo = *ctinfo;
	return ct;
}
/* Netfilter hook itself. */
unsigned int ip_conntrack_in(unsigned int hooknum,
			     struct sk_buff **pskb,
			     const struct net_device *in,
			     const struct net_device *out,
			     int (*okfn)(struct sk_buff *))
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;
	struct ip_conntrack_protocol *proto;
	int set_reply = 0;
	int ret;

	/* Previously seen (loopback or untracked)?  Ignore. */
	if ((*pskb)->nfct) {
		CONNTRACK_STAT_INC(ignore);
		return NF_ACCEPT;
	}

	/* Never happen */
	if ((*pskb)->nh.iph->frag_off & htons(IP_OFFSET)) {
		if (net_ratelimit()) {
			printk(KERN_ERR "ip_conntrack_in: Frag of proto %u (hook=%u)\n",
			       (*pskb)->nh.iph->protocol, hooknum);
		}
		return NF_DROP;
	}

/* Doesn't cover locally-generated broadcast, so not worth it. */
#if 0
	/* Ignore broadcast: no `connection'. */
	if ((*pskb)->pkt_type == PACKET_BROADCAST) {
		printk("Broadcast packet!\n");
		return NF_ACCEPT;
	} else if (((*pskb)->nh.iph->daddr & htonl(0x000000FF))
		   == htonl(0x000000FF)) {
		printk("Should bcast: %u.%u.%u.%u->%u.%u.%u.%u (sk=%p, ptype=%u)\n",
		       NIPQUAD((*pskb)->nh.iph->saddr),
		       NIPQUAD((*pskb)->nh.iph->daddr),
		       (*pskb)->sk, (*pskb)->pkt_type);
	}
#endif

	/* rcu_read_lock()ed by nf_hook_slow */
	proto = __ip_conntrack_proto_find((*pskb)->nh.iph->protocol);

	/* It may be a special packet, error, unclean...
	 * inverse of the return code tells to the netfilter
	 * core what to do with the packet. */
	if (proto->error != NULL
	    && (ret = proto->error(*pskb, &ctinfo, hooknum)) <= 0) {
		CONNTRACK_STAT_INC(error);
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (!(ct = resolve_normal_ct(*pskb, proto, &set_reply, hooknum,
				     &ctinfo))) {
		/* Not valid part of a connection */
		CONNTRACK_STAT_INC(invalid);
		return NF_ACCEPT;
	}

	if (IS_ERR(ct)) {
		/* Too stressed to deal. */
		CONNTRACK_STAT_INC(drop);
		return NF_DROP;
	}

	IP_NF_ASSERT((*pskb)->nfct);

	ret = proto->packet(ct, *pskb, ctinfo);
	if (ret < 0) {
		/* Invalid: inverse of the return code tells
		 * the netfilter core what to do */
		nf_conntrack_put((*pskb)->nfct);
		(*pskb)->nfct = NULL;
		CONNTRACK_STAT_INC(invalid);
		return -ret;
	}

	if (set_reply && !test_and_set_bit(IPS_SEEN_REPLY_BIT, &ct->status))
		ip_conntrack_event_cache(IPCT_STATUS, *pskb);

	return ret;
}
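/* Like ip_ct_invert_tuple(), but looks up the protocol handler itself from
 * the tuple's protocol number. */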
int invert_tuplepr(struct ip_conntrack_tuple *inverse,
		   const struct ip_conntrack_tuple *orig)
{
	struct ip_conntrack_protocol *proto;
	int ret;

	rcu_read_lock();
	proto = __ip_conntrack_proto_find(orig->dst.protonum);
	ret = ip_ct_invert_tuple(inverse, orig, proto);
	rcu_read_unlock();

	return ret;
}
/* Would two expected things clash? */
static inline int expect_clash(const struct ip_conntrack_expect *a,
			       const struct ip_conntrack_expect *b)
{
	/* Part covered by intersection of masks must be unequal,
	   otherwise they clash */
	struct ip_conntrack_tuple intersect_mask
		= { { a->mask.src.ip & b->mask.src.ip,
		      { a->mask.src.u.all & b->mask.src.u.all } },
		    { a->mask.dst.ip & b->mask.dst.ip,
		      { a->mask.dst.u.all & b->mask.dst.u.all },
		      a->mask.dst.protonum & b->mask.dst.protonum } };

	return ip_ct_tuple_mask_cmp(&a->tuple, &b->tuple, &intersect_mask);
}
static inline int expect_matches(const struct ip_conntrack_expect *a,
				 const struct ip_conntrack_expect *b)
{
	return a->master == b->master
		&& ip_ct_tuple_equal(&a->tuple, &b->tuple)
		&& ip_ct_tuple_equal(&a->mask, &b->mask);
}
/* Generally a bad idea to call this: could have matched already. */
void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
	struct ip_conntrack_expect *i;

	write_lock_bh(&ip_conntrack_lock);
	/* choose the oldest expectation to evict */
	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, exp) && del_timer(&i->timeout)) {
			ip_ct_unlink_expect(i);
			write_unlock_bh(&ip_conntrack_lock);
			ip_conntrack_expect_put(i);
			return;
		}
	}
	write_unlock_bh(&ip_conntrack_lock);
}
/* We don't increase the master conntrack refcount for non-fulfilled
 * conntracks. During the conntrack destruction, the expectations are
 * always killed before the conntrack itself */
struct ip_conntrack_expect *ip_conntrack_expect_alloc(struct ip_conntrack *me)
{
	struct ip_conntrack_expect *new;

	new = kmem_cache_alloc(ip_conntrack_expect_cachep, GFP_ATOMIC);
	if (!new) {
		DEBUGP("expect_related: OOM allocating expect\n");
		return NULL;
	}

	new->master = me;
	atomic_set(&new->use, 1);
	return new;
}

void ip_conntrack_expect_put(struct ip_conntrack_expect *exp)
{
	if (atomic_dec_and_test(&exp->use))
		kmem_cache_free(ip_conntrack_expect_cachep, exp);
}
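/* Add the expectation to the global list and start its timeout.  Two extra
 * references are taken: one for the list, one for the timer. */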
static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
{
	atomic_inc(&exp->use);
	exp->master->expecting++;
	list_add(&exp->list, &ip_conntrack_expect_list);

	init_timer(&exp->timeout);
	exp->timeout.data = (unsigned long)exp;
	exp->timeout.function = expectation_timed_out;
	exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
	add_timer(&exp->timeout);

	exp->id = ++ip_conntrack_expect_next_id;
	atomic_inc(&exp->use);
	CONNTRACK_STAT_INC(expect_create);
}
/* Race with expectations being used means we could have none to find; OK. */
static void evict_oldest_expect(struct ip_conntrack *master)
{
	struct ip_conntrack_expect *i;

	list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
		if (i->master == master) {
			if (del_timer(&i->timeout)) {
				ip_ct_unlink_expect(i);
				ip_conntrack_expect_put(i);
			}
			break;
		}
	}
}
static inline int refresh_timer(struct ip_conntrack_expect *i)
{
	if (!del_timer(&i->timeout))
		return 0;

	i->timeout.expires = jiffies + i->master->helper->timeout*HZ;
	add_timer(&i->timeout);
	return 1;
}
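/* Register a new expectation.  An identical pending expectation just has
 * its timer refreshed; a clashing one returns -EBUSY; and when the helper's
 * max_expected limit is reached, the oldest expectation for the same master
 * is evicted first. */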
int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
{
	struct ip_conntrack_expect *i;
	int ret;

	DEBUGP("ip_conntrack_expect_related %p\n", expect);
	DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
	DEBUGP("mask:  "); DUMP_TUPLE(&expect->mask);

	write_lock_bh(&ip_conntrack_lock);
	list_for_each_entry(i, &ip_conntrack_expect_list, list) {
		if (expect_matches(i, expect)) {
			/* Refresh timer: if it's dying, ignore.. */
			if (refresh_timer(i)) {
				ret = 0;
				goto out;
			}
		} else if (expect_clash(i, expect)) {
			ret = -EBUSY;
			goto out;
		}
	}

	/* Will be over limit? */
	if (expect->master->helper->max_expected &&
	    expect->master->expecting >= expect->master->helper->max_expected)
		evict_oldest_expect(expect->master);

	ip_conntrack_expect_insert(expect);
	ip_conntrack_expect_event(IPEXP_NEW, expect);
	ret = 0;
out:
	write_unlock_bh(&ip_conntrack_lock);
	return ret;
}
/* Alter reply tuple (maybe alter helper).  This is for NAT, and is
   implicitly racy: see __ip_conntrack_confirm */
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
			      const struct ip_conntrack_tuple *newreply)
{
	write_lock_bh(&ip_conntrack_lock);
	/* Should be unconfirmed, so not in hash table yet */
	IP_NF_ASSERT(!is_confirmed(conntrack));

	DEBUGP("Altering reply tuple of %p to ", conntrack);
	DUMP_TUPLE(newreply);

	conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
	if (!conntrack->master && conntrack->expecting == 0)
		conntrack->helper = __ip_conntrack_helper_find(newreply);
	write_unlock_bh(&ip_conntrack_lock);
}
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
	BUG_ON(me->timeout == 0);
	write_lock_bh(&ip_conntrack_lock);
	list_add(&me->list, &helpers);
	write_unlock_bh(&ip_conntrack_lock);

	return 0;
}
struct ip_conntrack_helper *
__ip_conntrack_helper_find_byname(const char *name)
{
	struct ip_conntrack_helper *h;

	list_for_each_entry(h, &helpers, list) {
		if (!strcmp(h->name, name))
			return h;
	}

	return NULL;
}
static inline void unhelp(struct ip_conntrack_tuple_hash *i,
			  const struct ip_conntrack_helper *me)
{
	if (tuplehash_to_ctrack(i)->helper == me) {
		ip_conntrack_event(IPCT_HELPER, tuplehash_to_ctrack(i));
		tuplehash_to_ctrack(i)->helper = NULL;
	}
}
void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
{
	unsigned int i;
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_expect *exp, *tmp;

	/* Need write lock here, to delete helper. */
	write_lock_bh(&ip_conntrack_lock);
	list_del(&me->list);

	/* Get rid of expectations */
	list_for_each_entry_safe(exp, tmp, &ip_conntrack_expect_list, list) {
		if (exp->master->helper == me && del_timer(&exp->timeout)) {
			ip_ct_unlink_expect(exp);
			ip_conntrack_expect_put(exp);
		}
	}
	/* Get rid of expecteds, set helpers to NULL. */
	list_for_each_entry(h, &unconfirmed, list)
		unhelp(h, me);
	for (i = 0; i < ip_conntrack_htable_size; i++) {
		list_for_each_entry(h, &ip_conntrack_hash[i], list)
			unhelp(h, me);
	}
	write_unlock_bh(&ip_conntrack_lock);

	/* Someone could be still looking at the helper in a bh. */
	synchronize_net();
}
/* Refresh conntrack for this many jiffies and do accounting if do_acct is 1 */
void __ip_ct_refresh_acct(struct ip_conntrack *ct,
			  enum ip_conntrack_info ctinfo,
			  const struct sk_buff *skb,
			  unsigned long extra_jiffies,
			  int do_acct)
{
	int event = 0;

	IP_NF_ASSERT(ct->timeout.data == (unsigned long)ct);
	IP_NF_ASSERT(skb);

	write_lock_bh(&ip_conntrack_lock);

	/* Only update if this is not a fixed timeout */
	if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
		write_unlock_bh(&ip_conntrack_lock);
		return;
	}

	/* If not in hash table, timer will not be active yet */
	if (!is_confirmed(ct)) {
		ct->timeout.expires = extra_jiffies;
		event = IPCT_REFRESH;
	} else {
		/* Need del_timer for race avoidance (may already be dying). */
		if (del_timer(&ct->timeout)) {
			ct->timeout.expires = jiffies + extra_jiffies;
			add_timer(&ct->timeout);
			event = IPCT_REFRESH;
		}
	}

#ifdef CONFIG_IP_NF_CT_ACCT
	if (do_acct) {
		ct->counters[CTINFO2DIR(ctinfo)].packets++;
		ct->counters[CTINFO2DIR(ctinfo)].bytes +=
						ntohs(skb->nh.iph->tot_len);
		if ((ct->counters[CTINFO2DIR(ctinfo)].packets & 0x80000000)
		    || (ct->counters[CTINFO2DIR(ctinfo)].bytes & 0x80000000))
			event |= IPCT_COUNTER_FILLING;
	}
#endif

	write_unlock_bh(&ip_conntrack_lock);

	/* must be unlocked when calling event cache */
	if (event)
		ip_conntrack_event_cache(event, skb);
}
#if defined(CONFIG_IP_NF_CONNTRACK_NETLINK) || \
    defined(CONFIG_IP_NF_CONNTRACK_NETLINK_MODULE)
/* Generic function for tcp/udp/sctp/dccp and alike. This needs to be
 * in ip_conntrack_core, since we don't want the protocols to autoload
 * or depend on ctnetlink */
int ip_ct_port_tuple_to_nfattr(struct sk_buff *skb,
			       const struct ip_conntrack_tuple *tuple)
{
	NFA_PUT(skb, CTA_PROTO_SRC_PORT, sizeof(__be16),
		&tuple->src.u.tcp.port);
	NFA_PUT(skb, CTA_PROTO_DST_PORT, sizeof(__be16),
		&tuple->dst.u.tcp.port);
	return 0;

nfattr_failure:
	return -1;
}

int ip_ct_port_nfattr_to_tuple(struct nfattr *tb[],
			       struct ip_conntrack_tuple *t)
{
	if (!tb[CTA_PROTO_SRC_PORT-1] || !tb[CTA_PROTO_DST_PORT-1])
		return -EINVAL;

	t->src.u.tcp.port =
		*(__be16 *)NFA_DATA(tb[CTA_PROTO_SRC_PORT-1]);
	t->dst.u.tcp.port =
		*(__be16 *)NFA_DATA(tb[CTA_PROTO_DST_PORT-1]);

	return 0;
}
#endif
/* Returns new sk_buff, or NULL */
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
	skb_orphan(skb);

	local_bh_disable();
	skb = ip_defrag(skb, user);
	local_bh_enable();

	if (skb)
		ip_send_check(skb->nh.iph);
	return skb;
}
/* Used by ipt_REJECT. */
static void ip_conntrack_attach(struct sk_buff *nskb, struct sk_buff *skb)
{
	struct ip_conntrack *ct;
	enum ip_conntrack_info ctinfo;

	/* This ICMP is in reverse direction to the packet which caused it */
	ct = ip_conntrack_get(skb, &ctinfo);

	if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL)
		ctinfo = IP_CT_RELATED + IP_CT_IS_REPLY;
	else
		ctinfo = IP_CT_RELATED;

	/* Attach to new skbuff, and increment count */
	nskb->nfct = &ct->ct_general;
	nskb->nfctinfo = ctinfo;
	nf_conntrack_get(nskb->nfct);
}
/* Bring out ya dead! */
static struct ip_conntrack *
get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
		void *data, unsigned int *bucket)
{
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack *ct;

	write_lock_bh(&ip_conntrack_lock);
	for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
		list_for_each_entry(h, &ip_conntrack_hash[*bucket], list) {
			ct = tuplehash_to_ctrack(h);
			if (iter(ct, data))
				goto found;
		}
	}
	list_for_each_entry(h, &unconfirmed, list) {
		ct = tuplehash_to_ctrack(h);
		if (iter(ct, data))
			goto found;
	}
	write_unlock_bh(&ip_conntrack_lock);
	return NULL;

found:
	atomic_inc(&ct->ct_general.use);
	write_unlock_bh(&ip_conntrack_lock);
	return ct;
}
void
ip_ct_iterate_cleanup(int (*iter)(struct ip_conntrack *i, void *), void *data)
{
	struct ip_conntrack *ct;
	unsigned int bucket = 0;

	while ((ct = get_next_corpse(iter, data, &bucket)) != NULL) {
		/* Time to push up daisies... */
		if (del_timer(&ct->timeout))
			death_by_timeout((unsigned long)ct);
		/* ... else the timer will get him soon. */

		ip_conntrack_put(ct);
	}
}
/* Fast function for those who don't want to parse /proc (and I don't
   blame them). */
/* Reversing the socket's dst/src point of view gives us the reply
   mapping. */
static int
getorigdst(struct sock *sk, int optval, void __user *user, int *len)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ip_conntrack_tuple_hash *h;
	struct ip_conntrack_tuple tuple;

	IP_CT_TUPLE_U_BLANK(&tuple);
	tuple.src.ip = inet->rcv_saddr;
	tuple.src.u.tcp.port = inet->sport;
	tuple.dst.ip = inet->daddr;
	tuple.dst.u.tcp.port = inet->dport;
	tuple.dst.protonum = IPPROTO_TCP;

	/* We only do TCP at the moment: is there a better way? */
	if (strcmp(sk->sk_prot->name, "TCP")) {
		DEBUGP("SO_ORIGINAL_DST: Not a TCP socket\n");
		return -ENOPROTOOPT;
	}

	if ((unsigned int) *len < sizeof(struct sockaddr_in)) {
		DEBUGP("SO_ORIGINAL_DST: len %u not %Zu\n",
		       *len, sizeof(struct sockaddr_in));
		return -EINVAL;
	}

	h = ip_conntrack_find_get(&tuple, NULL);
	if (h) {
		struct sockaddr_in sin;
		struct ip_conntrack *ct = tuplehash_to_ctrack(h);

		sin.sin_family = AF_INET;
		sin.sin_port = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.u.tcp.port;
		sin.sin_addr.s_addr = ct->tuplehash[IP_CT_DIR_ORIGINAL]
			.tuple.dst.ip;
		memset(sin.sin_zero, 0, sizeof(sin.sin_zero));

		DEBUGP("SO_ORIGINAL_DST: %u.%u.%u.%u %u\n",
		       NIPQUAD(sin.sin_addr.s_addr), ntohs(sin.sin_port));
		ip_conntrack_put(ct);
		if (copy_to_user(user, &sin, sizeof(sin)) != 0)
			return -EFAULT;
		else
			return 0;
	}
	DEBUGP("SO_ORIGINAL_DST: Can't find %u.%u.%u.%u/%u-%u.%u.%u.%u/%u.\n",
	       NIPQUAD(tuple.src.ip), ntohs(tuple.src.u.tcp.port),
	       NIPQUAD(tuple.dst.ip), ntohs(tuple.dst.u.tcp.port));
	return -ENOENT;
}
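/* Userspace usage sketch (not part of this file): given a connected TCP
 * socket fd whose connection was REDIRECTed/DNATed, the original
 * destination can be recovered with getsockopt().  SO_ORIGINAL_DST comes
 * from <linux/netfilter_ipv4.h>:
 *
 *	struct sockaddr_in dst;
 *	socklen_t dstlen = sizeof(dst);
 *
 *	if (getsockopt(fd, SOL_IP, SO_ORIGINAL_DST, &dst, &dstlen) == 0)
 *		printf("original dst %s:%u\n", inet_ntoa(dst.sin_addr),
 *		       ntohs(dst.sin_port));
 */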
static struct nf_sockopt_ops so_getorigdst = {
	.pf		= PF_INET,
	.get_optmin	= SO_ORIGINAL_DST,
	.get_optmax	= SO_ORIGINAL_DST+1,
	.get		= &getorigdst,
};
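/* Iterator that matches every conntrack: used with ip_ct_iterate_cleanup()
 * to flush the whole table. */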
static int kill_all(struct ip_conntrack *i, void *data)
{
	return 1;
}

void ip_conntrack_flush(void)
{
	ip_ct_iterate_cleanup(kill_all, NULL);
}
static void free_conntrack_hash(struct list_head *hash, int vmalloced, int size)
{
	if (vmalloced)
		vfree(hash);
	else
		free_pages((unsigned long)hash,
			   get_order(sizeof(struct list_head) * size));
}
/* Mishearing the voices in his head, our hero wonders how he's
   supposed to kill the mall. */
void ip_conntrack_cleanup(void)
{
	rcu_assign_pointer(ip_ct_attach, NULL);

	/* This makes sure all current packets have passed through
	   netfilter framework.  Roll on, two-stage module
	   delete... */
	synchronize_net();

	ip_ct_event_cache_flush();
 i_see_dead_people:
	ip_conntrack_flush();
	if (atomic_read(&ip_conntrack_count) != 0) {
		schedule();
		goto i_see_dead_people;
	}
	/* wait until all references to ip_conntrack_untracked are dropped */
	while (atomic_read(&ip_conntrack_untracked.ct_general.use) > 1)
		schedule();

	kmem_cache_destroy(ip_conntrack_cachep);
	kmem_cache_destroy(ip_conntrack_expect_cachep);
	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
			    ip_conntrack_htable_size);
	nf_unregister_sockopt(&so_getorigdst);
}
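/* Allocate the hash table: try contiguous pages first and fall back to
 * vmalloc() for large tables.  *vmalloced records which method succeeded
 * so the table can later be freed the same way. */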
static struct list_head *alloc_hashtable(int size, int *vmalloced)
{
	struct list_head *hash;
	unsigned int i;

	*vmalloced = 0;
	hash = (void*)__get_free_pages(GFP_KERNEL,
				       get_order(sizeof(struct list_head)
						 * size));
	if (!hash) {
		*vmalloced = 1;
		printk(KERN_WARNING"ip_conntrack: falling back to vmalloc.\n");
		hash = vmalloc(sizeof(struct list_head) * size);
	}

	if (hash)
		for (i = 0; i < size; i++)
			INIT_LIST_HEAD(&hash[i]);

	return hash;
}
static int set_hashsize(const char *val, struct kernel_param *kp)
{
	int i, bucket, hashsize, vmalloced;
	int old_vmalloced, old_size;
	int rnd;
	struct list_head *hash, *old_hash;
	struct ip_conntrack_tuple_hash *h;

	/* On boot, we can set this without any fancy locking. */
	if (!ip_conntrack_htable_size)
		return param_set_int(val, kp);

	hashsize = simple_strtol(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = alloc_hashtable(hashsize, &vmalloced);
	if (!hash)
		return -ENOMEM;

	/* We have to rehash for the new table anyway, so we also can
	 * use a new random seed */
	get_random_bytes(&rnd, 4);

	write_lock_bh(&ip_conntrack_lock);
	for (i = 0; i < ip_conntrack_htable_size; i++) {
		while (!list_empty(&ip_conntrack_hash[i])) {
			h = list_entry(ip_conntrack_hash[i].next,
				       struct ip_conntrack_tuple_hash, list);
			list_del(&h->list);
			bucket = __hash_conntrack(&h->tuple, hashsize, rnd);
			list_add_tail(&h->list, &hash[bucket]);
		}
	}
	old_size = ip_conntrack_htable_size;
	old_vmalloced = ip_conntrack_vmalloc;
	old_hash = ip_conntrack_hash;

	ip_conntrack_htable_size = hashsize;
	ip_conntrack_vmalloc = vmalloced;
	ip_conntrack_hash = hash;
	ip_conntrack_hash_rnd = rnd;
	write_unlock_bh(&ip_conntrack_lock);

	free_conntrack_hash(old_hash, old_vmalloced, old_size);
	return 0;
}

module_param_call(hashsize, set_hashsize, param_get_uint,
		  &ip_conntrack_htable_size, 0600);
int __init ip_conntrack_init(void)
{
	unsigned int i;
	int ret;

	/* Idea from tcp.c: use 1/16384 of memory.  On i386: 32MB
	 * machine has 256 buckets.  >= 1GB machines have 8192 buckets. */
	if (!ip_conntrack_htable_size) {
		ip_conntrack_htable_size
			= (((num_physpages << PAGE_SHIFT) / 16384)
			   / sizeof(struct list_head));
		if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
			ip_conntrack_htable_size = 8192;
		if (ip_conntrack_htable_size < 16)
			ip_conntrack_htable_size = 16;
	}
	ip_conntrack_max = 8 * ip_conntrack_htable_size;
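	/* Worked example of the heuristic above, assuming a 32-bit box with
	 * 4K pages and 8-byte list heads: 512MB of RAM gives
	 * (512MB / 16384) / 8 = 4096 buckets, hence ip_conntrack_max =
	 * 8 * 4096 = 32768 tracked connections. */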
	printk("ip_conntrack version %s (%u buckets, %d max)"
	       " - %Zd bytes per conntrack\n", IP_CONNTRACK_VERSION,
	       ip_conntrack_htable_size, ip_conntrack_max,
	       sizeof(struct ip_conntrack));

	ret = nf_register_sockopt(&so_getorigdst);
	if (ret != 0) {
		printk(KERN_ERR "Unable to register netfilter socket option\n");
		return ret;
	}

	ip_conntrack_hash = alloc_hashtable(ip_conntrack_htable_size,
					    &ip_conntrack_vmalloc);
	if (!ip_conntrack_hash) {
		printk(KERN_ERR "Unable to create ip_conntrack_hash\n");
		goto err_unreg_sockopt;
	}

	ip_conntrack_cachep = kmem_cache_create("ip_conntrack",
						sizeof(struct ip_conntrack), 0,
						0, NULL, NULL);
	if (!ip_conntrack_cachep) {
		printk(KERN_ERR "Unable to create ip_conntrack slab cache\n");
		goto err_free_hash;
	}

	ip_conntrack_expect_cachep = kmem_cache_create("ip_conntrack_expect",
					sizeof(struct ip_conntrack_expect),
					0, 0, NULL, NULL);
	if (!ip_conntrack_expect_cachep) {
		printk(KERN_ERR "Unable to create ip_expect slab cache\n");
		goto err_free_conntrack_slab;
	}

	/* Don't NEED lock here, but good form anyway. */
	write_lock_bh(&ip_conntrack_lock);
	for (i = 0; i < MAX_IP_CT_PROTO; i++)
		rcu_assign_pointer(ip_ct_protos[i], &ip_conntrack_generic_protocol);
	/* Sew in builtin protocols. */
	rcu_assign_pointer(ip_ct_protos[IPPROTO_TCP], &ip_conntrack_protocol_tcp);
	rcu_assign_pointer(ip_ct_protos[IPPROTO_UDP], &ip_conntrack_protocol_udp);
	rcu_assign_pointer(ip_ct_protos[IPPROTO_ICMP], &ip_conntrack_protocol_icmp);
	write_unlock_bh(&ip_conntrack_lock);

	/* For use by ipt_REJECT */
	rcu_assign_pointer(ip_ct_attach, ip_conntrack_attach);

	/* Set up fake conntrack:
	    - to never be deleted, not in any hashes */
	atomic_set(&ip_conntrack_untracked.ct_general.use, 1);
	/*  - and make it look like a confirmed connection */
	set_bit(IPS_CONFIRMED_BIT, &ip_conntrack_untracked.status);

	return ret;

err_free_conntrack_slab:
	kmem_cache_destroy(ip_conntrack_cachep);
err_free_hash:
	free_conntrack_hash(ip_conntrack_hash, ip_conntrack_vmalloc,
			    ip_conntrack_htable_size);
err_unreg_sockopt:
	nf_unregister_sockopt(&so_getorigdst);

	return -ENOMEM;
}