1 /* iptables match extension to limit the number of packets per second
2 * separately for each hashbucket (sourceip/sourceport/dstip/dstport)
4 * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
6 * $Id: ipt_hashlimit.c 3244 2004-10-20 16:24:29Z laforge@netfilter.org $
8 * Development of this code was funded by Astaro AG, http://www.astaro.com/
10 #include <linux/module.h>
11 #include <linux/spinlock.h>
12 #include <linux/random.h>
13 #include <linux/jhash.h>
14 #include <linux/slab.h>
15 #include <linux/vmalloc.h>
16 #include <linux/proc_fs.h>
17 #include <linux/seq_file.h>
18 #include <linux/list.h>
19 #include <linux/skbuff.h>
23 #include <linux/ipv6.h>
25 #include <net/net_namespace.h>
27 #include <linux/netfilter/x_tables.h>
28 #include <linux/netfilter_ipv4/ip_tables.h>
29 #include <linux/netfilter_ipv6/ip6_tables.h>
30 #include <linux/netfilter/xt_hashlimit.h>
31 #include <linux/mutex.h>
33 MODULE_LICENSE("GPL");
34 MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
35 MODULE_DESCRIPTION("iptables match for limiting per hash-bucket");
36 MODULE_ALIAS("ipt_hashlimit");
37 MODULE_ALIAS("ip6t_hashlimit");
39 /* Forward declarations: htable_create() and htable_destroy() reference the
 * two proc directories and dl_file_ops, which are only assigned / defined
 * further down in this file. */
40 static struct proc_dir_entry *hashlimit_procdir4;
41 static struct proc_dir_entry *hashlimit_procdir6;
42 static const struct file_operations dl_file_ops;
61 /* static / read-only parts in the beginning */
62 struct hlist_node node;
63 struct dsthash_dst dst;
65 /* modified structure members in the end */
66 unsigned long expires; /* precalculated expiry time */
68 unsigned long prev; /* last modification */
70 u_int32_t credit_cap, cost;
/* One rate-limiting hash table. Entries (struct dsthash_ent) are linked into
 * the hash[] bucket array that trails the structure.
 * NOTE(review): members referenced elsewhere in this file (use, family,
 * rnd_initialized) are not visible in this excerpt. */
74 struct xt_hashlimit_htable {
75 struct hlist_node node; /* global list of all htables */
79 struct hashlimit_cfg cfg; /* config */
82 spinlock_t lock; /* protects the hash[] buckets and their entries */
83 u_int32_t rnd; /* random seed for hash */
85 unsigned int count; /* number entries in table */
86 struct timer_list timer; /* timer for gc */
89 struct proc_dir_entry *pde; /* proc entry; its name is what htable_find_get() matches on */
91 struct hlist_head hash[0]; /* hashtable itself */
94 static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
95 static DEFINE_MUTEX(hlimit_mutex); /* additional checkentry protection */
/* every published table, linked through xt_hashlimit_htable.node */
96 static HLIST_HEAD(hashlimit_htables);
/* slab cache for struct dsthash_ent, created in hashlimit_mt_init() */
97 static struct kmem_cache *hashlimit_cachep __read_mostly;
/* Return true when the key stored in @ent is byte-for-byte identical to @b;
 * the whole key structure is compared with memcmp(). */
99 static inline bool dst_cmp(const struct dsthash_ent *ent,
100 const struct dsthash_dst *b)
102 return !memcmp(&ent->dst, b, sizeof(ent->dst));
/* Map a key to its bucket index: jhash over the whole key structure, seeded
 * with ht->rnd, reduced modulo the configured table size. */
106 hash_dst(const struct xt_hashlimit_htable *ht, const struct dsthash_dst *dst)
108 return jhash(dst, sizeof(*dst), ht->rnd) % ht->cfg.size;
/* Walk the bucket that @dst hashes to and compare each entry with dst_cmp().
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the matching entry is returned, or NULL when none matches. */
111 static struct dsthash_ent *
112 dsthash_find(const struct xt_hashlimit_htable *ht,
113 const struct dsthash_dst *dst)
115 struct dsthash_ent *ent;
116 struct hlist_node *pos;
117 u_int32_t hash = hash_dst(ht, dst);
119 if (!hlist_empty(&ht->hash[hash])) {
120 hlist_for_each_entry(ent, pos, &ht->hash[hash], node)
121 if (dst_cmp(ent, dst))
127 /* allocate dsthash_ent, initialize dst, put in htable and lock it */
/* Visible steps: seed ht->rnd on first use, enforce cfg.max when it is
 * non-zero, take an entry from hashlimit_cachep with GFP_ATOMIC, copy the
 * key into it and link it at the head of its bucket.
 * NOTE(review): the failure returns and any ht->count update are not
 * visible in this excerpt. */
128 static struct dsthash_ent *
129 dsthash_alloc_init(struct xt_hashlimit_htable *ht,
130 const struct dsthash_dst *dst)
132 struct dsthash_ent *ent;
134 /* initialize hash with random val at the time we allocate
135 * the first hashtable entry */
136 if (!ht->rnd_initialized) {
137 get_random_bytes(&ht->rnd, 4);
138 ht->rnd_initialized = 1;
141 if (ht->cfg.max && ht->count >= ht->cfg.max) {
142 /* FIXME: do something. question is what.. */
145 "xt_hashlimit: max count of %u reached\n",
150 ent = kmem_cache_alloc(hashlimit_cachep, GFP_ATOMIC);
154 "xt_hashlimit: can't allocate dsthash_ent\n");
157 memcpy(&ent->dst, dst, sizeof(ent->dst));
159 hlist_add_head(&ent->node, &ht->hash[hash_dst(ht, dst)]);
/* Unlink @ent from its bucket and hand it back to the slab cache.
 * NOTE(review): any ht->count bookkeeping is not visible in this excerpt. */
165 dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent)
167 hlist_del(&ent->node);
168 kmem_cache_free(hashlimit_cachep, ent);
/* defined below; declared here because htable_create() installs it as the
 * gc timer callback */
171 static void htable_gc(unsigned long htlong);
/* Allocate and set up a new hash table for @minfo and publish it.
 * Visible steps: choose the bucket count (minfo->cfg.size, or a value
 * derived from num_physpages), vmalloc the table plus bucket array, store
 * the table in minfo->hinfo, copy the match config, adjust cfg.max,
 * initialise buckets / refcount / lock, create the proc entry, arm the gc
 * timer and add the table to hashlimit_htables under hashlimit_lock.
 * NOTE(review): the conditions guarding several of these lines and all
 * return statements are not visible in this excerpt. The caller in
 * hashlimit_mt_check() treats a non-zero return as failure. */
173 static int htable_create(struct xt_hashlimit_info *minfo, int family)
175 struct xt_hashlimit_htable *hinfo;
180 size = minfo->cfg.size;
182 size = ((num_physpages << PAGE_SHIFT) / 16384) /
183 sizeof(struct list_head);
184 if (num_physpages > (1024 * 1024 * 1024 / PAGE_SIZE))
189 /* FIXME: don't use vmalloc() here or anywhere else -HW */
/* NOTE(review): sized with sizeof(struct list_head) although hash[] is an
 * array of struct hlist_head — confirm this is intentional. */
190 hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) +
191 sizeof(struct list_head) * size);
193 printk(KERN_ERR "xt_hashlimit: unable to create hashtable\n");
196 minfo->hinfo = hinfo;
198 /* copy match config into hashtable config */
199 memcpy(&hinfo->cfg, &minfo->cfg, sizeof(hinfo->cfg));
200 hinfo->cfg.size = size;
202 hinfo->cfg.max = 8 * hinfo->cfg.size;
203 else if (hinfo->cfg.max < hinfo->cfg.size)
204 hinfo->cfg.max = hinfo->cfg.size;
206 for (i = 0; i < hinfo->cfg.size; i++)
207 INIT_HLIST_HEAD(&hinfo->hash[i]);
/* the creating rule holds the first reference */
209 atomic_set(&hinfo->use, 1);
211 hinfo->family = family;
212 hinfo->rnd_initialized = 0;
213 spin_lock_init(&hinfo->lock);
214 hinfo->pde = create_proc_entry(minfo->name, 0,
215 family == AF_INET ? hashlimit_procdir4 :
221 hinfo->pde->proc_fops = &dl_file_ops;
222 hinfo->pde->data = hinfo;
/* cfg.gc_interval is converted with msecs_to_jiffies(), i.e. it is in ms */
224 setup_timer(&hinfo->timer, htable_gc, (unsigned long )hinfo);
225 hinfo->timer.expires = jiffies + msecs_to_jiffies(hinfo->cfg.gc_interval);
226 add_timer(&hinfo->timer);
228 spin_lock_bh(&hashlimit_lock);
229 hlist_add_head(&hinfo->node, &hashlimit_htables);
230 spin_unlock_bh(&hashlimit_lock);
/* Selector passed to htable_selective_cleanup() by htable_destroy().
 * NOTE(review): the body is not visible in this excerpt; judging by its
 * name and use it presumably selects every entry — confirm. */
235 static bool select_all(const struct xt_hashlimit_htable *ht,
236 const struct dsthash_ent *he)
/* Selector used by htable_gc(): true once jiffies has reached or passed
 * he->expires. @ht is unused in the visible code. */
241 static bool select_gc(const struct xt_hashlimit_htable *ht,
242 const struct dsthash_ent *he)
244 return time_after_eq(jiffies, he->expires);
/* Free every entry of @ht for which @select returns true. The walk over all
 * cfg.size buckets runs under ht->lock with bottom halves disabled; the
 * _safe iterator is used because entries are unlinked while iterating. */
247 static void htable_selective_cleanup(struct xt_hashlimit_htable *ht,
248 bool (*select)(const struct xt_hashlimit_htable *ht,
249 const struct dsthash_ent *he))
253 /* lock hash table and iterate over it */
254 spin_lock_bh(&ht->lock);
255 for (i = 0; i < ht->cfg.size; i++) {
256 struct dsthash_ent *dh;
257 struct hlist_node *pos, *n;
258 hlist_for_each_entry_safe(dh, pos, n, &ht->hash[i], node) {
259 if ((*select)(ht, dh))
260 dsthash_free(ht, dh);
263 spin_unlock_bh(&ht->lock);
266 /* hash table garbage collector, run by timer */
/* @htlong is the table pointer that htable_create() handed to setup_timer()
 * as an unsigned long. Entries selected by select_gc() are freed, then the
 * timer is re-armed cfg.gc_interval milliseconds ahead. */
267 static void htable_gc(unsigned long htlong)
269 struct xt_hashlimit_htable *ht = (struct xt_hashlimit_htable *)htlong;
271 htable_selective_cleanup(ht, select_gc);
273 /* re-add the timer accordingly */
274 ht->timer.expires = jiffies + msecs_to_jiffies(ht->cfg.gc_interval);
275 add_timer(&ht->timer);
/* Tear down a table: stop the gc timer, remove its proc entry and free all
 * entries via select_all.
 * NOTE(review): htable_gc() re-arms the timer itself, and del_timer() does
 * not wait for a handler that is already running — confirm whether
 * del_timer_sync() is needed here.
 * NOTE(review): release of the table memory itself is not visible in this
 * excerpt. */
278 static void htable_destroy(struct xt_hashlimit_htable *hinfo)
280 /* remove timer, if it is pending */
281 if (timer_pending(&hinfo->timer))
282 del_timer(&hinfo->timer);
284 /* remove proc entry */
285 remove_proc_entry(hinfo->pde->name,
286 hinfo->family == AF_INET ? hashlimit_procdir4 :
288 htable_selective_cleanup(hinfo, select_all);
/* Search hashlimit_htables, under hashlimit_lock, for a table whose proc
 * entry name equals @name and whose family matches. On a hit the reference
 * count is raised before the lock is dropped.
 * NOTE(review): the return statements are not visible in this excerpt;
 * presumably the table is returned on a hit and NULL otherwise. */
292 static struct xt_hashlimit_htable *htable_find_get(const char *name,
295 struct xt_hashlimit_htable *hinfo;
296 struct hlist_node *pos;
298 spin_lock_bh(&hashlimit_lock);
299 hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
300 if (!strcmp(name, hinfo->pde->name) &&
301 hinfo->family == family) {
302 atomic_inc(&hinfo->use);
303 spin_unlock_bh(&hashlimit_lock);
307 spin_unlock_bh(&hashlimit_lock);
/* Drop one reference on @hinfo. When the count reaches zero the table is
 * unlinked from hashlimit_htables under hashlimit_lock and then destroyed. */
311 static void htable_put(struct xt_hashlimit_htable *hinfo)
313 if (atomic_dec_and_test(&hinfo->use)) {
314 spin_lock_bh(&hashlimit_lock);
315 hlist_del(&hinfo->node);
316 spin_unlock_bh(&hashlimit_lock);
317 htable_destroy(hinfo);
321 /* The algorithm used is the Simple Token Bucket Filter (TBF)
322 * see net/sched/sch_tbf.c in the linux source tree
325 /* Rusty: This is my (non-mathematically-inclined) understanding of
326 this algorithm. The `average rate' in jiffies becomes your initial
327 amount of credit `credit' and the most credit you can ever have
328 `credit_cap'. The `peak rate' becomes the cost of passing the
331 `prev' tracks the last packet hit: you gain one credit per jiffy.
332 If you get credit balance more than this, the extra credit is
333 discarded. Every time the match passes, you lose `cost' credits;
334 if you don't have that many, the test fails.
336 See Alexey's formal explanation in net/sched/sch_tbf.c.
338 To get the maximum range, we multiply by this factor (ie. you get N
339 credits per jiffy). We want to allow a rate as low as 1 per day
340 (slowest userspace tool allows), which means
341 CREDITS_PER_JIFFY*HZ*60*60*24 < 2^32 ie.
/* Upper bound on credits per jiffy such that one day's worth of jiffies
 * times this factor still fits in 32 bits. */
343 #define MAX_CPJ (0xFFFFFFFF / (HZ*60*60*24))
345 /* Repeated shift and or gives us all 1s, final shift and add 1 gives
346 * us the power of 2 below the theoretical max, so GCC simply does a
/* _POW2_BELOWn(x) ORs together x shifted right by 0 .. n-1 bits;
 * POW2_BELOW32() then shifts that result right once and adds 1. */
348 #define _POW2_BELOW2(x) ((x)|((x)>>1))
349 #define _POW2_BELOW4(x) (_POW2_BELOW2(x)|_POW2_BELOW2((x)>>2))
350 #define _POW2_BELOW8(x) (_POW2_BELOW4(x)|_POW2_BELOW4((x)>>4))
351 #define _POW2_BELOW16(x) (_POW2_BELOW8(x)|_POW2_BELOW8((x)>>8))
352 #define _POW2_BELOW32(x) (_POW2_BELOW16(x)|_POW2_BELOW16((x)>>16))
353 #define POW2_BELOW32(x) ((_POW2_BELOW32(x)>>1) + 1)
/* credit granted per elapsed jiffy, see rateinfo_recalc() */
355 #define CREDITS_PER_JIFFY POW2_BELOW32(MAX_CPJ)
357 /* Precision saver. */
/* Convert a user-supplied rate value into internal credits, i.e.
 * user * HZ * CREDITS_PER_JIFFY / XT_HASHLIMIT_SCALE. When the
 * multiplication would overflow 32 bits the division is done first,
 * trading precision for range; otherwise the multiplication is done first. */
358 static inline u_int32_t
359 user2credits(u_int32_t user)
361 /* If multiplying would overflow... */
362 if (user > 0xFFFFFFFF / (HZ*CREDITS_PER_JIFFY))
364 return (user / XT_HASHLIMIT_SCALE) * HZ * CREDITS_PER_JIFFY;
366 return (user * HZ * CREDITS_PER_JIFFY) / XT_HASHLIMIT_SCALE;
/* Credit @dh for the jiffies elapsed since rateinfo.prev
 * (CREDITS_PER_JIFFY each), clamp the balance to credit_cap and record
 * @now as the new reference time. */
369 static inline void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now)
371 dh->rateinfo.credit += (now - dh->rateinfo.prev) * CREDITS_PER_JIFFY;
372 if (dh->rateinfo.credit > dh->rateinfo.credit_cap)
373 dh->rateinfo.credit = dh->rateinfo.credit_cap;
374 dh->rateinfo.prev = now;
/* Build the lookup key for @skb in @dst according to hinfo->cfg.mode.
 * The key is zeroed first, so fields that are not selected stay 0.
 * Addresses come from the IPv4 or IPv6 header depending on hinfo->family;
 * ports are read at @protoff through skb_header_pointer(), and for IPv6
 * @protoff is recomputed with ipv6_skip_exthdr().
 * NOTE(review): the case labels, return statements and most of the
 * protocol switch are not visible in this excerpt. hashlimit_mt() treats a
 * negative return value as failure. */
378 hashlimit_init_dst(const struct xt_hashlimit_htable *hinfo,
379 struct dsthash_dst *dst,
380 const struct sk_buff *skb, unsigned int protoff)
382 __be16 _ports[2], *ports;
385 memset(dst, 0, sizeof(*dst));
387 switch (hinfo->family) {
389 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
390 dst->addr.ip.dst = ip_hdr(skb)->daddr;
391 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
392 dst->addr.ip.src = ip_hdr(skb)->saddr;
394 if (!(hinfo->cfg.mode &
395 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
397 nexthdr = ip_hdr(skb)->protocol;
399 #if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
401 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DIP)
402 memcpy(&dst->addr.ip6.dst, &ipv6_hdr(skb)->daddr,
403 sizeof(dst->addr.ip6.dst));
404 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SIP)
405 memcpy(&dst->addr.ip6.src, &ipv6_hdr(skb)->saddr,
406 sizeof(dst->addr.ip6.src));
408 if (!(hinfo->cfg.mode &
409 (XT_HASHLIMIT_HASH_DPT | XT_HASHLIMIT_HASH_SPT)))
411 nexthdr = ipv6_hdr(skb)->nexthdr;
412 protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr);
413 if ((int)protoff < 0)
425 case IPPROTO_UDPLITE:
428 ports = skb_header_pointer(skb, protoff, sizeof(_ports),
432 _ports[0] = _ports[1] = 0;
/* ports[0] is used as the source port, ports[1] as the destination port */
438 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_SPT)
439 dst->src_port = ports[0];
440 if (hinfo->cfg.mode & XT_HASHLIMIT_HASH_DPT)
441 dst->dst_port = ports[1];
/* Match function: build the key for the packet, look up the entry (or
 * allocate a new one) and apply the token-bucket test under hinfo->lock.
 * One path initialises expiry, credit, credit_cap and cost from the table
 * config; the other refreshes the expiry and recalculates the credit.
 * If credit >= cost the cost is deducted ("underlimit"); otherwise the
 * packet is over limit and does not match.
 * The table is taken from the u.master copy of the match info.
 * NOTE(review): the branch conditions, the second operand of the two
 * multiplications by cfg.avg, and all return statements are not visible in
 * this excerpt. */
446 hashlimit_mt(const struct sk_buff *skb, const struct net_device *in,
447 const struct net_device *out, const struct xt_match *match,
448 const void *matchinfo, int offset, unsigned int protoff,
451 const struct xt_hashlimit_info *r =
452 ((const struct xt_hashlimit_info *)matchinfo)->u.master;
453 struct xt_hashlimit_htable *hinfo = r->hinfo;
454 unsigned long now = jiffies;
455 struct dsthash_ent *dh;
456 struct dsthash_dst dst;
458 if (hashlimit_init_dst(hinfo, &dst, skb, protoff) < 0)
461 spin_lock_bh(&hinfo->lock);
462 dh = dsthash_find(hinfo, &dst);
464 dh = dsthash_alloc_init(hinfo, &dst);
466 spin_unlock_bh(&hinfo->lock);
470 dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
471 dh->rateinfo.prev = jiffies;
472 dh->rateinfo.credit = user2credits(hinfo->cfg.avg *
474 dh->rateinfo.credit_cap = user2credits(hinfo->cfg.avg *
476 dh->rateinfo.cost = user2credits(hinfo->cfg.avg);
478 /* update expiration timeout */
479 dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
480 rateinfo_recalc(dh, now);
483 if (dh->rateinfo.credit >= dh->rateinfo.cost) {
484 /* We're underlimit. */
485 dh->rateinfo.credit -= dh->rateinfo.cost;
486 spin_unlock_bh(&hinfo->lock);
490 spin_unlock_bh(&hinfo->lock);
492 /* default case: we're overlimit, thus don't match */
/* checkentry hook: validate the user-supplied config, then attach the rule
 * to the table named r->name for match->family, creating the table when
 * htable_find_get() finds none. Find-or-create is serialised by
 * hlimit_mutex.
 * Visible checks: burst must be non-zero and avg * burst must not overflow
 * in user2credits(); mode must be non-zero and no larger than the OR of
 * the four known XT_HASHLIMIT_HASH_* bits; gc_interval is tested for zero;
 * the last byte of name must be NUL.
 * NOTE(review): the failure returns and the code after the final comment
 * are not visible in this excerpt. */
501 hashlimit_mt_check(const char *tablename, const void *inf,
502 const struct xt_match *match, void *matchinfo,
503 unsigned int hook_mask)
505 struct xt_hashlimit_info *r = matchinfo;
507 /* Check for overflow. */
508 if (r->cfg.burst == 0 ||
509 user2credits(r->cfg.avg * r->cfg.burst) < user2credits(r->cfg.avg)) {
510 printk(KERN_ERR "xt_hashlimit: overflow, try lower: %u/%u\n",
511 r->cfg.avg, r->cfg.burst);
514 if (r->cfg.mode == 0 ||
515 r->cfg.mode > (XT_HASHLIMIT_HASH_DPT |
516 XT_HASHLIMIT_HASH_DIP |
517 XT_HASHLIMIT_HASH_SIP |
518 XT_HASHLIMIT_HASH_SPT))
520 if (!r->cfg.gc_interval)
/* the name is later used with strcmp() and as the proc entry name, so it
 * must be NUL-terminated within its buffer */
524 if (r->name[sizeof(r->name) - 1] != '\0')
527 /* This is the best we've got: We cannot release and re-grab lock,
528 * since checkentry() is called before x_tables.c grabs xt_mutex.
529 * We also cannot grab the hashtable spinlock, since htable_create will
530 * call vmalloc, and that can sleep. And we cannot just re-search
531 * the list of htable's in htable_create(), since then we would
532 * create duplicate proc files. -HW */
533 mutex_lock(&hlimit_mutex);
534 r->hinfo = htable_find_get(r->name, match->family);
535 if (!r->hinfo && htable_create(r, match->family) != 0) {
536 mutex_unlock(&hlimit_mutex);
539 mutex_unlock(&hlimit_mutex);
541 /* Ugly hack: For SMP, we only want to use one set */
/* destroy hook: release this rule's reference on its hash table. */
547 hashlimit_mt_destroy(const struct xt_match *match, void *matchinfo)
549 const struct xt_hashlimit_info *r = matchinfo;
551 htable_put(r->hinfo);
/* Compat layout of the match info, used for .compatsize and by the two
 * compat copy helpers.
 * NOTE(review): only cfg and master are visible in this excerpt; the
 * helpers also rely on a member named hinfo. */
555 struct compat_xt_hashlimit_info {
557 struct hashlimit_cfg cfg;
559 compat_uptr_t master;
/* Copy the bytes of @src that precede the hinfo member into @dst, then
 * zero-fill the following bytes of @dst.
 * NOTE(review): the zeroed length is derived from the compat structure,
 * not from struct xt_hashlimit_info — confirm it covers the whole
 * kernel-only tail of the native structure. */
562 static void hashlimit_mt_compat_from_user(void *dst, void *src)
564 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
566 memcpy(dst, src, off);
567 memset(dst + off, 0, sizeof(struct compat_xt_hashlimit_info) - off);
/* Copy only the bytes preceding the hinfo member back to user space.
 * Returns 0 on success, -EFAULT when copy_to_user() reports uncopied bytes. */
570 static int hashlimit_mt_compat_to_user(void __user *dst, void *src)
572 int off = offsetof(struct compat_xt_hashlimit_info, hinfo);
574 return copy_to_user(dst, src, off) ? -EFAULT : 0;
/* Match registrations: two entries that share the same match, checkentry,
 * destroy and compat callbacks.
 * NOTE(review): the name / family members are not visible in this excerpt;
 * presumably there is one entry per address family. */
578 static struct xt_match hashlimit_mt_reg[] __read_mostly = {
582 .match = hashlimit_mt,
583 .matchsize = sizeof(struct xt_hashlimit_info),
585 .compatsize = sizeof(struct compat_xt_hashlimit_info),
586 .compat_from_user = hashlimit_mt_compat_from_user,
587 .compat_to_user = hashlimit_mt_compat_to_user,
589 .checkentry = hashlimit_mt_check,
590 .destroy = hashlimit_mt_destroy,
596 .match = hashlimit_mt,
597 .matchsize = sizeof(struct xt_hashlimit_info),
599 .compatsize = sizeof(struct compat_xt_hashlimit_info),
600 .compat_from_user = hashlimit_mt_compat_from_user,
601 .compat_to_user = hashlimit_mt_compat_to_user,
603 .checkentry = hashlimit_mt_check,
604 .destroy = hashlimit_mt_destroy,
/* seq_file start: takes htable->lock (dropped again in dl_seq_stop()) and
 * allocates the unsigned int used as bucket cursor. The allocation uses
 * GFP_ATOMIC because the spinlock is already held; ERR_PTR(-ENOMEM) is the
 * visible failure return.
 * NOTE(review): the remaining return statements are not visible in this
 * excerpt. */
610 static void *dl_seq_start(struct seq_file *s, loff_t *pos)
612 struct proc_dir_entry *pde = s->private;
613 struct xt_hashlimit_htable *htable = pde->data;
614 unsigned int *bucket;
616 spin_lock_bh(&htable->lock);
617 if (*pos >= htable->cfg.size)
620 bucket = kmalloc(sizeof(unsigned int), GFP_ATOMIC);
622 return ERR_PTR(-ENOMEM);
/* seq_file next: @v is the bucket cursor handed out by dl_seq_start().
 * NOTE(review): most of the body is not visible in this excerpt; the one
 * visible check compares *pos against the table size. */
628 static void *dl_seq_next(struct seq_file *s, void *v, loff_t *pos)
630 struct proc_dir_entry *pde = s->private;
631 struct xt_hashlimit_htable *htable = pde->data;
632 unsigned int *bucket = (unsigned int *)v;
635 if (*pos >= htable->cfg.size) {
/* seq_file stop: releases htable->lock taken in dl_seq_start().
 * NOTE(review): what happens to the bucket cursor @v is not visible in this
 * excerpt. */
642 static void dl_seq_stop(struct seq_file *s, void *v)
644 struct proc_dir_entry *pde = s->private;
645 struct xt_hashlimit_htable *htable = pde->data;
646 unsigned int *bucket = (unsigned int *)v;
649 spin_unlock_bh(&htable->lock);
/* Print one entry: seconds until expiry, then src:port->dst:port, then the
 * rate counters. The credit is recalculated first, so showing an entry also
 * updates its rateinfo.credit and rateinfo.prev. Two output formats exist,
 * one with dotted-quad addresses and one using NIP6_FMT.
 * NOTE(review): the selection between the two formats and the last
 * seq_printf() argument are not visible in this excerpt. */
652 static int dl_seq_real_show(struct dsthash_ent *ent, int family,
655 /* recalculate to show accurate numbers */
656 rateinfo_recalc(ent, jiffies);
660 return seq_printf(s, "%ld %u.%u.%u.%u:%u->"
661 "%u.%u.%u.%u:%u %u %u %u\n",
662 (long)(ent->expires - jiffies)/HZ,
663 NIPQUAD(ent->dst.addr.ip.src),
664 ntohs(ent->dst.src_port),
665 NIPQUAD(ent->dst.addr.ip.dst),
666 ntohs(ent->dst.dst_port),
667 ent->rateinfo.credit, ent->rateinfo.credit_cap,
670 return seq_printf(s, "%ld " NIP6_FMT ":%u->"
671 NIP6_FMT ":%u %u %u %u\n",
672 (long)(ent->expires - jiffies)/HZ,
673 NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.src),
674 ntohs(ent->dst.src_port),
675 NIP6(*(struct in6_addr *)&ent->dst.addr.ip6.dst),
676 ntohs(ent->dst.dst_port),
677 ent->rateinfo.credit, ent->rateinfo.credit_cap,
/* seq_file show: print every entry in the bucket whose index @v points to,
 * using dl_seq_real_show().
 * NOTE(review): the return statements are not visible in this excerpt. */
685 static int dl_seq_show(struct seq_file *s, void *v)
687 struct proc_dir_entry *pde = s->private;
688 struct xt_hashlimit_htable *htable = pde->data;
689 unsigned int *bucket = (unsigned int *)v;
690 struct dsthash_ent *ent;
691 struct hlist_node *pos;
693 if (!hlist_empty(&htable->hash[*bucket])) {
694 hlist_for_each_entry(ent, pos, &htable->hash[*bucket], node)
695 if (dl_seq_real_show(ent, htable->family, s))
/* seq_file operations handed to seq_open() in dl_proc_open().
 * NOTE(review): only .start is visible in this excerpt. */
701 static const struct seq_operations dl_seq_ops = {
702 .start = dl_seq_start,
/* open(): set up the seq_file and store the inode's proc_dir_entry in
 * sf->private; the dl_seq_* callbacks read the table from its ->data.
 * NOTE(review): the check on ret and the return are not visible in this
 * excerpt. */
708 static int dl_proc_open(struct inode *inode, struct file *file)
710 int ret = seq_open(file, &dl_seq_ops);
713 struct seq_file *sf = file->private_data;
714 sf->private = PDE(inode);
/* File operations installed on each table's proc entry by htable_create().
 * NOTE(review): members between .open and .release are not visible in this
 * excerpt. */
719 static const struct file_operations dl_file_ops = {
720 .owner = THIS_MODULE,
721 .open = dl_proc_open,
724 .release = seq_release
/* Module init: register the matches, create the slab cache for
 * struct dsthash_ent, then create the ipt_hashlimit and ip6t_hashlimit proc
 * directories under init_net.proc_net. The trailing calls undo the earlier
 * steps in reverse order.
 * NOTE(review): the error checks, labels / jumps and return statements are
 * only partly visible in this excerpt. */
727 static int __init hashlimit_mt_init(void)
731 err = xt_register_matches(hashlimit_mt_reg,
732 ARRAY_SIZE(hashlimit_mt_reg));
737 hashlimit_cachep = kmem_cache_create("xt_hashlimit",
738 sizeof(struct dsthash_ent), 0, 0,
740 if (!hashlimit_cachep) {
741 printk(KERN_ERR "xt_hashlimit: unable to create slab cache\n");
744 hashlimit_procdir4 = proc_mkdir("ipt_hashlimit", init_net.proc_net);
745 if (!hashlimit_procdir4) {
746 printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
750 hashlimit_procdir6 = proc_mkdir("ip6t_hashlimit", init_net.proc_net);
751 if (!hashlimit_procdir6) {
752 printk(KERN_ERR "xt_hashlimit: unable to create proc dir "
758 remove_proc_entry("ipt_hashlimit", init_net.proc_net);
760 kmem_cache_destroy(hashlimit_cachep);
762 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
/* Module exit: remove both proc directories, destroy the slab cache and
 * unregister the matches, in that order. */
768 static void __exit hashlimit_mt_exit(void)
770 remove_proc_entry("ipt_hashlimit", init_net.proc_net);
771 remove_proc_entry("ip6t_hashlimit", init_net.proc_net);
772 kmem_cache_destroy(hashlimit_cachep);
773 xt_unregister_matches(hashlimit_mt_reg, ARRAY_SIZE(hashlimit_mt_reg));
776 module_init(hashlimit_mt_init);
777 module_exit(hashlimit_mt_exit);