2 * Packet matching code.
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
14 * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
15 * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
17 #include <linux/config.h>
18 #include <linux/cache.h>
19 #include <linux/capability.h>
20 #include <linux/skbuff.h>
21 #include <linux/kmod.h>
22 #include <linux/vmalloc.h>
23 #include <linux/netdevice.h>
24 #include <linux/module.h>
25 #include <linux/icmp.h>
27 #include <asm/uaccess.h>
28 #include <asm/semaphore.h>
29 #include <linux/proc_fs.h>
30 #include <linux/err.h>
31 #include <linux/cpumask.h>
33 #include <linux/netfilter/x_tables.h>
34 #include <linux/netfilter_ipv4/ip_tables.h>
36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
38 MODULE_DESCRIPTION("IPv4 packet filter");
/* Compile-time debug switches: dprintf() logs the packet path,
 * duprintf() the user/ruleset-checking path; both compile to nothing
 * when the corresponding symbol stays undefined.
 * NOTE(review): matching #else/#endif lines appear elided in this
 * excerpt. */
40 /*#define DEBUG_IP_FIREWALL*/
41 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
42 /*#define DEBUG_IP_FIREWALL_USER*/
44 #ifdef DEBUG_IP_FIREWALL
45 #define dprintf(format, args...) printk(format , ## args)
47 #define dprintf(format, args...)
50 #ifdef DEBUG_IP_FIREWALL_USER
51 #define duprintf(format, args...) printk(format , ## args)
53 #define duprintf(format, args...)
/* IP_NF_ASSERT: under CONFIG_NETFILTER_DEBUG, log the failing
 * function/file/line; otherwise a no-op. */
56 #ifdef CONFIG_NETFILTER_DEBUG
57 #define IP_NF_ASSERT(x) \
60 printk("IP_NF_ASSERT: %s:%s:%u\n", \
61 __FUNCTION__, __FILE__, __LINE__); \
64 #define IP_NF_ASSERT(x)
68 /* All the better to debug you with... */
74 We keep a set of rules for each CPU, so we can avoid write-locking
75 them in the softirq when updating the counters and therefore
76 only need to read-lock in the softirq; doing a write_lock_bh() in user
77 context stops packets coming through and allows user context to read
78 the counters or update the rules.
80 Hence the start of any table is given by get_table() below. */
82 /* Returns whether matches rule or not. */
/*
 * ip_packet_match — check a packet's IP header plus in/out device
 * names against the ipt_ip part of a rule: masked src/dst address,
 * interface name, protocol, and fragment flag.  Every test can be
 * inverted through ipinfo->invflags via the FWINV() helper.
 * NOTE(review): the opening brace, some parameters, return statements
 * and #undef FWINV appear elided in this excerpt.
 */
84 ip_packet_match(const struct iphdr *ip,
87 const struct ipt_ip *ipinfo,
/* FWINV: XOR the raw test with the rule's invert bit for this flag. */
93 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
/* Masked address compare: packet addr & rule mask must equal the
 * rule's target address (optionally inverted). */
95 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
97 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
99 dprintf("Source or dest mismatch.\n");
101 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
103 NIPQUAD(ipinfo->smsk.s_addr),
104 NIPQUAD(ipinfo->src.s_addr),
105 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
106 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
108 NIPQUAD(ipinfo->dmsk.s_addr),
109 NIPQUAD(ipinfo->dst.s_addr),
110 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
114 /* Look for ifname matches; this should unroll nicely. */
/* Word-at-a-time masked XOR compare of the interface name; relies on
 * device names being long-aligned (see nulldevname in ipt_do_table). */
115 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
116 ret |= (((const unsigned long *)indev)[i]
117 ^ ((const unsigned long *)ipinfo->iniface)[i])
118 & ((const unsigned long *)ipinfo->iniface_mask)[i];
121 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
122 dprintf("VIA in mismatch (%s vs %s).%s\n",
123 indev, ipinfo->iniface,
124 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
/* Same word-wise compare for the output interface. */
128 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
129 ret |= (((const unsigned long *)outdev)[i]
130 ^ ((const unsigned long *)ipinfo->outiface)[i])
131 & ((const unsigned long *)ipinfo->outiface_mask)[i];
134 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
135 dprintf("VIA out mismatch (%s vs %s).%s\n",
136 outdev, ipinfo->outiface,
137 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
141 /* Check specific protocol */
143 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
144 dprintf("Packet protocol %hi does not match %hi.%s\n",
145 ip->protocol, ipinfo->proto,
146 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
150 /* If we have a fragment rule but the packet is not a fragment
151 * then we return zero */
152 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
153 dprintf("Fragment rule but not fragment.%s\n",
154 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
/* ip_checkentry — sanity-check the ipt_ip part of a user-supplied
 * rule: reject any flag or invflag bits outside IPT_F_MASK /
 * IPT_INV_MASK.  (Return statements elided in this excerpt.) */
162 ip_checkentry(const struct ipt_ip *ip)
164 if (ip->flags & ~IPT_F_MASK) {
165 duprintf("Unknown flag bits set: %08X\n",
166 ip->flags & ~IPT_F_MASK);
169 if (ip->invflags & ~IPT_INV_MASK) {
170 duprintf("Unknown invflag bits set: %08X\n",
171 ip->invflags & ~IPT_INV_MASK);
/* ipt_error — target handler for the built-in ERROR target; logs the
 * message string carried in targinfo.  (The return of a verdict is
 * elided in this excerpt.) */
178 ipt_error(struct sk_buff **pskb,
179 const struct net_device *in,
180 const struct net_device *out,
181 unsigned int hooknum,
182 const void *targinfo,
186 printk("ip_tables: error: `%s'\n", (char *)targinfo);
/* do_match — IPT_MATCH_ITERATE callback used from the packet path:
 * invoke one match's ->match() hook on the skb.  A nonzero return
 * from the iterator stops iteration, i.e. the rule does not match. */
192 int do_match(struct ipt_entry_match *m,
193 const struct sk_buff *skb,
194 const struct net_device *in,
195 const struct net_device *out,
199 /* Stop iteration if it doesn't match */
200 if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
201 offset, skb->nh.iph->ihl*4, hotdrop))
/* get_entry — translate a byte offset within a table's entry blob
 * into an ipt_entry pointer. */
207 static inline struct ipt_entry *
208 get_entry(void *base, unsigned int offset)
210 return (struct ipt_entry *)(base + offset);
213 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
/*
 * ipt_do_table — traverse the table's rule chain for the given hook
 * and produce an NF_* verdict for the packet.  Rules live in a
 * per-CPU copy (private->entries[cpu]), so the packet path only needs
 * the table's read lock (see the locking comment near the top of the
 * file).  `back` records where to resume when a user chain RETURNs.
 * NOTE(review): many lines (locals, the traversal loop head, braces,
 * and the final return) appear elided in this excerpt.
 */
215 ipt_do_table(struct sk_buff **pskb,
217 const struct net_device *in,
218 const struct net_device *out,
219 struct ipt_table *table,
/* Stand-in name when in/out is NULL; long-aligned so the word-wise
 * interface compare in ip_packet_match works on it. */
222 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
227 /* Initializing verdict to NF_DROP keeps gcc happy. */
228 unsigned int verdict = NF_DROP;
229 const char *indev, *outdev;
231 struct ipt_entry *e, *back;
232 struct xt_table_info *private = table->private;
235 ip = (*pskb)->nh.iph;
236 datalen = (*pskb)->len - ip->ihl * 4;
237 indev = in ? in->name : nulldevname;
238 outdev = out ? out->name : nulldevname;
239 /* We handle fragments by dealing with the first fragment as
240 * if it was a normal packet. All other fragments are treated
241 * normally, except that they will NEVER match rules that ask
242 * things we don't know, ie. tcp syn flag or ports). If the
243 * rule is also a fragment-specific rule, non-fragments won't
245 offset = ntohs(ip->frag_off) & IP_OFFSET;
247 read_lock_bh(&table->lock);
248 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
/* Start at this hook's entry point in this CPU's copy of the rules. */
249 table_base = (void *)private->entries[smp_processor_id()];
250 e = get_entry(table_base, private->hook_entry[hook]);
252 /* For return from builtin chain */
253 back = get_entry(table_base, private->underflow[hook]);
258 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
259 struct ipt_entry_target *t;
/* Nonzero from the iterator means some match rejected the packet. */
261 if (IPT_MATCH_ITERATE(e, do_match,
263 offset, &hotdrop) != 0)
266 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
268 t = ipt_get_target(e);
269 IP_NF_ASSERT(t->u.kernel.target);
270 /* Standard target? */
271 if (!t->u.kernel.target->target) {
274 v = ((struct ipt_standard_target *)t)->verdict;
276 /* Pop from stack? */
277 if (v != IPT_RETURN) {
/* Negative verdicts encode NF_* values as -NF_xxx - 1. */
278 verdict = (unsigned)(-v) - 1;
282 back = get_entry(table_base,
/* Non-negative verdict = jump offset.  For a real jump (not a plain
 * fallthrough and not a GOTO), stash the current back pointer in the
 * NEXT entry so a later RETURN resumes there. */
286 if (table_base + v != (void *)e + e->next_offset
287 && !(e->ip.flags & IPT_F_GOTO)) {
288 /* Save old back ptr in next entry */
289 struct ipt_entry *next
290 = (void *)e + e->next_offset;
292 = (void *)back - table_base;
293 /* set back pointer to next entry */
297 e = get_entry(table_base, v);
299 /* Targets which reenter must return
/* The first entry's comefrom doubles as a reentrancy canary when
 * CONFIG_NETFILTER_DEBUG is set. */
301 #ifdef CONFIG_NETFILTER_DEBUG
302 ((struct ipt_entry *)table_base)->comefrom
305 verdict = t->u.kernel.target->target(pskb,
312 #ifdef CONFIG_NETFILTER_DEBUG
313 if (((struct ipt_entry *)table_base)->comefrom
315 && verdict == IPT_CONTINUE) {
316 printk("Target %s reentered!\n",
317 t->u.kernel.target->name);
320 ((struct ipt_entry *)table_base)->comefrom
323 /* Target might have changed stuff. */
324 ip = (*pskb)->nh.iph;
325 datalen = (*pskb)->len - ip->ihl * 4;
327 if (verdict == IPT_CONTINUE)
328 e = (void *)e + e->next_offset;
336 e = (void *)e + e->next_offset;
340 read_unlock_bh(&table->lock);
342 #ifdef DEBUG_ALLOW_ALL
351 /* All zeroes == unconditional rule. */
/* unconditional — true iff every 32-bit word of the ipt_ip match part
 * is zero, i.e. the rule matches any packet. */
353 unconditional(const struct ipt_ip *ip)
357 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
358 if (((__u32 *)ip)[i])
364 /* Figures out from what hook each rule can be called: returns 0 if
365 there are loops. Puts hook bitmask in comefrom. */
/*
 * Walk the ruleset from each valid hook entry point.  While walking,
 * e->counters.pcnt temporarily stores the back pointer (reset to 0 on
 * the way out) and e->comefrom accumulates the set of hooks that can
 * reach the rule; bit NF_IP_NUMHOOKS marks "currently on the walk
 * stack" and is how loops are detected.
 * NOTE(review): the per-chain walk loop and several braces appear
 * elided in this excerpt.
 */
367 mark_source_chains(struct xt_table_info *newinfo,
368 unsigned int valid_hooks, void *entry0)
372 /* No recursion; use packet counter to save back ptrs (reset
373 to 0 as we leave), and comefrom to save source hook bitmask */
374 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
375 unsigned int pos = newinfo->hook_entry[hook];
377 = (struct ipt_entry *)(entry0 + pos);
379 if (!(valid_hooks & (1 << hook)))
382 /* Set initial back pointer. */
383 e->counters.pcnt = pos;
386 struct ipt_standard_target *t
387 = (void *)ipt_get_target(e);
/* Re-visiting a rule that is still on the walk stack == loop. */
389 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
390 printk("iptables: loop hook %u pos %u %08X.\n",
391 hook, pos, e->comefrom);
395 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
397 /* Unconditional return/END. */
398 if (e->target_offset == sizeof(struct ipt_entry)
399 && (strcmp(t->target.u.user.name,
400 IPT_STANDARD_TARGET) == 0)
402 && unconditional(&e->ip)) {
403 unsigned int oldpos, size;
405 /* Return: backtrack through the last
408 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
409 #ifdef DEBUG_IP_FIREWALL_USER
411 & (1 << NF_IP_NUMHOOKS)) {
412 duprintf("Back unset "
/* Pop the saved back pointer and clear the temporary. */
419 pos = e->counters.pcnt;
420 e->counters.pcnt = 0;
422 /* We're at the start. */
426 e = (struct ipt_entry *)
428 } while (oldpos == pos + e->next_offset);
431 size = e->next_offset;
432 e = (struct ipt_entry *)
433 (entry0 + pos + size);
434 e->counters.pcnt = pos;
437 int newpos = t->verdict;
439 if (strcmp(t->target.u.user.name,
440 IPT_STANDARD_TARGET) == 0
442 /* This a jump; chase it. */
443 duprintf("Jump rule %u -> %u\n",
446 /* ... this is a fallthru */
447 newpos = pos + e->next_offset;
449 e = (struct ipt_entry *)
451 e->counters.pcnt = pos;
456 duprintf("Finished chain %u\n", hook);
/* cleanup_match — IPT_MATCH_ITERATE callback: stop early when the
 * countdown *i reaches zero (partial cleanup after a failure);
 * otherwise run the match's destroy hook and drop its module ref. */
462 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
464 if (i && (*i)-- == 0)
467 if (m->u.kernel.match->destroy)
468 m->u.kernel.match->destroy(m->u.kernel.match, m->data,
469 m->u.match_size - sizeof(*m));
470 module_put(m->u.kernel.match->me);
/* standard_check — validate a standard target's verdict: a
 * non-negative verdict is a jump offset and must stay within the
 * entry blob (max_offset); a negative verdict must not be below
 * -NF_MAX_VERDICT - 1.  (Return statements elided in this excerpt.) */
475 standard_check(const struct ipt_entry_target *t,
476 unsigned int max_offset)
478 struct ipt_standard_target *targ = (void *)t;
480 /* Check standard info. */
481 if (targ->verdict >= 0
482 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
483 duprintf("ipt_standard_check: bad verdict (%i)\n",
487 if (targ->verdict < -NF_MAX_VERDICT - 1) {
488 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
/* check_match — resolve one rule match by name (auto-loading the
 * "ipt_<name>" module if necessary), store it in m->u.kernel.match,
 * then validate it via xt_check_match() and the match's own
 * checkentry() hook.  The trailing module_put() is the error path.
 * NOTE(review): labels/returns appear elided in this excerpt. */
496 check_match(struct ipt_entry_match *m,
498 const struct ipt_ip *ip,
499 unsigned int hookmask,
502 struct ipt_match *match;
505 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
507 "ipt_%s", m->u.user.name);
508 if (IS_ERR(match) || !match) {
509 duprintf("check_match: `%s' not found\n", m->u.user.name);
510 return match ? PTR_ERR(match) : -ENOENT;
512 m->u.kernel.match = match;
514 ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
515 name, hookmask, ip->proto,
516 ip->invflags & IPT_INV_PROTO);
520 if (m->u.kernel.match->checkentry
521 && !m->u.kernel.match->checkentry(name, ip, match, m->data,
522 m->u.match_size - sizeof(*m),
524 duprintf("ip_tables: check failed for `%s'.\n",
525 m->u.kernel.match->name);
/* Error path: release the module reference taken by the lookup. */
533 module_put(m->u.kernel.match->me);
/* Forward declaration so check_entry can recognize the built-in
 * standard target (defined near the bottom of the file). */
537 static struct ipt_target ipt_standard_target;
/* check_entry — validate one complete rule: its ipt_ip part, every
 * match (check_match), and its target (resolved by name with module
 * auto-load, then xt_check_target / standard_check / checkentry).
 * On failure, previously-checked matches are unwound via
 * cleanup_match with the count j.
 * NOTE(review): labels/returns appear elided in this excerpt. */
540 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
543 struct ipt_entry_target *t;
544 struct ipt_target *target;
548 if (!ip_checkentry(&e->ip)) {
549 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
554 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
556 goto cleanup_matches;
558 t = ipt_get_target(e);
559 target = try_then_request_module(xt_find_target(AF_INET,
562 "ipt_%s", t->u.user.name);
563 if (IS_ERR(target) || !target) {
564 duprintf("check_entry: `%s' not found\n", t->u.user.name);
565 ret = target ? PTR_ERR(target) : -ENOENT;
566 goto cleanup_matches;
568 t->u.kernel.target = target;
570 ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
571 name, e->comefrom, e->ip.proto,
572 e->ip.invflags & IPT_INV_PROTO);
/* Standard target gets an extra verdict-range check; others get
 * their own checkentry() hook. */
576 if (t->u.kernel.target == &ipt_standard_target) {
577 if (!standard_check(t, size)) {
579 goto cleanup_matches;
581 } else if (t->u.kernel.target->checkentry
582 && !t->u.kernel.target->checkentry(name, e, target, t->data,
586 duprintf("ip_tables: check failed for `%s'.\n",
587 t->u.kernel.target->name);
595 module_put(t->u.kernel.target->me);
597 IPT_MATCH_ITERATE(e, cleanup_match, &j);
/* check_entry_size_and_hooks — per-entry pass over the user blob:
 * verify alignment and that the entry fits before `limit`, record
 * hook entry points / underflows whose offsets coincide with this
 * entry, and reset the counters.  (Returns elided in this excerpt.) */
602 check_entry_size_and_hooks(struct ipt_entry *e,
603 struct xt_table_info *newinfo,
605 unsigned char *limit,
606 const unsigned int *hook_entries,
607 const unsigned int *underflows,
612 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
613 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
614 duprintf("Bad offset %p\n", e);
619 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
620 duprintf("checking: element %p size %u\n",
625 /* Check hooks & underflows */
626 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
627 if ((unsigned char *)e - base == hook_entries[h])
628 newinfo->hook_entry[h] = hook_entries[h];
629 if ((unsigned char *)e - base == underflows[h])
630 newinfo->underflow[h] = underflows[h];
633 /* FIXME: underflows must be unconditional, standard verdicts
634 < 0 (not IPT_RETURN). --RR */
636 /* Clear counters and comefrom */
637 e->counters = ((struct xt_counters) { 0, 0 });
/* cleanup_entry — IPT_ENTRY_ITERATE callback: tear down one rule.
 * Stops early when the countdown *i hits zero; otherwise destroys all
 * its matches, runs the target's destroy hook, and drops the target's
 * module reference. */
645 cleanup_entry(struct ipt_entry *e, unsigned int *i)
647 struct ipt_entry_target *t;
649 if (i && (*i)-- == 0)
652 /* Cleanup all matches */
653 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
654 t = ipt_get_target(e);
655 if (t->u.kernel.target->destroy)
656 t->u.kernel.target->destroy(t->u.kernel.target, t->data,
657 t->u.target_size - sizeof(*t));
658 module_put(t->u.kernel.target->me);
662 /* Checks and translates the user-supplied table segment (held in
/*
 * translate_table — full validation of a user-supplied ruleset:
 * per-entry size/hook pass, verify every valid hook got an entry
 * point and underflow, loop-check via mark_source_chains, then the
 * per-entry semantic check (check_entry).  On success the verified
 * blob is copied to every other CPU's entries array.  On check_entry
 * failure the already-checked prefix is unwound with cleanup_entry.
 * NOTE(review): several lines (signature tail, error labels, returns)
 * appear elided in this excerpt.
 */
665 translate_table(const char *name,
666 unsigned int valid_hooks,
667 struct xt_table_info *newinfo,
671 const unsigned int *hook_entries,
672 const unsigned int *underflows)
677 newinfo->size = size;
678 newinfo->number = number;
680 /* Init all hooks to impossible value. */
681 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
682 newinfo->hook_entry[i] = 0xFFFFFFFF;
683 newinfo->underflow[i] = 0xFFFFFFFF;
686 duprintf("translate_table: size %u\n", newinfo->size);
688 /* Walk through entries, checking offsets. */
689 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
690 check_entry_size_and_hooks,
694 hook_entries, underflows, &i);
699 duprintf("translate_table: %u not %u entries\n",
704 /* Check hooks all assigned */
705 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
706 /* Only hooks which are valid */
707 if (!(valid_hooks & (1 << i)))
709 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
710 duprintf("Invalid hook entry %u %u\n",
714 if (newinfo->underflow[i] == 0xFFFFFFFF) {
715 duprintf("Invalid underflow %u %u\n",
721 if (!mark_source_chains(newinfo, valid_hooks, entry0))
724 /* Finally, each sanity check must pass */
726 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
727 check_entry, name, size, &i);
/* Failure: unwind only the i entries that passed check_entry. */
730 IPT_ENTRY_ITERATE(entry0, newinfo->size,
735 /* And one copy for every other CPU */
737 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
738 memcpy(newinfo->entries[i], entry0, newinfo->size);
/* add_entry_to_counter — ITERATE callback: add one entry's byte and
 * packet counters into the aggregate array slot *i. */
746 add_entry_to_counter(const struct ipt_entry *e,
747 struct xt_counters total[],
750 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/* set_entry_to_counter — like add_entry_to_counter but overwrites
 * the slot; used for the first (current) CPU so callers need not
 * memset the array.  Note the ipt_counters (vs xt_counters) type. */
757 set_entry_to_counter(const struct ipt_entry *e,
758 struct ipt_counters total[],
761 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/* get_counters — aggregate per-CPU entry counters into `counters`:
 * SET from the current CPU's copy first (avoids a prior memset), then
 * ADD every other CPU's copy.  (The for_each-cpu loop head is elided
 * in this excerpt.) */
768 get_counters(const struct xt_table_info *t,
769 struct xt_counters counters[])
775 /* Instead of clearing (by a previous call to memset())
776 * the counters and using adds, we set the counters
777 * with data used by 'current' CPU
778 * We dont care about preemption here.
780 curcpu = raw_smp_processor_id();
783 IPT_ENTRY_ITERATE(t->entries[curcpu],
785 set_entry_to_counter,
793 IPT_ENTRY_ITERATE(t->entries[cpu],
795 add_entry_to_counter,
/* copy_entries_to_user — export the ruleset to userspace: take an
 * atomic counter snapshot under the write lock, copy this CPU's copy
 * of the entry blob wholesale, then patch each copied entry's
 * counters and replace kernel match/target pointers with their names.
 * NOTE(review): error labels, vfree and returns are elided in this
 * excerpt. */
802 copy_entries_to_user(unsigned int total_size,
803 struct ipt_table *table,
804 void __user *userptr)
806 unsigned int off, num, countersize;
808 struct xt_counters *counters;
809 struct xt_table_info *private = table->private;
813 /* We need atomic snapshot of counters: rest doesn't change
814 (other than comefrom, which userspace doesn't care
816 countersize = sizeof(struct xt_counters) * private->number;
817 counters = vmalloc_node(countersize, numa_node_id());
819 if (counters == NULL)
822 /* First, sum counters... */
/* Write lock excludes the packet path so the sum is consistent. */
823 write_lock_bh(&table->lock);
824 get_counters(private, counters);
825 write_unlock_bh(&table->lock);
827 /* choose the copy that is on our node/cpu, ...
828 * This choice is lazy (because current thread is
829 * allowed to migrate to another cpu)
831 loc_cpu_entry = private->entries[raw_smp_processor_id()];
832 /* ... then copy entire thing ... */
833 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
838 /* FIXME: use iterator macros --RR */
839 /* ... then go back and fix counters and names */
840 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
842 struct ipt_entry_match *m;
843 struct ipt_entry_target *t;
845 e = (struct ipt_entry *)(loc_cpu_entry + off);
846 if (copy_to_user(userptr + off
847 + offsetof(struct ipt_entry, counters),
849 sizeof(counters[num])) != 0) {
/* Rewrite each match's kernel pointer as its user-visible name. */
854 for (i = sizeof(struct ipt_entry);
855 i < e->target_offset;
856 i += m->u.match_size) {
859 if (copy_to_user(userptr + off + i
860 + offsetof(struct ipt_entry_match,
862 m->u.kernel.match->name,
863 strlen(m->u.kernel.match->name)+1)
/* Same fixup for the target's name. */
870 t = ipt_get_target(e);
871 if (copy_to_user(userptr + off + e->target_offset
872 + offsetof(struct ipt_entry_target,
874 t->u.kernel.target->name,
875 strlen(t->u.kernel.target->name)+1) != 0) {
/* get_entries — IPT_SO_GET_ENTRIES helper: look up the table by name
 * and, if the caller's size matches the live table size, dump the
 * entries into uptr->entrytable.  (Unlock/module_put and returns are
 * elided in this excerpt.) */
887 get_entries(const struct ipt_get_entries *entries,
888 struct ipt_get_entries __user *uptr)
893 t = xt_find_table_lock(AF_INET, entries->name);
894 if (t && !IS_ERR(t)) {
895 struct xt_table_info *private = t->private;
896 duprintf("t->private->number = %u\n",
898 if (entries->size == private->size)
899 ret = copy_entries_to_user(private->size,
900 t, uptr->entrytable);
902 duprintf("get_entries: I've got %u not %u!\n",
910 ret = t ? PTR_ERR(t) : -ENOENT;
/* do_replace — IPT_SO_SET_REPLACE: copy a whole replacement ruleset
 * from userspace, validate/translate it, atomically swap it in with
 * xt_replace_table, then hand the old table's counters back to
 * userspace and free the old table.
 * NOTE(review): several lines (locals, module_get/put for the usage-
 * count adjustment, some labels and returns) are elided in this
 * excerpt. */
916 do_replace(void __user *user, unsigned int len)
919 struct ipt_replace tmp;
921 struct xt_table_info *newinfo, *oldinfo;
922 struct xt_counters *counters;
923 void *loc_cpu_entry, *loc_cpu_old_entry;
925 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
928 /* Hack: Causes ipchains to give correct error msg --RR */
929 if (len != sizeof(tmp) + tmp.size)
/* Overflow checks on the untrusted sizes before allocating. */
933 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
936 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
939 newinfo = xt_alloc_table_info(tmp.size);
943 /* choose the copy that is our node/cpu */
944 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
945 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
951 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
957 ret = translate_table(tmp.name, tmp.valid_hooks,
958 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
959 tmp.hook_entry, tmp.underflow);
961 goto free_newinfo_counters;
963 duprintf("ip_tables: Translated table\n");
965 t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
966 "iptable_%s", tmp.name);
967 if (!t || IS_ERR(t)) {
968 ret = t ? PTR_ERR(t) : -ENOENT;
969 goto free_newinfo_counters_untrans;
973 if (tmp.valid_hooks != t->valid_hooks) {
974 duprintf("Valid hook crap: %08X vs %08X\n",
975 tmp.valid_hooks, t->valid_hooks);
980 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
984 /* Update module usage count based on number of rules */
985 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
986 oldinfo->number, oldinfo->initial_entries, newinfo->number);
987 if ((oldinfo->number > oldinfo->initial_entries) ||
988 (newinfo->number <= oldinfo->initial_entries))
990 if ((oldinfo->number > oldinfo->initial_entries) &&
991 (newinfo->number <= oldinfo->initial_entries))
994 /* Get the old counters. */
995 get_counters(oldinfo, counters);
996 /* Decrease module usage counts and free resource */
997 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
998 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
999 xt_free_table_info(oldinfo);
1000 if (copy_to_user(tmp.counters, counters,
1001 sizeof(struct xt_counters) * tmp.num_counters) != 0)
/* Error unwinding: untranslate (cleanup) then free the new table. */
1010 free_newinfo_counters_untrans:
1011 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1012 free_newinfo_counters:
1015 xt_free_table_info(newinfo);
1019 /* We're lazy, and add to the first CPU; overflow works its fey magic
1020 * and everything is OK. */
/* add_counter_to_entry — ITERATE callback used by do_add_counters:
 * fold the user-supplied delta addme[*i] into this entry's counters. */
1022 add_counter_to_entry(struct ipt_entry *e,
1023 const struct xt_counters addme[],
1027 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1029 (long unsigned int)e->counters.pcnt,
1030 (long unsigned int)e->counters.bcnt,
1031 (long unsigned int)addme[*i].pcnt,
1032 (long unsigned int)addme[*i].bcnt);
1035 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
/* do_add_counters — IPT_SO_SET_ADD_COUNTERS: copy an xt_counters_info
 * blob from userspace, check its length and counter count against
 * the live table, and add the deltas into this CPU's entry counters
 * under the write lock.  (vfree/unlock labels and returns elided in
 * this excerpt.) */
1042 do_add_counters(void __user *user, unsigned int len)
1045 struct xt_counters_info tmp, *paddc;
1046 struct ipt_table *t;
1047 struct xt_table_info *private;
1049 void *loc_cpu_entry;
1051 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1054 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1057 paddc = vmalloc_node(len, numa_node_id());
1061 if (copy_from_user(paddc, user, len) != 0) {
1066 t = xt_find_table_lock(AF_INET, tmp.name);
1067 if (!t || IS_ERR(t)) {
1068 ret = t ? PTR_ERR(t) : -ENOENT;
1072 write_lock_bh(&t->lock);
1073 private = t->private;
1074 if (private->number != paddc->num_counters) {
1076 goto unlock_up_free;
1080 /* Choose the copy that is on our node */
1081 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1082 IPT_ENTRY_ITERATE(loc_cpu_entry,
1084 add_counter_to_entry,
1088 write_unlock_bh(&t->lock);
/* do_ipt_set_ctl — setsockopt entry point; requires CAP_NET_ADMIN
 * and dispatches REPLACE / ADD_COUNTERS.  (Return and -EPERM paths
 * elided in this excerpt.) */
1098 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1102 if (!capable(CAP_NET_ADMIN))
1106 case IPT_SO_SET_REPLACE:
1107 ret = do_replace(user, len);
1110 case IPT_SO_SET_ADD_COUNTERS:
1111 ret = do_add_counters(user, len);
1115 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
/* do_ipt_get_ctl — getsockopt entry point; requires CAP_NET_ADMIN.
 * Handles GET_INFO (table metadata), GET_ENTRIES (rule dump) and
 * GET_REVISION_MATCH/TARGET (extension revision probe).
 * NOTE(review): several braces, unlock/module_put lines and returns
 * are elided in this excerpt. */
1123 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1127 if (!capable(CAP_NET_ADMIN))
1131 case IPT_SO_GET_INFO: {
1132 char name[IPT_TABLE_MAXNAMELEN];
1133 struct ipt_table *t;
1135 if (*len != sizeof(struct ipt_getinfo)) {
1136 duprintf("length %u != %u\n", *len,
1137 sizeof(struct ipt_getinfo));
1142 if (copy_from_user(name, user, sizeof(name)) != 0) {
/* Force NUL-termination of the untrusted table name. */
1146 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1148 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1149 "iptable_%s", name);
1150 if (t && !IS_ERR(t)) {
1151 struct ipt_getinfo info;
1152 struct xt_table_info *private = t->private;
1154 info.valid_hooks = t->valid_hooks;
1155 memcpy(info.hook_entry, private->hook_entry,
1156 sizeof(info.hook_entry));
1157 memcpy(info.underflow, private->underflow,
1158 sizeof(info.underflow));
1159 info.num_entries = private->number;
1160 info.size = private->size;
1161 memcpy(info.name, name, sizeof(info.name));
1163 if (copy_to_user(user, &info, *len) != 0)
1170 ret = t ? PTR_ERR(t) : -ENOENT;
1174 case IPT_SO_GET_ENTRIES: {
1175 struct ipt_get_entries get;
1177 if (*len < sizeof(get)) {
1178 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1180 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1182 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1183 duprintf("get_entries: %u != %u\n", *len,
1184 sizeof(struct ipt_get_entries) + get.size);
1187 ret = get_entries(&get, user);
1191 case IPT_SO_GET_REVISION_MATCH:
1192 case IPT_SO_GET_REVISION_TARGET: {
1193 struct ipt_get_revision rev;
1196 if (*len != sizeof(rev)) {
1200 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1205 if (cmd == IPT_SO_GET_REVISION_TARGET)
/* Auto-load the "ipt_<name>" extension module if not present. */
1210 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1213 "ipt_%s", rev.name);
1218 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
/* ipt_register_table — register a built-in table: allocate the
 * per-CPU xt_table_info, copy in the initial ruleset, translate it,
 * and hand it to xt_register_table (with an empty bootstrap info as
 * the placeholder being replaced).  (Returns elided in this excerpt.) */
1225 int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
1228 struct xt_table_info *newinfo;
1229 static struct xt_table_info bootstrap
1230 = { 0, 0, 0, { 0 }, { 0 }, { } };
1231 void *loc_cpu_entry;
1233 newinfo = xt_alloc_table_info(repl->size);
1237 /* choose the copy on our node/cpu
1238 * but dont care of preemption
1240 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1241 memcpy(loc_cpu_entry, repl->entries, repl->size);
1243 ret = translate_table(table->name, table->valid_hooks,
1244 newinfo, loc_cpu_entry, repl->size,
1249 xt_free_table_info(newinfo);
1253 if (xt_register_table(table, &bootstrap, newinfo) != 0) {
1254 xt_free_table_info(newinfo);
/* ipt_unregister_table — detach the table from x_tables, tear down
 * every rule (dropping match/target module refs) and free the
 * per-CPU table info. */
1261 void ipt_unregister_table(struct ipt_table *table)
1263 struct xt_table_info *private;
1264 void *loc_cpu_entry;
1266 private = xt_unregister_table(table);
1268 /* Decrease module usage counts and free resources */
1269 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1270 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
1271 xt_free_table_info(private);
1274 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
/* test_type == 0xFF is the "match any ICMP" wildcard; otherwise the
 * type must match exactly and the code fall within [min,max].  The
 * result is XORed with `invert` (continuation elided in excerpt). */
1276 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1277 u_int8_t type, u_int8_t code,
1280 return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
/* icmp_match — match hook for the built-in ICMP match: pull the ICMP
 * header at protoff and compare type/code against the rule's range.
 * A truncated header sets *hotdrop ("evil tinygram") so the packet
 * is dropped rather than mis-matched.  (Fragment check and the
 * hotdrop assignment are elided in this excerpt.) */
1285 icmp_match(const struct sk_buff *skb,
1286 const struct net_device *in,
1287 const struct net_device *out,
1288 const void *matchinfo,
1290 unsigned int protoff,
1293 struct icmphdr _icmph, *ic;
1294 const struct ipt_icmp *icmpinfo = matchinfo;
1296 /* Must not be a fragment. */
1300 ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1302 /* We've been asked to examine this packet, and we
1303 * can't. Hence, no choice but to drop.
1305 duprintf("Dropping evil ICMP tinygram.\n");
1310 return icmp_type_code_match(icmpinfo->type,
1314 !!(icmpinfo->invflags&IPT_ICMP_INV));
1317 /* Called when user tries to insert an entry of this type. */
/* icmp_checkentry — accept the rule only if its invflags contain no
 * bits outside IPT_ICMP_INV. */
1319 icmp_checkentry(const char *tablename,
1322 unsigned int matchsize,
1323 unsigned int hook_mask)
1325 const struct ipt_icmp *icmpinfo = matchinfo;
1327 /* Must specify no unknown invflags */
1328 return !(icmpinfo->invflags & ~IPT_ICMP_INV);
1331 /* The built-in targets: standard (NULL) and error. */
/* Standard target: no .target function — ipt_do_table recognizes the
 * NULL handler and interprets the verdict inline. */
1332 static struct ipt_target ipt_standard_target = {
1333 .name = IPT_STANDARD_TARGET,
1334 .targetsize = sizeof(int),
/* Error target: carries a message string; handler just logs it. */
1337 static struct ipt_target ipt_error_target = {
1338 .name = IPT_ERROR_TARGET,
1339 .target = ipt_error,
1340 .targetsize = IPT_FUNCTION_MAXNAMELEN,
/* sockopt registration: routes {set,get}sockopt calls in the
 * IPT_BASE_CTL..IPT_SO_{SET,GET}_MAX range to the handlers above. */
1343 static struct nf_sockopt_ops ipt_sockopts = {
1345 .set_optmin = IPT_BASE_CTL,
1346 .set_optmax = IPT_SO_SET_MAX+1,
1347 .set = do_ipt_set_ctl,
1348 .get_optmin = IPT_BASE_CTL,
1349 .get_optmax = IPT_SO_GET_MAX+1,
1350 .get = do_ipt_get_ctl,
/* Built-in ICMP match, registered for IPPROTO_ICMP at module init. */
1353 static struct ipt_match icmp_matchstruct = {
1355 .match = icmp_match,
1356 .matchsize = sizeof(struct ipt_icmp),
1357 .proto = IPPROTO_ICMP,
1358 .checkentry = icmp_checkentry,
/* init — module entry: initialize the AF_INET x_tables layer,
 * register the built-in targets and ICMP match, then the sockopt
 * interface.  (Error unwinding and return elided in this excerpt.) */
1361 static int __init init(void)
1365 xt_proto_init(AF_INET);
1367 /* Noone else will be downing sem now, so we won't sleep */
1368 xt_register_target(AF_INET, &ipt_standard_target);
1369 xt_register_target(AF_INET, &ipt_error_target);
1370 xt_register_match(AF_INET, &icmp_matchstruct);
1372 /* Register setsockopt */
1373 ret = nf_register_sockopt(&ipt_sockopts);
1375 duprintf("Unable to register sockopts.\n");
1379 printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
/* fini — module exit: unregister everything in reverse order of
 * registration in init(). */
1383 static void __exit fini(void)
1385 nf_unregister_sockopt(&ipt_sockopts);
1387 xt_unregister_match(AF_INET, &icmp_matchstruct);
1388 xt_unregister_target(AF_INET, &ipt_error_target);
1389 xt_unregister_target(AF_INET, &ipt_standard_target);
1391 xt_proto_fini(AF_INET);
/* Public API for other kernel modules (iptable_filter, _nat, ...). */
1394 EXPORT_SYMBOL(ipt_register_table);
1395 EXPORT_SYMBOL(ipt_unregister_table);
1396 EXPORT_SYMBOL(ipt_do_table);