2 * Packet matching code.
4 * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5 * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
11 * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12 * - increase module usage count as soon as we have rules inside
14 * 08 Oct 2005 Harald Welte <laforge@netfilter.org>
15 * - Generalize into "x_tables" layer and "{ip,ip6,arp}_tables"
17 #include <linux/config.h>
18 #include <linux/cache.h>
19 #include <linux/capability.h>
20 #include <linux/skbuff.h>
21 #include <linux/kmod.h>
22 #include <linux/vmalloc.h>
23 #include <linux/netdevice.h>
24 #include <linux/module.h>
25 #include <linux/icmp.h>
27 #include <asm/uaccess.h>
28 #include <linux/mutex.h>
29 #include <linux/proc_fs.h>
30 #include <linux/err.h>
31 #include <linux/cpumask.h>
33 #include <linux/netfilter/x_tables.h>
34 #include <linux/netfilter_ipv4/ip_tables.h>
/* Module metadata plus compile-time debug helpers.
 * NOTE(review): the leading integers on every line of this extract are
 * original source line numbers fused in by extraction, and several lines
 * (the #else/#endif of each conditional) are missing. */
36 MODULE_LICENSE("GPL");
37 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
38 MODULE_DESCRIPTION("IPv4 packet filter");
40 /*#define DEBUG_IP_FIREWALL*/
41 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
42 /*#define DEBUG_IP_FIREWALL_USER*/
/* dprintf: packet-path debug output, compiled away unless DEBUG_IP_FIREWALL. */
44 #ifdef DEBUG_IP_FIREWALL
45 #define dprintf(format, args...) printk(format , ## args)
47 #define dprintf(format, args...)
/* duprintf: user/config-path debug output, gated on DEBUG_IP_FIREWALL_USER. */
50 #ifdef DEBUG_IP_FIREWALL_USER
51 #define duprintf(format, args...) printk(format , ## args)
53 #define duprintf(format, args...)
/* IP_NF_ASSERT: sanity assertion, active only with CONFIG_NETFILTER_DEBUG. */
56 #ifdef CONFIG_NETFILTER_DEBUG
57 #define IP_NF_ASSERT(x) \
60 printk("IP_NF_ASSERT: %s:%s:%u\n", \
61 __FUNCTION__, __FILE__, __LINE__); \
64 #define IP_NF_ASSERT(x)
68 /* All the better to debug you with... */
74 We keep a set of rules for each CPU, so we can avoid write-locking
75 them in the softirq when updating the counters and therefore
76 only need to read-lock in the softirq; doing a write_lock_bh() in user
77 context stops packets coming through and allows user context to read
78 the counters or update the rules.
80 Hence the start of any table is found via get_entry() on the per-CPU entries pointer below. */
/*
 * ip_packet_match — does this IP header plus in/out device satisfy the
 * ipt_ip part of a rule?  Checks masked source/destination address,
 * in/out interface name, protocol and the fragment flag; each test can
 * be inverted by a bit in ipinfo->invflags (see FWINV).
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
82 /* Returns whether matches rule or not. */
84 ip_packet_match(const struct iphdr *ip,
87 const struct ipt_ip *ipinfo,
/* FWINV: XOR a test result with its inversion bit from invflags. */
93 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
95 if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
97 || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
99 dprintf("Source or dest mismatch.\n");
101 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
103 NIPQUAD(ipinfo->smsk.s_addr),
104 NIPQUAD(ipinfo->src.s_addr),
105 ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
106 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
108 NIPQUAD(ipinfo->dmsk.s_addr),
109 NIPQUAD(ipinfo->dst.s_addr),
110 ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
/* Interface names are compared unsigned-long-at-a-time under a mask. */
114 /* Look for ifname matches; this should unroll nicely. */
115 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
116 ret |= (((const unsigned long *)indev)[i]
117 ^ ((const unsigned long *)ipinfo->iniface)[i])
118 & ((const unsigned long *)ipinfo->iniface_mask)[i];
121 if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
122 dprintf("VIA in mismatch (%s vs %s).%s\n",
123 indev, ipinfo->iniface,
124 ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
128 for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
129 ret |= (((const unsigned long *)outdev)[i]
130 ^ ((const unsigned long *)ipinfo->outiface)[i])
131 & ((const unsigned long *)ipinfo->outiface_mask)[i];
134 if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
135 dprintf("VIA out mismatch (%s vs %s).%s\n",
136 outdev, ipinfo->outiface,
137 ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
141 /* Check specific protocol */
143 && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
144 dprintf("Packet protocol %hi does not match %hi.%s\n",
145 ip->protocol, ipinfo->proto,
146 ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
150 /* If we have a fragment rule but the packet is not a fragment
151 * then we return zero */
152 if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
153 dprintf("Fragment rule but not fragment.%s\n",
154 ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
162 ip_checkentry(const struct ipt_ip *ip)
164 if (ip->flags & ~IPT_F_MASK) {
165 duprintf("Unknown flag bits set: %08X\n",
166 ip->flags & ~IPT_F_MASK);
169 if (ip->invflags & ~IPT_INV_MASK) {
170 duprintf("Unknown invflag bits set: %08X\n",
171 ip->invflags & ~IPT_INV_MASK);
178 ipt_error(struct sk_buff **pskb,
179 const struct net_device *in,
180 const struct net_device *out,
181 unsigned int hooknum,
182 const struct xt_target *target,
183 const void *targinfo,
187 printk("ip_tables: error: `%s'\n", (char *)targinfo);
193 int do_match(struct ipt_entry_match *m,
194 const struct sk_buff *skb,
195 const struct net_device *in,
196 const struct net_device *out,
200 /* Stop iteration if it doesn't match */
201 if (!m->u.kernel.match->match(skb, in, out, m->u.kernel.match, m->data,
202 offset, skb->nh.iph->ihl*4, hotdrop))
/* Translate a byte offset within a table blob into an entry pointer.
 * NOTE(review): restores braces dropped by extraction. */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
	return (struct ipt_entry *)(base + offset);
}
/*
 * ipt_do_table — the main packet-walking loop.  Under the table read
 * lock, traverse this CPU's copy of the ruleset starting at the hook's
 * entry point: match each rule (ip_packet_match plus per-match
 * callbacks via IPT_MATCH_ITERATE), bump its counters, and apply its
 * target.  Standard targets (NULL target function) implement
 * verdict/jump/RETURN inline using the 'back' pointer stack that is
 * threaded through the entries themselves.  Returns an NF_* verdict.
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
214 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
216 ipt_do_table(struct sk_buff **pskb,
218 const struct net_device *in,
219 const struct net_device *out,
220 struct ipt_table *table,
223 static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
228 /* Initializing verdict to NF_DROP keeps gcc happy. */
229 unsigned int verdict = NF_DROP;
230 const char *indev, *outdev;
232 struct ipt_entry *e, *back;
233 struct xt_table_info *private = table->private;
236 ip = (*pskb)->nh.iph;
237 datalen = (*pskb)->len - ip->ihl * 4;
238 indev = in ? in->name : nulldevname;
239 outdev = out ? out->name : nulldevname;
240 /* We handle fragments by dealing with the first fragment as
241 * if it was a normal packet. All other fragments are treated
242 * normally, except that they will NEVER match rules that ask
243 * things we don't know, ie. tcp syn flag or ports). If the
244 * rule is also a fragment-specific rule, non-fragments won't
246 offset = ntohs(ip->frag_off) & IP_OFFSET;
248 read_lock_bh(&table->lock);
249 IP_NF_ASSERT(table->valid_hooks & (1 << hook));
/* Per-CPU copy of the ruleset; safe to traverse under the read lock. */
250 table_base = (void *)private->entries[smp_processor_id()];
251 e = get_entry(table_base, private->hook_entry[hook]);
253 /* For return from builtin chain */
254 back = get_entry(table_base, private->underflow[hook]);
259 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
260 struct ipt_entry_target *t;
262 if (IPT_MATCH_ITERATE(e, do_match,
264 offset, &hotdrop) != 0)
267 ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
269 t = ipt_get_target(e);
270 IP_NF_ASSERT(t->u.kernel.target);
271 /* Standard target? */
272 if (!t->u.kernel.target->target) {
275 v = ((struct ipt_standard_target *)t)->verdict;
277 /* Pop from stack? */
278 if (v != IPT_RETURN) {
/* Negative standard verdicts encode NF_* codes as -(code)-1. */
279 verdict = (unsigned)(-v) - 1;
283 back = get_entry(table_base,
287 if (table_base + v != (void *)e + e->next_offset
288 && !(e->ip.flags & IPT_F_GOTO)) {
289 /* Save old back ptr in next entry */
290 struct ipt_entry *next
291 = (void *)e + e->next_offset;
293 = (void *)back - table_base;
294 /* set back pointer to next entry */
298 e = get_entry(table_base, v);
/* Non-standard target: call into the extension module. */
300 /* Targets which reenter must return
302 #ifdef CONFIG_NETFILTER_DEBUG
303 ((struct ipt_entry *)table_base)->comefrom
306 verdict = t->u.kernel.target->target(pskb,
313 #ifdef CONFIG_NETFILTER_DEBUG
314 if (((struct ipt_entry *)table_base)->comefrom
316 && verdict == IPT_CONTINUE) {
317 printk("Target %s reentered!\n",
318 t->u.kernel.target->name);
321 ((struct ipt_entry *)table_base)->comefrom
324 /* Target might have changed stuff. */
325 ip = (*pskb)->nh.iph;
326 datalen = (*pskb)->len - ip->ihl * 4;
328 if (verdict == IPT_CONTINUE)
329 e = (void *)e + e->next_offset;
337 e = (void *)e + e->next_offset;
341 read_unlock_bh(&table->lock);
343 #ifdef DEBUG_ALLOW_ALL
352 /* All zeroes == unconditional rule. */
354 unconditional(const struct ipt_ip *ip)
358 for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
359 if (((__u32 *)ip)[i])
/*
 * mark_source_chains — iterative depth-first walk of every chain
 * reachable from each valid hook entry point.  Records the set of
 * source hooks in each entry's 'comefrom' bitmask and detects rule
 * loops (returns 0 on a loop, nonzero on success).  To avoid
 * recursion it stores back-pointers in e->counters.pcnt (reset to 0
 * on the way back out).
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
365 /* Figures out from what hook each rule can be called: returns 0 if
366 there are loops. Puts hook bitmask in comefrom. */
368 mark_source_chains(struct xt_table_info *newinfo,
369 unsigned int valid_hooks, void *entry0)
373 /* No recursion; use packet counter to save back ptrs (reset
374 to 0 as we leave), and comefrom to save source hook bitmask */
375 for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
376 unsigned int pos = newinfo->hook_entry[hook];
378 = (struct ipt_entry *)(entry0 + pos);
380 if (!(valid_hooks & (1 << hook)))
383 /* Set initial back pointer. */
384 e->counters.pcnt = pos;
387 struct ipt_standard_target *t
388 = (void *)ipt_get_target(e);
/* Bit NF_IP_NUMHOOKS in comefrom marks "currently on the walk path":
 * seeing it again means we looped. */
390 if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
391 printk("iptables: loop hook %u pos %u %08X.\n",
392 hook, pos, e->comefrom);
396 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
398 /* Unconditional return/END. */
399 if (e->target_offset == sizeof(struct ipt_entry)
400 && (strcmp(t->target.u.user.name,
401 IPT_STANDARD_TARGET) == 0)
403 && unconditional(&e->ip)) {
404 unsigned int oldpos, size;
406 /* Return: backtrack through the last
409 e->comefrom ^= (1<<NF_IP_NUMHOOKS);
410 #ifdef DEBUG_IP_FIREWALL_USER
412 & (1 << NF_IP_NUMHOOKS)) {
413 duprintf("Back unset "
420 pos = e->counters.pcnt;
421 e->counters.pcnt = 0;
423 /* We're at the start. */
427 e = (struct ipt_entry *)
429 } while (oldpos == pos + e->next_offset);
432 size = e->next_offset;
433 e = (struct ipt_entry *)
434 (entry0 + pos + size);
435 e->counters.pcnt = pos;
/* Jump target (standard target, verdict > 0) or plain fallthrough. */
438 int newpos = t->verdict;
440 if (strcmp(t->target.u.user.name,
441 IPT_STANDARD_TARGET) == 0
443 /* This a jump; chase it. */
444 duprintf("Jump rule %u -> %u\n",
447 /* ... this is a fallthru */
448 newpos = pos + e->next_offset;
450 e = (struct ipt_entry *)
452 e->counters.pcnt = pos;
457 duprintf("Finished chain %u\n", hook);
463 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
465 if (i && (*i)-- == 0)
468 if (m->u.kernel.match->destroy)
469 m->u.kernel.match->destroy(m->u.kernel.match, m->data,
470 m->u.match_size - sizeof(*m));
471 module_put(m->u.kernel.match->me);
476 standard_check(const struct ipt_entry_target *t,
477 unsigned int max_offset)
479 struct ipt_standard_target *targ = (void *)t;
481 /* Check standard info. */
482 if (targ->verdict >= 0
483 && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
484 duprintf("ipt_standard_check: bad verdict (%i)\n",
488 if (targ->verdict < -NF_MAX_VERDICT - 1) {
489 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
/*
 * check_match — resolve one rule match by name (auto-loading the
 * "ipt_<name>" module via try_then_request_module if needed), then
 * validate it with xt_check_match and the extension's own checkentry
 * hook.  Bumps *i on success so the caller can unwind exactly the
 * matches that were set up; on failure the module reference is dropped.
 *
 * NOTE(review): this extract is missing lines (error label, returns)
 * and the leading integers are original line numbers fused in by
 * extraction — do not compile as-is.
 */
497 check_match(struct ipt_entry_match *m,
499 const struct ipt_ip *ip,
500 unsigned int hookmask,
503 struct ipt_match *match;
506 match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
508 "ipt_%s", m->u.user.name);
509 if (IS_ERR(match) || !match) {
510 duprintf("check_match: `%s' not found\n", m->u.user.name);
511 return match ? PTR_ERR(match) : -ENOENT;
513 m->u.kernel.match = match;
515 ret = xt_check_match(match, AF_INET, m->u.match_size - sizeof(*m),
516 name, hookmask, ip->proto,
517 ip->invflags & IPT_INV_PROTO);
521 if (m->u.kernel.match->checkentry
522 && !m->u.kernel.match->checkentry(name, ip, match, m->data,
523 m->u.match_size - sizeof(*m),
525 duprintf("ip_tables: check failed for `%s'.\n",
526 m->u.kernel.match->name);
/* Error path: release the module reference taken by the lookup. */
534 module_put(m->u.kernel.match->me);
/* Forward declaration so check_entry can recognize the built-in
 * standard target defined near the bottom of this file. */
538 static struct ipt_target ipt_standard_target;
/*
 * check_entry — validate one complete rule: its IP part
 * (ip_checkentry), every match (check_match, counted in j), and its
 * target, which is looked up by name with module auto-load and checked
 * via xt_check_target, standard_check (for the built-in standard
 * target) or the target's own checkentry hook.  On failure the
 * already-initialized matches are unwound via cleanup_match.
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
541 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
544 struct ipt_entry_target *t;
545 struct ipt_target *target;
549 if (!ip_checkentry(&e->ip)) {
550 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
555 ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
557 goto cleanup_matches;
559 t = ipt_get_target(e);
560 target = try_then_request_module(xt_find_target(AF_INET,
563 "ipt_%s", t->u.user.name);
564 if (IS_ERR(target) || !target) {
565 duprintf("check_entry: `%s' not found\n", t->u.user.name);
566 ret = target ? PTR_ERR(target) : -ENOENT;
567 goto cleanup_matches;
569 t->u.kernel.target = target;
571 ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t),
572 name, e->comefrom, e->ip.proto,
573 e->ip.invflags & IPT_INV_PROTO);
577 if (t->u.kernel.target == &ipt_standard_target) {
578 if (!standard_check(t, size)) {
580 goto cleanup_matches;
582 } else if (t->u.kernel.target->checkentry
583 && !t->u.kernel.target->checkentry(name, e, target, t->data,
587 duprintf("ip_tables: check failed for `%s'.\n",
588 t->u.kernel.target->name);
/* Unwind: drop the target module ref, then destroy the j matches set up. */
596 module_put(t->u.kernel.target->me);
598 IPT_MATCH_ITERATE(e, cleanup_match, &j);
/*
 * check_entry_size_and_hooks — first-pass per-entry validation of the
 * user-supplied blob: alignment and bounds of the entry, minimum size,
 * then record any hook entry point or underflow whose offset matches
 * this entry.  Also zeroes the entry's counters and comefrom for the
 * later passes.
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
603 check_entry_size_and_hooks(struct ipt_entry *e,
604 struct xt_table_info *newinfo,
606 unsigned char *limit,
607 const unsigned int *hook_entries,
608 const unsigned int *underflows,
613 if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
614 || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
615 duprintf("Bad offset %p\n", e);
620 < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
621 duprintf("checking: element %p size %u\n",
626 /* Check hooks & underflows */
627 for (h = 0; h < NF_IP_NUMHOOKS; h++) {
628 if ((unsigned char *)e - base == hook_entries[h])
629 newinfo->hook_entry[h] = hook_entries[h];
630 if ((unsigned char *)e - base == underflows[h])
631 newinfo->underflow[h] = underflows[h];
634 /* FIXME: underflows must be unconditional, standard verdicts
635 < 0 (not IPT_RETURN). --RR */
637 /* Clear counters and comefrom */
638 e->counters = ((struct xt_counters) { 0, 0 });
646 cleanup_entry(struct ipt_entry *e, unsigned int *i)
648 struct ipt_entry_target *t;
650 if (i && (*i)-- == 0)
653 /* Cleanup all matches */
654 IPT_MATCH_ITERATE(e, cleanup_match, NULL);
655 t = ipt_get_target(e);
656 if (t->u.kernel.target->destroy)
657 t->u.kernel.target->destroy(t->u.kernel.target, t->data,
658 t->u.target_size - sizeof(*t));
659 module_put(t->u.kernel.target->me);
/*
 * translate_table — check and translate the user-supplied table blob:
 * walk all entries validating sizes/offsets and collecting hook entry
 * points (check_entry_size_and_hooks), verify every valid hook got an
 * entry and an underflow, run loop detection (mark_source_chains) and
 * per-entry sanity checks (check_entry), then replicate the finished
 * blob into every other CPU's copy.
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
663 /* Checks and translates the user-supplied table segment (held in
666 translate_table(const char *name,
667 unsigned int valid_hooks,
668 struct xt_table_info *newinfo,
672 const unsigned int *hook_entries,
673 const unsigned int *underflows)
678 newinfo->size = size;
679 newinfo->number = number;
681 /* Init all hooks to impossible value. */
682 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
683 newinfo->hook_entry[i] = 0xFFFFFFFF;
684 newinfo->underflow[i] = 0xFFFFFFFF;
687 duprintf("translate_table: size %u\n", newinfo->size);
689 /* Walk through entries, checking offsets. */
690 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
691 check_entry_size_and_hooks,
695 hook_entries, underflows, &i);
700 duprintf("translate_table: %u not %u entries\n",
705 /* Check hooks all assigned */
706 for (i = 0; i < NF_IP_NUMHOOKS; i++) {
707 /* Only hooks which are valid */
708 if (!(valid_hooks & (1 << i)))
710 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
711 duprintf("Invalid hook entry %u %u\n",
715 if (newinfo->underflow[i] == 0xFFFFFFFF) {
716 duprintf("Invalid underflow %u %u\n",
722 if (!mark_source_chains(newinfo, valid_hooks, entry0))
725 /* Finally, each sanity check must pass */
727 ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
728 check_entry, name, size, &i);
/* On failure, unwind only the i entries that passed check_entry. */
731 IPT_ENTRY_ITERATE(entry0, newinfo->size,
736 /* And one copy for every other CPU */
738 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
739 memcpy(newinfo->entries[i], entry0, newinfo->size);
747 add_entry_to_counter(const struct ipt_entry *e,
748 struct xt_counters total[],
751 ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
758 set_entry_to_counter(const struct ipt_entry *e,
759 struct ipt_counters total[],
762 SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
/*
 * get_counters — fold the per-cpu counter copies into one flat array:
 * SET from the current CPU's copy first (avoids a memset), then ADD
 * every other CPU's copy.  Callers hold the table lock so the result
 * is a consistent snapshot.
 *
 * NOTE(review): this extract is missing lines (the other-CPU loop is
 * partly elided) and the leading integers are original line numbers
 * fused in by extraction — do not compile as-is.
 */
769 get_counters(const struct xt_table_info *t,
770 struct xt_counters counters[])
776 /* Instead of clearing (by a previous call to memset())
777 * the counters and using adds, we set the counters
778 * with data used by 'current' CPU
779 * We dont care about preemption here.
781 curcpu = raw_smp_processor_id();
784 IPT_ENTRY_ITERATE(t->entries[curcpu],
786 set_entry_to_counter,
794 IPT_ENTRY_ITERATE(t->entries[cpu],
796 add_entry_to_counter,
/*
 * copy_entries_to_user — export the ruleset to userspace: snapshot the
 * summed counters under the write lock, copy this CPU's raw entry blob
 * out wholesale, then walk it again patching each entry's counters and
 * replacing kernel match/target pointers with their user-visible names.
 *
 * NOTE(review): this extract is missing lines (error paths, frees) and
 * the leading integers are original line numbers fused in by
 * extraction — do not compile as-is.
 */
803 copy_entries_to_user(unsigned int total_size,
804 struct ipt_table *table,
805 void __user *userptr)
807 unsigned int off, num, countersize;
809 struct xt_counters *counters;
810 struct xt_table_info *private = table->private;
814 /* We need atomic snapshot of counters: rest doesn't change
815 (other than comefrom, which userspace doesn't care
817 countersize = sizeof(struct xt_counters) * private->number;
818 counters = vmalloc_node(countersize, numa_node_id());
820 if (counters == NULL)
823 /* First, sum counters... */
824 write_lock_bh(&table->lock);
825 get_counters(private, counters);
826 write_unlock_bh(&table->lock);
828 /* choose the copy that is on our node/cpu, ...
829 * This choice is lazy (because current thread is
830 * allowed to migrate to another cpu)
832 loc_cpu_entry = private->entries[raw_smp_processor_id()];
833 /* ... then copy entire thing ... */
834 if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
839 /* FIXME: use iterator macros --RR */
840 /* ... then go back and fix counters and names */
841 for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
843 struct ipt_entry_match *m;
844 struct ipt_entry_target *t;
846 e = (struct ipt_entry *)(loc_cpu_entry + off);
847 if (copy_to_user(userptr + off
848 + offsetof(struct ipt_entry, counters),
850 sizeof(counters[num])) != 0) {
/* Overwrite each match's kernel pointer union with its name. */
855 for (i = sizeof(struct ipt_entry);
856 i < e->target_offset;
857 i += m->u.match_size) {
860 if (copy_to_user(userptr + off + i
861 + offsetof(struct ipt_entry_match,
863 m->u.kernel.match->name,
864 strlen(m->u.kernel.match->name)+1)
871 t = ipt_get_target(e);
872 if (copy_to_user(userptr + off + e->target_offset
873 + offsetof(struct ipt_entry_target,
875 t->u.kernel.target->name,
876 strlen(t->u.kernel.target->name)+1) != 0) {
/*
 * get_entries — IPT_SO_GET_ENTRIES helper: look up (and lock) the
 * named table, verify the caller's size matches the table's, and dump
 * the ruleset via copy_entries_to_user.
 *
 * NOTE(review): this extract is missing lines (unlock/module_put) and
 * the leading integers are original line numbers fused in by
 * extraction — do not compile as-is.
 */
888 get_entries(const struct ipt_get_entries *entries,
889 struct ipt_get_entries __user *uptr)
894 t = xt_find_table_lock(AF_INET, entries->name);
895 if (t && !IS_ERR(t)) {
896 struct xt_table_info *private = t->private;
897 duprintf("t->private->number = %u\n",
899 if (entries->size == private->size)
900 ret = copy_entries_to_user(private->size,
901 t, uptr->entrytable);
903 duprintf("get_entries: I've got %u not %u!\n",
911 ret = t ? PTR_ERR(t) : -ENOENT;
/*
 * do_replace — IPT_SO_SET_REPLACE handler: copy the replacement table
 * header and blob from userspace (with overflow-checked sizes),
 * translate it, atomically swap it in via xt_replace_table, adjust the
 * module reference count based on rule counts, hand the old table's
 * counters back to userspace and free the old table.
 *
 * NOTE(review): this extract is missing lines (much of the error
 * unwinding) and the leading integers are original line numbers fused
 * in by extraction — do not compile as-is.
 */
917 do_replace(void __user *user, unsigned int len)
920 struct ipt_replace tmp;
922 struct xt_table_info *newinfo, *oldinfo;
923 struct xt_counters *counters;
924 void *loc_cpu_entry, *loc_cpu_old_entry;
926 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
929 /* Hack: Causes ipchains to give correct error msg --RR */
930 if (len != sizeof(tmp) + tmp.size)
/* Reject sizes that would overflow the per-cpu allocations. */
934 if (tmp.size >= (INT_MAX - sizeof(struct xt_table_info)) / NR_CPUS -
937 if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
940 newinfo = xt_alloc_table_info(tmp.size);
944 /* choose the copy that is our node/cpu */
945 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
946 if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
952 counters = vmalloc(tmp.num_counters * sizeof(struct xt_counters));
958 ret = translate_table(tmp.name, tmp.valid_hooks,
959 newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
960 tmp.hook_entry, tmp.underflow);
962 goto free_newinfo_counters;
964 duprintf("ip_tables: Translated table\n");
966 t = try_then_request_module(xt_find_table_lock(AF_INET, tmp.name),
967 "iptable_%s", tmp.name);
968 if (!t || IS_ERR(t)) {
969 ret = t ? PTR_ERR(t) : -ENOENT;
970 goto free_newinfo_counters_untrans;
974 if (tmp.valid_hooks != t->valid_hooks) {
975 duprintf("Valid hook crap: %08X vs %08X\n",
976 tmp.valid_hooks, t->valid_hooks);
981 oldinfo = xt_replace_table(t, tmp.num_counters, newinfo, &ret);
985 /* Update module usage count based on number of rules */
986 duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
987 oldinfo->number, oldinfo->initial_entries, newinfo->number);
988 if ((oldinfo->number > oldinfo->initial_entries) ||
989 (newinfo->number <= oldinfo->initial_entries))
991 if ((oldinfo->number > oldinfo->initial_entries) &&
992 (newinfo->number <= oldinfo->initial_entries))
995 /* Get the old counters. */
996 get_counters(oldinfo, counters);
997 /* Decrease module usage counts and free resource */
998 loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
999 IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1000 xt_free_table_info(oldinfo);
1001 if (copy_to_user(tmp.counters, counters,
1002 sizeof(struct xt_counters) * tmp.num_counters) != 0)
/* Error labels: unwind in reverse order of setup. */
1011 free_newinfo_counters_untrans:
1012 IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1013 free_newinfo_counters:
1016 xt_free_table_info(newinfo);
1020 /* We're lazy, and add to the first CPU; overflow works its fey magic
1021 * and everything is OK. */
1023 add_counter_to_entry(struct ipt_entry *e,
1024 const struct xt_counters addme[],
1028 duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1030 (long unsigned int)e->counters.pcnt,
1031 (long unsigned int)e->counters.bcnt,
1032 (long unsigned int)addme[*i].pcnt,
1033 (long unsigned int)addme[*i].bcnt);
1036 ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
/*
 * do_add_counters — IPT_SO_SET_ADD_COUNTERS handler: copy an
 * xt_counters_info blob from userspace (length-checked against
 * num_counters), then under the table write lock add each supplied
 * counter pair onto the current CPU's copy of the corresponding entry
 * via add_counter_to_entry.
 *
 * NOTE(review): this extract is missing lines (error labels, frees)
 * and the leading integers are original line numbers fused in by
 * extraction — do not compile as-is.
 */
1043 do_add_counters(void __user *user, unsigned int len)
1046 struct xt_counters_info tmp, *paddc;
1047 struct ipt_table *t;
1048 struct xt_table_info *private;
1050 void *loc_cpu_entry;
1052 if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1055 if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct xt_counters))
1058 paddc = vmalloc_node(len, numa_node_id());
1062 if (copy_from_user(paddc, user, len) != 0) {
1067 t = xt_find_table_lock(AF_INET, tmp.name);
1068 if (!t || IS_ERR(t)) {
1069 ret = t ? PTR_ERR(t) : -ENOENT;
1073 write_lock_bh(&t->lock);
1074 private = t->private;
1075 if (private->number != paddc->num_counters) {
1077 goto unlock_up_free;
1081 /* Choose the copy that is on our node */
1082 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1083 IPT_ENTRY_ITERATE(loc_cpu_entry,
1085 add_counter_to_entry,
1089 write_unlock_bh(&t->lock);
1099 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1103 if (!capable(CAP_NET_ADMIN))
1107 case IPT_SO_SET_REPLACE:
1108 ret = do_replace(user, len);
1111 case IPT_SO_SET_ADD_COUNTERS:
1112 ret = do_add_counters(user, len);
1116 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
/*
 * do_ipt_get_ctl — getsockopt dispatcher (CAP_NET_ADMIN required):
 * IPT_SO_GET_INFO returns table geometry (hooks, underflows, sizes),
 * IPT_SO_GET_ENTRIES dumps the ruleset via get_entries(), and the
 * GET_REVISION_{MATCH,TARGET} calls probe match/target revision
 * support with module auto-load.
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
1124 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1128 if (!capable(CAP_NET_ADMIN))
1132 case IPT_SO_GET_INFO: {
1133 char name[IPT_TABLE_MAXNAMELEN];
1134 struct ipt_table *t;
1136 if (*len != sizeof(struct ipt_getinfo)) {
1137 duprintf("length %u != %u\n", *len,
1138 sizeof(struct ipt_getinfo));
1143 if (copy_from_user(name, user, sizeof(name)) != 0) {
/* Defensive NUL-termination of the userspace-supplied table name. */
1147 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1149 t = try_then_request_module(xt_find_table_lock(AF_INET, name),
1150 "iptable_%s", name);
1151 if (t && !IS_ERR(t)) {
1152 struct ipt_getinfo info;
1153 struct xt_table_info *private = t->private;
1155 info.valid_hooks = t->valid_hooks;
1156 memcpy(info.hook_entry, private->hook_entry,
1157 sizeof(info.hook_entry));
1158 memcpy(info.underflow, private->underflow,
1159 sizeof(info.underflow));
1160 info.num_entries = private->number;
1161 info.size = private->size;
1162 memcpy(info.name, name, sizeof(info.name));
1164 if (copy_to_user(user, &info, *len) != 0)
1171 ret = t ? PTR_ERR(t) : -ENOENT;
1175 case IPT_SO_GET_ENTRIES: {
1176 struct ipt_get_entries get;
1178 if (*len < sizeof(get)) {
1179 duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1181 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1183 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1184 duprintf("get_entries: %u != %u\n", *len,
1185 sizeof(struct ipt_get_entries) + get.size);
1188 ret = get_entries(&get, user);
1192 case IPT_SO_GET_REVISION_MATCH:
1193 case IPT_SO_GET_REVISION_TARGET: {
1194 struct ipt_get_revision rev;
1197 if (*len != sizeof(rev)) {
1201 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1206 if (cmd == IPT_SO_GET_REVISION_TARGET)
1211 try_then_request_module(xt_find_revision(AF_INET, rev.name,
1214 "ipt_%s", rev.name);
1219 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
/*
 * ipt_register_table — build a fresh xt_table_info from the template
 * 'repl' (copying its entry blob into this CPU's slot), translate it,
 * and register the table with the x_tables core; newinfo is freed on
 * any failure.  Used by the per-table modules (filter/mangle/raw).
 *
 * NOTE(review): this extract is missing lines (returns, remaining
 * translate_table arguments) and the leading integers are original
 * line numbers fused in by extraction — do not compile as-is.
 */
1226 int ipt_register_table(struct xt_table *table, const struct ipt_replace *repl)
1229 struct xt_table_info *newinfo;
1230 static struct xt_table_info bootstrap
1231 = { 0, 0, 0, { 0 }, { 0 }, { } };
1232 void *loc_cpu_entry;
1234 newinfo = xt_alloc_table_info(repl->size);
1238 /* choose the copy on our node/cpu
1239 * but dont care of preemption
1241 loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1242 memcpy(loc_cpu_entry, repl->entries, repl->size);
1244 ret = translate_table(table->name, table->valid_hooks,
1245 newinfo, loc_cpu_entry, repl->size,
1250 xt_free_table_info(newinfo);
1254 if (xt_register_table(table, &bootstrap, newinfo) != 0) {
1255 xt_free_table_info(newinfo);
1262 void ipt_unregister_table(struct ipt_table *table)
1264 struct xt_table_info *private;
1265 void *loc_cpu_entry;
1267 private = xt_unregister_table(table);
1269 /* Decrease module usage counts and free resources */
1270 loc_cpu_entry = private->entries[raw_smp_processor_id()];
1271 IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
1272 xt_free_table_info(private);
/* Returns 1 if the type and code is matched by the range, 0 otherwise.
 * A test_type of 0xFF is a wildcard matching any type/code; otherwise
 * the type must be equal and the code within [min_code, max_code].
 * The result is XORed with 'invert' to implement rule inversion.
 * NOTE(review): restores lines dropped by extraction; verify against
 * upstream ip_tables.c. */
static inline int
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     int invert)
{
	return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
		^ invert;
}
/*
 * icmp_match — the built-in "icmp" match: only inspects non-fragments,
 * pulls the ICMP header with skb_header_pointer (a short/truncated
 * header is treated as evil and drops the packet via *hotdrop), then
 * delegates the type/code comparison to icmp_type_code_match with the
 * IPT_ICMP_INV inversion flag.
 *
 * NOTE(review): this extract is missing lines and the leading integers
 * are original line numbers fused in by extraction — do not compile as-is.
 */
1286 icmp_match(const struct sk_buff *skb,
1287 const struct net_device *in,
1288 const struct net_device *out,
1289 const struct xt_match *match,
1290 const void *matchinfo,
1292 unsigned int protoff,
1295 struct icmphdr _icmph, *ic;
1296 const struct ipt_icmp *icmpinfo = matchinfo;
1298 /* Must not be a fragment. */
1302 ic = skb_header_pointer(skb, protoff, sizeof(_icmph), &_icmph);
1304 /* We've been asked to examine this packet, and we
1305 * can't. Hence, no choice but to drop.
1307 duprintf("Dropping evil ICMP tinygram.\n");
1312 return icmp_type_code_match(icmpinfo->type,
1316 !!(icmpinfo->invflags&IPT_ICMP_INV));
1319 /* Called when user tries to insert an entry of this type. */
1321 icmp_checkentry(const char *tablename,
1323 const struct xt_match *match,
1325 unsigned int matchsize,
1326 unsigned int hook_mask)
1328 const struct ipt_icmp *icmpinfo = matchinfo;
1330 /* Must specify no unknown invflags */
1331 return !(icmpinfo->invflags & ~IPT_ICMP_INV);
/* The standard target's target function is NULL: ipt_do_table handles
 * its verdict/jump/RETURN semantics inline.  The error target carries
 * a name payload and its ipt_error function logs and drops.
 * NOTE(review): extract is missing lines (closing braces, remaining
 * initializers); leading integers are fused-in original line numbers. */
1334 /* The built-in targets: standard (NULL) and error. */
1335 static struct ipt_target ipt_standard_target = {
1336 .name = IPT_STANDARD_TARGET,
1337 .targetsize = sizeof(int),
1341 static struct ipt_target ipt_error_target = {
1342 .name = IPT_ERROR_TARGET,
1343 .target = ipt_error,
1344 .targetsize = IPT_FUNCTION_MAXNAMELEN,
/* Socket-option glue: routes the IPT_SO_* [gs]etsockopt ranges to the
 * do_ipt_set_ctl/do_ipt_get_ctl handlers above.
 * NOTE(review): extract is missing lines; leading integers are
 * fused-in original line numbers. */
1348 static struct nf_sockopt_ops ipt_sockopts = {
1350 .set_optmin = IPT_BASE_CTL,
1351 .set_optmax = IPT_SO_SET_MAX+1,
1352 .set = do_ipt_set_ctl,
1353 .get_optmin = IPT_BASE_CTL,
1354 .get_optmax = IPT_SO_GET_MAX+1,
1355 .get = do_ipt_get_ctl,
/* Registration record for the built-in "icmp" match defined above. */
1358 static struct ipt_match icmp_matchstruct = {
1360 .match = icmp_match,
1361 .matchsize = sizeof(struct ipt_icmp),
1362 .proto = IPPROTO_ICMP,
1364 .checkentry = icmp_checkentry,
/*
 * init — module initialization: register the AF_INET x_tables
 * protocol, the built-in standard/error targets and the ICMP match,
 * then the setsockopt/getsockopt interface.
 *
 * NOTE(review): this extract is missing lines (error unwinding and the
 * final return) and the leading integers are original line numbers
 * fused in by extraction — do not compile as-is.
 */
1367 static int __init init(void)
1371 xt_proto_init(AF_INET);
1373 /* Noone else will be downing sem now, so we won't sleep */
1374 xt_register_target(&ipt_standard_target);
1375 xt_register_target(&ipt_error_target);
1376 xt_register_match(&icmp_matchstruct);
1378 /* Register setsockopt */
1379 ret = nf_register_sockopt(&ipt_sockopts);
1381 duprintf("Unable to register sockopts.\n");
1385 printk("ip_tables: (C) 2000-2006 Netfilter Core Team\n");
1389 static void __exit fini(void)
1391 nf_unregister_sockopt(&ipt_sockopts);
1393 xt_unregister_match(&icmp_matchstruct);
1394 xt_unregister_target(&ipt_error_target);
1395 xt_unregister_target(&ipt_standard_target);
1397 xt_proto_fini(AF_INET);
/* Public entry points used by the per-table modules (iptable_filter,
 * iptable_mangle, iptable_raw) and their netfilter hook functions. */
1400 EXPORT_SYMBOL(ipt_register_table);
1401 EXPORT_SYMBOL(ipt_unregister_table);
1402 EXPORT_SYMBOL(ipt_do_table);