net/ipv4/netfilter/ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2004 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  *
11  * 19 Jan 2002 Harald Welte <laforge@gnumonks.org>
12  *      - increase module usage count as soon as we have rules inside
13  *        a table
14  */
15 #include <linux/config.h>
16 #include <linux/cache.h>
17 #include <linux/skbuff.h>
18 #include <linux/kmod.h>
19 #include <linux/vmalloc.h>
20 #include <linux/netdevice.h>
21 #include <linux/module.h>
22 #include <linux/tcp.h>
23 #include <linux/udp.h>
24 #include <linux/icmp.h>
25 #include <net/ip.h>
26 #include <asm/uaccess.h>
27 #include <asm/semaphore.h>
28 #include <linux/proc_fs.h>
29 #include <linux/err.h>
30 #include <linux/cpumask.h>
31
32 #include <linux/netfilter_ipv4/ip_tables.h>
33
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
36 MODULE_DESCRIPTION("IPv4 packet filter");
37
38 /*#define DEBUG_IP_FIREWALL*/
39 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
40 /*#define DEBUG_IP_FIREWALL_USER*/
41
42 #ifdef DEBUG_IP_FIREWALL
43 #define dprintf(format, args...)  printk(format , ## args)
44 #else
45 #define dprintf(format, args...)
46 #endif
47
48 #ifdef DEBUG_IP_FIREWALL_USER
49 #define duprintf(format, args...) printk(format , ## args)
50 #else
51 #define duprintf(format, args...)
52 #endif
53
54 #ifdef CONFIG_NETFILTER_DEBUG
55 #define IP_NF_ASSERT(x)                                         \
56 do {                                                            \
57         if (!(x))                                               \
58                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
59                        __FUNCTION__, __FILE__, __LINE__);       \
60 } while(0)
61 #else
62 #define IP_NF_ASSERT(x)
63 #endif
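/* Round x up to a multiple of the cache line size; e.g. assuming
   SMP_CACHE_BYTES == 64, SMP_ALIGN(100) == 128. */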
64 #define SMP_ALIGN(x) (((x) + SMP_CACHE_BYTES-1) & ~(SMP_CACHE_BYTES-1))
65
66 static DECLARE_MUTEX(ipt_mutex);
67
68 /* Must have mutex */
69 #define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
70 #define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
71 #include <linux/netfilter_ipv4/listhelp.h>
72
73 #if 0
74 /* All the better to debug you with... */
75 #define static
76 #define inline
77 #endif
78
79 /*
80    We keep a set of rules for each CPU, so we can avoid write-locking
81    them in the softirq when updating the counters and therefore
82    only need to read-lock in the softirq; doing a write_lock_bh() in user
83    context stops packets coming through and allows user context to read
84    the counters or update the rules.
85
86    Hence the start of any table is given by its per-CPU copy in entries[] below.  */
87
88 /* The table itself */
89 struct ipt_table_info
90 {
91         /* Size per table */
92         unsigned int size;
93         /* Number of entries: FIXME. --RR */
94         unsigned int number;
95         /* Initial number of entries. Needed for module usage count */
96         unsigned int initial_entries;
97
98         /* Entry points and underflows */
99         unsigned int hook_entry[NF_IP_NUMHOOKS];
100         unsigned int underflow[NF_IP_NUMHOOKS];
101
102         /* ipt_entry tables: one per CPU */
103         void *entries[NR_CPUS];
104 };
105
106 static LIST_HEAD(ipt_target);
107 static LIST_HEAD(ipt_match);
108 static LIST_HEAD(ipt_tables);
109 #define SET_COUNTER(c,b,p) do { (c).bcnt = (b); (c).pcnt = (p); } while(0)
110 #define ADD_COUNTER(c,b,p) do { (c).bcnt += (b); (c).pcnt += (p); } while(0)
111
112 #if 0
113 #define down(x) do { printk("DOWN:%u:" #x "\n", __LINE__); down(x); } while(0)
114 #define down_interruptible(x) ({ int __r; printk("DOWNi:%u:" #x "\n", __LINE__); __r = down_interruptible(x); if (__r != 0) printk("ABORT-DOWNi:%u\n", __LINE__); __r; })
115 #define up(x) do { printk("UP:%u:" #x "\n", __LINE__); up(x); } while(0)
116 #endif
117
118 /* Returns whether the packet matches the rule or not. */
119 static inline int
120 ip_packet_match(const struct iphdr *ip,
121                 const char *indev,
122                 const char *outdev,
123                 const struct ipt_ip *ipinfo,
124                 int isfrag)
125 {
126         size_t i;
127         unsigned long ret;
128
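/* Evaluate 'bool', inverting the result when the corresponding
   IPT_INV_* bit is set in the rule's invflags. */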
129 #define FWINV(bool,invflg) ((bool) ^ !!(ipinfo->invflags & invflg))
130
131         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
132                   IPT_INV_SRCIP)
133             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
134                      IPT_INV_DSTIP)) {
135                 dprintf("Source or dest mismatch.\n");
136
137                 dprintf("SRC: %u.%u.%u.%u. Mask: %u.%u.%u.%u. Target: %u.%u.%u.%u.%s\n",
138                         NIPQUAD(ip->saddr),
139                         NIPQUAD(ipinfo->smsk.s_addr),
140                         NIPQUAD(ipinfo->src.s_addr),
141                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
142                 dprintf("DST: %u.%u.%u.%u Mask: %u.%u.%u.%u Target: %u.%u.%u.%u.%s\n",
143                         NIPQUAD(ip->daddr),
144                         NIPQUAD(ipinfo->dmsk.s_addr),
145                         NIPQUAD(ipinfo->dst.s_addr),
146                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
147                 return 0;
148         }
149
150         /* Look for ifname matches; this should unroll nicely. */
151         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
152                 ret |= (((const unsigned long *)indev)[i]
153                         ^ ((const unsigned long *)ipinfo->iniface)[i])
154                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
155         }
156
157         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
158                 dprintf("VIA in mismatch (%s vs %s).%s\n",
159                         indev, ipinfo->iniface,
160                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
161                 return 0;
162         }
163
164         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
165                 ret |= (((const unsigned long *)outdev)[i]
166                         ^ ((const unsigned long *)ipinfo->outiface)[i])
167                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
168         }
169
170         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
171                 dprintf("VIA out mismatch (%s vs %s).%s\n",
172                         outdev, ipinfo->outiface,
173                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
174                 return 0;
175         }
176
177         /* Check specific protocol */
178         if (ipinfo->proto
179             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
180                 dprintf("Packet protocol %hi does not match %hi.%s\n",
181                         ip->protocol, ipinfo->proto,
182                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
183                 return 0;
184         }
185
186         /* If we have a fragment rule but the packet is not a fragment
187          * then we return zero */
188         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
189                 dprintf("Fragment rule but not fragment.%s\n",
190                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
191                 return 0;
192         }
193
194         return 1;
195 }
196
197 static inline int
198 ip_checkentry(const struct ipt_ip *ip)
199 {
200         if (ip->flags & ~IPT_F_MASK) {
201                 duprintf("Unknown flag bits set: %08X\n",
202                          ip->flags & ~IPT_F_MASK);
203                 return 0;
204         }
205         if (ip->invflags & ~IPT_INV_MASK) {
206                 duprintf("Unknown invflag bits set: %08X\n",
207                          ip->invflags & ~IPT_INV_MASK);
208                 return 0;
209         }
210         return 1;
211 }
212
213 static unsigned int
214 ipt_error(struct sk_buff **pskb,
215           const struct net_device *in,
216           const struct net_device *out,
217           unsigned int hooknum,
218           const void *targinfo,
219           void *userinfo)
220 {
221         if (net_ratelimit())
222                 printk("ip_tables: error: `%s'\n", (char *)targinfo);
223
224         return NF_DROP;
225 }
226
227 static inline
228 int do_match(struct ipt_entry_match *m,
229              const struct sk_buff *skb,
230              const struct net_device *in,
231              const struct net_device *out,
232              int offset,
233              int *hotdrop)
234 {
235         /* Stop iteration if it doesn't match */
236         if (!m->u.kernel.match->match(skb, in, out, m->data, offset, hotdrop))
237                 return 1;
238         else
239                 return 0;
240 }
241
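/* Return the rule at byte offset 'offset' from the start of a table blob. */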
242 static inline struct ipt_entry *
243 get_entry(void *base, unsigned int offset)
244 {
245         return (struct ipt_entry *)(base + offset);
246 }
247
248 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
249 unsigned int
250 ipt_do_table(struct sk_buff **pskb,
251              unsigned int hook,
252              const struct net_device *in,
253              const struct net_device *out,
254              struct ipt_table *table,
255              void *userdata)
256 {
257         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
258         u_int16_t offset;
259         struct iphdr *ip;
260         u_int16_t datalen;
261         int hotdrop = 0;
262         /* Initializing verdict to NF_DROP keeps gcc happy. */
263         unsigned int verdict = NF_DROP;
264         const char *indev, *outdev;
265         void *table_base;
266         struct ipt_entry *e, *back;
267
268         /* Initialization */
269         ip = (*pskb)->nh.iph;
270         datalen = (*pskb)->len - ip->ihl * 4;
271         indev = in ? in->name : nulldevname;
272         outdev = out ? out->name : nulldevname;
273         /* We handle fragments by dealing with the first fragment as
274          * if it was a normal packet.  All other fragments are treated
275          * normally, except that they will NEVER match rules that ask
276          * about things we don't know, i.e. the tcp syn flag or ports.  If the
277          * rule is also a fragment-specific rule, non-fragments won't
278          * match it. */
279         offset = ntohs(ip->frag_off) & IP_OFFSET;
280
281         read_lock_bh(&table->lock);
282         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
283         table_base = (void *)table->private->entries[smp_processor_id()];
284         e = get_entry(table_base, table->private->hook_entry[hook]);
285
286 #ifdef CONFIG_NETFILTER_DEBUG
287         /* Check that no one else is using our table */
288         if (((struct ipt_entry *)table_base)->comefrom != 0xdead57ac
289             && ((struct ipt_entry *)table_base)->comefrom != 0xeeeeeeec) {
290                 printk("ASSERT: CPU #%u, %s comefrom(%p) = %X\n",
291                        smp_processor_id(),
292                        table->name,
293                        &((struct ipt_entry *)table_base)->comefrom,
294                        ((struct ipt_entry *)table_base)->comefrom);
295         }
296         ((struct ipt_entry *)table_base)->comefrom = 0x57acc001;
297 #endif
298
299         /* For return from builtin chain */
300         back = get_entry(table_base, table->private->underflow[hook]);
301
302         do {
303                 IP_NF_ASSERT(e);
304                 IP_NF_ASSERT(back);
305                 if (ip_packet_match(ip, indev, outdev, &e->ip, offset)) {
306                         struct ipt_entry_target *t;
307
308                         if (IPT_MATCH_ITERATE(e, do_match,
309                                               *pskb, in, out,
310                                               offset, &hotdrop) != 0)
311                                 goto no_match;
312
313                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
314
315                         t = ipt_get_target(e);
316                         IP_NF_ASSERT(t->u.kernel.target);
317                         /* Standard target? */
318                         if (!t->u.kernel.target->target) {
319                                 int v;
320
321                                 v = ((struct ipt_standard_target *)t)->verdict;
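                                /* Negative verdicts encode an NF_* value
                                   as -verdict - 1; IPT_RETURN instead pops
                                   back to the calling chain. */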
322                                 if (v < 0) {
323                                         /* Pop from stack? */
324                                         if (v != IPT_RETURN) {
325                                                 verdict = (unsigned)(-v) - 1;
326                                                 break;
327                                         }
328                                         e = back;
329                                         back = get_entry(table_base,
330                                                          back->comefrom);
331                                         continue;
332                                 }
333                                 if (table_base + v != (void *)e + e->next_offset
334                                     && !(e->ip.flags & IPT_F_GOTO)) {
335                                         /* Save old back ptr in next entry */
336                                         struct ipt_entry *next
337                                                 = (void *)e + e->next_offset;
338                                         next->comefrom
339                                                 = (void *)back - table_base;
340                                         /* set back pointer to next entry */
341                                         back = next;
342                                 }
343
344                                 e = get_entry(table_base, v);
345                         } else {
346                                 /* Targets which reenter must return
347                                    abs. verdicts */
348 #ifdef CONFIG_NETFILTER_DEBUG
349                                 ((struct ipt_entry *)table_base)->comefrom
350                                         = 0xeeeeeeec;
351 #endif
352                                 verdict = t->u.kernel.target->target(pskb,
353                                                                      in, out,
354                                                                      hook,
355                                                                      t->data,
356                                                                      userdata);
357
358 #ifdef CONFIG_NETFILTER_DEBUG
359                                 if (((struct ipt_entry *)table_base)->comefrom
360                                     != 0xeeeeeeec
361                                     && verdict == IPT_CONTINUE) {
362                                         printk("Target %s reentered!\n",
363                                                t->u.kernel.target->name);
364                                         verdict = NF_DROP;
365                                 }
366                                 ((struct ipt_entry *)table_base)->comefrom
367                                         = 0x57acc001;
368 #endif
369                                 /* Target might have changed stuff. */
370                                 ip = (*pskb)->nh.iph;
371                                 datalen = (*pskb)->len - ip->ihl * 4;
372
373                                 if (verdict == IPT_CONTINUE)
374                                         e = (void *)e + e->next_offset;
375                                 else
376                                         /* Verdict */
377                                         break;
378                         }
379                 } else {
380
381                 no_match:
382                         e = (void *)e + e->next_offset;
383                 }
384         } while (!hotdrop);
385
386 #ifdef CONFIG_NETFILTER_DEBUG
387         ((struct ipt_entry *)table_base)->comefrom = 0xdead57ac;
388 #endif
389         read_unlock_bh(&table->lock);
390
391 #ifdef DEBUG_ALLOW_ALL
392         return NF_ACCEPT;
393 #else
394         if (hotdrop)
395                 return NF_DROP;
396         else return verdict;
397 #endif
398 }
399
400 /*
401  * These are weird, but module loading must not be done with the mutex
402  * held (since the modules will register themselves), and we need a single
403  * function to use try_then_request_module().
404  */
405
406 /* Find table by name, grabs mutex & ref.  Returns ERR_PTR() on error. */
407 static inline struct ipt_table *find_table_lock(const char *name)
408 {
409         struct ipt_table *t;
410
411         if (down_interruptible(&ipt_mutex) != 0)
412                 return ERR_PTR(-EINTR);
413
414         list_for_each_entry(t, &ipt_tables, list)
415                 if (strcmp(t->name, name) == 0 && try_module_get(t->me))
416                         return t;
417         up(&ipt_mutex);
418         return NULL;
419 }
420
421 /* Find match, grabs ref.  Returns ERR_PTR() on error. */
422 static inline struct ipt_match *find_match(const char *name, u8 revision)
423 {
424         struct ipt_match *m;
425         int err = 0;
426
427         if (down_interruptible(&ipt_mutex) != 0)
428                 return ERR_PTR(-EINTR);
429
430         list_for_each_entry(m, &ipt_match, list) {
431                 if (strcmp(m->name, name) == 0) {
432                         if (m->revision == revision) {
433                                 if (try_module_get(m->me)) {
434                                         up(&ipt_mutex);
435                                         return m;
436                                 }
437                         } else
438                                 err = -EPROTOTYPE; /* Found something. */
439                 }
440         }
441         up(&ipt_mutex);
442         return ERR_PTR(err);
443 }
444
445 /* Find target, grabs ref.  Returns ERR_PTR() on error. */
446 static inline struct ipt_target *find_target(const char *name, u8 revision)
447 {
448         struct ipt_target *t;
449         int err = 0;
450
451         if (down_interruptible(&ipt_mutex) != 0)
452                 return ERR_PTR(-EINTR);
453
454         list_for_each_entry(t, &ipt_target, list) {
455                 if (strcmp(t->name, name) == 0) {
456                         if (t->revision == revision) {
457                                 if (try_module_get(t->me)) {
458                                         up(&ipt_mutex);
459                                         return t;
460                                 }
461                         } else
462                                 err = -EPROTOTYPE; /* Found something. */
463                 }
464         }
465         up(&ipt_mutex);
466         return ERR_PTR(err);
467 }
468
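/* Look up a target by name and revision, autoloading ipt_<name> if needed;
   returns NULL if it cannot be found. */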
469 struct ipt_target *ipt_find_target(const char *name, u8 revision)
470 {
471         struct ipt_target *target;
472
473         target = try_then_request_module(find_target(name, revision),
474                                          "ipt_%s", name);
475         if (IS_ERR(target) || !target)
476                 return NULL;
477         return target;
478 }
479
480 static int match_revfn(const char *name, u8 revision, int *bestp)
481 {
482         struct ipt_match *m;
483         int have_rev = 0;
484
485         list_for_each_entry(m, &ipt_match, list) {
486                 if (strcmp(m->name, name) == 0) {
487                         if (m->revision > *bestp)
488                                 *bestp = m->revision;
489                         if (m->revision == revision)
490                                 have_rev = 1;
491                 }
492         }
493         return have_rev;
494 }
495
496 static int target_revfn(const char *name, u8 revision, int *bestp)
497 {
498         struct ipt_target *t;
499         int have_rev = 0;
500
501         list_for_each_entry(t, &ipt_target, list) {
502                 if (strcmp(t->name, name) == 0) {
503                         if (t->revision > *bestp)
504                                 *bestp = t->revision;
505                         if (t->revision == revision)
506                                 have_rev = 1;
507                 }
508         }
509         return have_rev;
510 }
511
512 /* Returns 1 if handled, 0 if no such extension exists at all (so the
      caller can try loading the module). */
513 static inline int find_revision(const char *name, u8 revision,
514                                 int (*revfn)(const char *, u8, int *),
515                                 int *err)
516 {
517         int have_rev, best = -1;
518
519         if (down_interruptible(&ipt_mutex) != 0) {
520                 *err = -EINTR;
521                 return 1;
522         }
523         have_rev = revfn(name, revision, &best);
524         up(&ipt_mutex);
525
526         /* Nothing at all?  Return 0 to try loading module. */
527         if (best == -1) {
528                 *err = -ENOENT;
529                 return 0;
530         }
531
532         *err = best;
533         if (!have_rev)
534                 *err = -EPROTONOSUPPORT;
535         return 1;
536 }
537
538
539 /* All zeroes == unconditional rule. */
540 static inline int
541 unconditional(const struct ipt_ip *ip)
542 {
543         unsigned int i;
544
545         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
546                 if (((__u32 *)ip)[i])
547                         return 0;
548
549         return 1;
550 }
551
552 /* Figures out from what hook each rule can be called: returns 0 if
553    there are loops.  Puts hook bitmask in comefrom. */
554 static int
555 mark_source_chains(struct ipt_table_info *newinfo,
556                    unsigned int valid_hooks, void *entry0)
557 {
558         unsigned int hook;
559
560         /* No recursion; use packet counter to save back ptrs (reset
561            to 0 as we leave), and comefrom to save source hook bitmask */
562         for (hook = 0; hook < NF_IP_NUMHOOKS; hook++) {
563                 unsigned int pos = newinfo->hook_entry[hook];
564                 struct ipt_entry *e
565                         = (struct ipt_entry *)(entry0 + pos);
566
567                 if (!(valid_hooks & (1 << hook)))
568                         continue;
569
570                 /* Set initial back pointer. */
571                 e->counters.pcnt = pos;
572
573                 for (;;) {
574                         struct ipt_standard_target *t
575                                 = (void *)ipt_get_target(e);
576
577                         if (e->comefrom & (1 << NF_IP_NUMHOOKS)) {
578                                 printk("iptables: loop hook %u pos %u %08X.\n",
579                                        hook, pos, e->comefrom);
580                                 return 0;
581                         }
582                         e->comefrom
583                                 |= ((1 << hook) | (1 << NF_IP_NUMHOOKS));
584
585                         /* Unconditional return/END. */
586                         if (e->target_offset == sizeof(struct ipt_entry)
587                             && (strcmp(t->target.u.user.name,
588                                        IPT_STANDARD_TARGET) == 0)
589                             && t->verdict < 0
590                             && unconditional(&e->ip)) {
591                                 unsigned int oldpos, size;
592
593                                 /* Return: backtrack through the last
594                                    big jump. */
595                                 do {
596                                         e->comefrom ^= (1<<NF_IP_NUMHOOKS);
597 #ifdef DEBUG_IP_FIREWALL_USER
598                                         if (e->comefrom
599                                             & (1 << NF_IP_NUMHOOKS)) {
600                                                 duprintf("Back unset "
601                                                          "on hook %u "
602                                                          "rule %u\n",
603                                                          hook, pos);
604                                         }
605 #endif
606                                         oldpos = pos;
607                                         pos = e->counters.pcnt;
608                                         e->counters.pcnt = 0;
609
610                                         /* We're at the start. */
611                                         if (pos == oldpos)
612                                                 goto next;
613
614                                         e = (struct ipt_entry *)
615                                                 (entry0 + pos);
616                                 } while (oldpos == pos + e->next_offset);
617
618                                 /* Move along one */
619                                 size = e->next_offset;
620                                 e = (struct ipt_entry *)
621                                         (entry0 + pos + size);
622                                 e->counters.pcnt = pos;
623                                 pos += size;
624                         } else {
625                                 int newpos = t->verdict;
626
627                                 if (strcmp(t->target.u.user.name,
628                                            IPT_STANDARD_TARGET) == 0
629                                     && newpos >= 0) {
630                                         /* This is a jump; chase it. */
631                                         duprintf("Jump rule %u -> %u\n",
632                                                  pos, newpos);
633                                 } else {
634                                         /* ... this is a fallthru */
635                                         newpos = pos + e->next_offset;
636                                 }
637                                 e = (struct ipt_entry *)
638                                         (entry0 + newpos);
639                                 e->counters.pcnt = pos;
640                                 pos = newpos;
641                         }
642                 }
643                 next:
644                 duprintf("Finished chain %u\n", hook);
645         }
646         return 1;
647 }
648
649 static inline int
650 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
651 {
652         if (i && (*i)-- == 0)
653                 return 1;
654
655         if (m->u.kernel.match->destroy)
656                 m->u.kernel.match->destroy(m->data,
657                                            m->u.match_size - sizeof(*m));
658         module_put(m->u.kernel.match->me);
659         return 0;
660 }
661
662 static inline int
663 standard_check(const struct ipt_entry_target *t,
664                unsigned int max_offset)
665 {
666         struct ipt_standard_target *targ = (void *)t;
667
668         /* Check standard info. */
669         if (t->u.target_size
670             != IPT_ALIGN(sizeof(struct ipt_standard_target))) {
671                 duprintf("standard_check: target size %u != %u\n",
672                          t->u.target_size,
673                          IPT_ALIGN(sizeof(struct ipt_standard_target)));
674                 return 0;
675         }
676
677         if (targ->verdict >= 0
678             && targ->verdict > max_offset - sizeof(struct ipt_entry)) {
679                 duprintf("ipt_standard_check: bad verdict (%i)\n",
680                          targ->verdict);
681                 return 0;
682         }
683
684         if (targ->verdict < -NF_MAX_VERDICT - 1) {
685                 duprintf("ipt_standard_check: bad negative verdict (%i)\n",
686                          targ->verdict);
687                 return 0;
688         }
689         return 1;
690 }
691
692 static inline int
693 check_match(struct ipt_entry_match *m,
694             const char *name,
695             const struct ipt_ip *ip,
696             unsigned int hookmask,
697             unsigned int *i)
698 {
699         struct ipt_match *match;
700
701         match = try_then_request_module(find_match(m->u.user.name,
702                                                    m->u.user.revision),
703                                         "ipt_%s", m->u.user.name);
704         if (IS_ERR(match) || !match) {
705                 duprintf("check_match: `%s' not found\n", m->u.user.name);
706                 return match ? PTR_ERR(match) : -ENOENT;
707         }
708         m->u.kernel.match = match;
709
710         if (m->u.kernel.match->checkentry
711             && !m->u.kernel.match->checkentry(name, ip, m->data,
712                                               m->u.match_size - sizeof(*m),
713                                               hookmask)) {
714                 module_put(m->u.kernel.match->me);
715                 duprintf("ip_tables: check failed for `%s'.\n",
716                          m->u.kernel.match->name);
717                 return -EINVAL;
718         }
719
720         (*i)++;
721         return 0;
722 }
723
724 static struct ipt_target ipt_standard_target;
725
726 static inline int
727 check_entry(struct ipt_entry *e, const char *name, unsigned int size,
728             unsigned int *i)
729 {
730         struct ipt_entry_target *t;
731         struct ipt_target *target;
732         int ret;
733         unsigned int j;
734
735         if (!ip_checkentry(&e->ip)) {
736                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
737                 return -EINVAL;
738         }
739
740         j = 0;
741         ret = IPT_MATCH_ITERATE(e, check_match, name, &e->ip, e->comefrom, &j);
742         if (ret != 0)
743                 goto cleanup_matches;
744
745         t = ipt_get_target(e);
746         target = try_then_request_module(find_target(t->u.user.name,
747                                                      t->u.user.revision),
748                                          "ipt_%s", t->u.user.name);
749         if (IS_ERR(target) || !target) {
750                 duprintf("check_entry: `%s' not found\n", t->u.user.name);
751                 ret = target ? PTR_ERR(target) : -ENOENT;
752                 goto cleanup_matches;
753         }
754         t->u.kernel.target = target;
755
756         if (t->u.kernel.target == &ipt_standard_target) {
757                 if (!standard_check(t, size)) {
758                         ret = -EINVAL;
759                         goto cleanup_matches;
760                 }
761         } else if (t->u.kernel.target->checkentry
762                    && !t->u.kernel.target->checkentry(name, e, t->data,
763                                                       t->u.target_size
764                                                       - sizeof(*t),
765                                                       e->comefrom)) {
766                 module_put(t->u.kernel.target->me);
767                 duprintf("ip_tables: check failed for `%s'.\n",
768                          t->u.kernel.target->name);
769                 ret = -EINVAL;
770                 goto cleanup_matches;
771         }
772
773         (*i)++;
774         return 0;
775
776  cleanup_matches:
777         IPT_MATCH_ITERATE(e, cleanup_match, &j);
778         return ret;
779 }
780
781 static inline int
782 check_entry_size_and_hooks(struct ipt_entry *e,
783                            struct ipt_table_info *newinfo,
784                            unsigned char *base,
785                            unsigned char *limit,
786                            const unsigned int *hook_entries,
787                            const unsigned int *underflows,
788                            unsigned int *i)
789 {
790         unsigned int h;
791
792         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
793             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
794                 duprintf("Bad offset %p\n", e);
795                 return -EINVAL;
796         }
797
798         if (e->next_offset
799             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
800                 duprintf("checking: element %p size %u\n",
801                          e, e->next_offset);
802                 return -EINVAL;
803         }
804
805         /* Check hooks & underflows */
806         for (h = 0; h < NF_IP_NUMHOOKS; h++) {
807                 if ((unsigned char *)e - base == hook_entries[h])
808                         newinfo->hook_entry[h] = hook_entries[h];
809                 if ((unsigned char *)e - base == underflows[h])
810                         newinfo->underflow[h] = underflows[h];
811         }
812
813         /* FIXME: underflows must be unconditional, standard verdicts
814            < 0 (not IPT_RETURN). --RR */
815
816         /* Clear counters and comefrom */
817         e->counters = ((struct ipt_counters) { 0, 0 });
818         e->comefrom = 0;
819
820         (*i)++;
821         return 0;
822 }
823
824 static inline int
825 cleanup_entry(struct ipt_entry *e, unsigned int *i)
826 {
827         struct ipt_entry_target *t;
828
829         if (i && (*i)-- == 0)
830                 return 1;
831
832         /* Cleanup all matches */
833         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
834         t = ipt_get_target(e);
835         if (t->u.kernel.target->destroy)
836                 t->u.kernel.target->destroy(t->data,
837                                             t->u.target_size - sizeof(*t));
838         module_put(t->u.kernel.target->me);
839         return 0;
840 }
841
842 /* Checks and translates the user-supplied table segment (entries in
843    entry0; sizes and hook offsets are filled into newinfo). */
844 static int
845 translate_table(const char *name,
846                 unsigned int valid_hooks,
847                 struct ipt_table_info *newinfo,
848                 void *entry0,
849                 unsigned int size,
850                 unsigned int number,
851                 const unsigned int *hook_entries,
852                 const unsigned int *underflows)
853 {
854         unsigned int i;
855         int ret;
856
857         newinfo->size = size;
858         newinfo->number = number;
859
860         /* Init all hooks to impossible value. */
861         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
862                 newinfo->hook_entry[i] = 0xFFFFFFFF;
863                 newinfo->underflow[i] = 0xFFFFFFFF;
864         }
865
866         duprintf("translate_table: size %u\n", newinfo->size);
867         i = 0;
868         /* Walk through entries, checking offsets. */
869         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
870                                 check_entry_size_and_hooks,
871                                 newinfo,
872                                 entry0,
873                                 entry0 + size,
874                                 hook_entries, underflows, &i);
875         if (ret != 0)
876                 return ret;
877
878         if (i != number) {
879                 duprintf("translate_table: %u not %u entries\n",
880                          i, number);
881                 return -EINVAL;
882         }
883
884         /* Check hooks all assigned */
885         for (i = 0; i < NF_IP_NUMHOOKS; i++) {
886                 /* Only hooks which are valid */
887                 if (!(valid_hooks & (1 << i)))
888                         continue;
889                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
890                         duprintf("Invalid hook entry %u %u\n",
891                                  i, hook_entries[i]);
892                         return -EINVAL;
893                 }
894                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
895                         duprintf("Invalid underflow %u %u\n",
896                                  i, underflows[i]);
897                         return -EINVAL;
898                 }
899         }
900
901         if (!mark_source_chains(newinfo, valid_hooks, entry0))
902                 return -ELOOP;
903
904         /* Finally, each sanity check must pass */
905         i = 0;
906         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
907                                 check_entry, name, size, &i);
908
909         if (ret != 0) {
910                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
911                                   cleanup_entry, &i);
912                 return ret;
913         }
914
915         /* And one copy for every other CPU */
916         for_each_cpu(i) {
917                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
918                         memcpy(newinfo->entries[i], entry0, newinfo->size);
919         }
920
921         return ret;
922 }
923
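/* Swap the new table info in under the write lock and return the old one
   so the caller can free it; returns NULL (and sets *error) if the
   user-supplied counter count does not match the current table. */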
924 static struct ipt_table_info *
925 replace_table(struct ipt_table *table,
926               unsigned int num_counters,
927               struct ipt_table_info *newinfo,
928               int *error)
929 {
930         struct ipt_table_info *oldinfo;
931
932 #ifdef CONFIG_NETFILTER_DEBUG
933         {
934                 int cpu;
935
936                 for_each_cpu(cpu) {
937                         struct ipt_entry *table_base = newinfo->entries[cpu];
938                         if (table_base)
939                                 table_base->comefrom = 0xdead57ac;
940                 }
941         }
942 #endif
943
944         /* Do the substitution. */
945         write_lock_bh(&table->lock);
946         /* Check inside lock: is the old number correct? */
947         if (num_counters != table->private->number) {
948                 duprintf("num_counters != table->private->number (%u/%u)\n",
949                          num_counters, table->private->number);
950                 write_unlock_bh(&table->lock);
951                 *error = -EAGAIN;
952                 return NULL;
953         }
954         oldinfo = table->private;
955         table->private = newinfo;
956         newinfo->initial_entries = oldinfo->initial_entries;
957         write_unlock_bh(&table->lock);
958
959         return oldinfo;
960 }
961
962 /* Gets counters. */
963 static inline int
964 add_entry_to_counter(const struct ipt_entry *e,
965                      struct ipt_counters total[],
966                      unsigned int *i)
967 {
968         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
969
970         (*i)++;
971         return 0;
972 }
973
974 static inline int
975 set_entry_to_counter(const struct ipt_entry *e,
976                      struct ipt_counters total[],
977                      unsigned int *i)
978 {
979         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
980
981         (*i)++;
982         return 0;
983 }
984
985 static void
986 get_counters(const struct ipt_table_info *t,
987              struct ipt_counters counters[])
988 {
989         unsigned int cpu;
990         unsigned int i;
991         unsigned int curcpu;
992
993         /* Instead of clearing the counters (with an earlier call to
994          * memset()) and then adding, we seed them with the data from
995          * the current CPU's copy.
996          * We don't care about preemption here.
997          */
998         curcpu = raw_smp_processor_id();
999
1000         i = 0;
1001         IPT_ENTRY_ITERATE(t->entries[curcpu],
1002                           t->size,
1003                           set_entry_to_counter,
1004                           counters,
1005                           &i);
1006
1007         for_each_cpu(cpu) {
1008                 if (cpu == curcpu)
1009                         continue;
1010                 i = 0;
1011                 IPT_ENTRY_ITERATE(t->entries[cpu],
1012                                   t->size,
1013                                   add_entry_to_counter,
1014                                   counters,
1015                                   &i);
1016         }
1017 }
1018
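/* Copy the ruleset back to userspace together with a summed snapshot of the
   per-CPU counters, rewriting kernel match/target pointers back into their
   user-visible names. */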
1019 static int
1020 copy_entries_to_user(unsigned int total_size,
1021                      struct ipt_table *table,
1022                      void __user *userptr)
1023 {
1024         unsigned int off, num, countersize;
1025         struct ipt_entry *e;
1026         struct ipt_counters *counters;
1027         int ret = 0;
1028         void *loc_cpu_entry;
1029
1030         /* We need an atomic snapshot of the counters: the rest doesn't change
1031            (other than comefrom, which userspace doesn't care
1032            about). */
1033         countersize = sizeof(struct ipt_counters) * table->private->number;
1034         counters = vmalloc_node(countersize, numa_node_id());
1035
1036         if (counters == NULL)
1037                 return -ENOMEM;
1038
1039         /* First, sum counters... */
1040         write_lock_bh(&table->lock);
1041         get_counters(table->private, counters);
1042         write_unlock_bh(&table->lock);
1043
1044         /* choose the copy that is on our node/cpu ...
1045          * This choice is lazy (the current thread is allowed to
1046          * migrate to another cpu).
1047          */
1048         loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1049         /* ... then copy entire thing ... */
1050         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1051                 ret = -EFAULT;
1052                 goto free_counters;
1053         }
1054
1055         /* FIXME: use iterator macros --RR */
1056         /* ... then go back and fix counters and names */
1057         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1058                 unsigned int i;
1059                 struct ipt_entry_match *m;
1060                 struct ipt_entry_target *t;
1061
1062                 e = (struct ipt_entry *)(loc_cpu_entry + off);
1063                 if (copy_to_user(userptr + off
1064                                  + offsetof(struct ipt_entry, counters),
1065                                  &counters[num],
1066                                  sizeof(counters[num])) != 0) {
1067                         ret = -EFAULT;
1068                         goto free_counters;
1069                 }
1070
1071                 for (i = sizeof(struct ipt_entry);
1072                      i < e->target_offset;
1073                      i += m->u.match_size) {
1074                         m = (void *)e + i;
1075
1076                         if (copy_to_user(userptr + off + i
1077                                          + offsetof(struct ipt_entry_match,
1078                                                     u.user.name),
1079                                          m->u.kernel.match->name,
1080                                          strlen(m->u.kernel.match->name)+1)
1081                             != 0) {
1082                                 ret = -EFAULT;
1083                                 goto free_counters;
1084                         }
1085                 }
1086
1087                 t = ipt_get_target(e);
1088                 if (copy_to_user(userptr + off + e->target_offset
1089                                  + offsetof(struct ipt_entry_target,
1090                                             u.user.name),
1091                                  t->u.kernel.target->name,
1092                                  strlen(t->u.kernel.target->name)+1) != 0) {
1093                         ret = -EFAULT;
1094                         goto free_counters;
1095                 }
1096         }
1097
1098  free_counters:
1099         vfree(counters);
1100         return ret;
1101 }
1102
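/* IPT_SO_GET_ENTRIES: look the table up by name and hand its entries back
   to userspace, checking that the supplied size matches. */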
1103 static int
1104 get_entries(const struct ipt_get_entries *entries,
1105             struct ipt_get_entries __user *uptr)
1106 {
1107         int ret;
1108         struct ipt_table *t;
1109
1110         t = find_table_lock(entries->name);
1111         if (t && !IS_ERR(t)) {
1112                 duprintf("t->private->number = %u\n",
1113                          t->private->number);
1114                 if (entries->size == t->private->size)
1115                         ret = copy_entries_to_user(t->private->size,
1116                                                    t, uptr->entrytable);
1117                 else {
1118                         duprintf("get_entries: I've got %u not %u!\n",
1119                                  t->private->size,
1120                                  entries->size);
1121                         ret = -EINVAL;
1122                 }
1123                 module_put(t->me);
1124                 up(&ipt_mutex);
1125         } else
1126                 ret = t ? PTR_ERR(t) : -ENOENT;
1127
1128         return ret;
1129 }
1130
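/* Free each per-CPU rule copy (kmalloc'ed or vmalloc'ed depending on size)
   and then the table info itself. */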
1131 static void free_table_info(struct ipt_table_info *info)
1132 {
1133         int cpu;
1134         for_each_cpu(cpu) {
1135                 if (info->size <= PAGE_SIZE)
1136                         kfree(info->entries[cpu]);
1137                 else
1138                         vfree(info->entries[cpu]);
1139         }
1140         kfree(info);
1141 }
1142
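/* Allocate a zeroed table info plus one rule blob per CPU; blobs of at most
   a page come from kmalloc on that CPU's node, larger ones from vmalloc. */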
1143 static struct ipt_table_info *alloc_table_info(unsigned int size)
1144 {
1145         struct ipt_table_info *newinfo;
1146         int cpu;
1147
1148         newinfo = kzalloc(sizeof(struct ipt_table_info), GFP_KERNEL);
1149         if (!newinfo)
1150                 return NULL;
1151
1152         newinfo->size = size;
1153
1154         for_each_cpu(cpu) {
1155                 if (size <= PAGE_SIZE)
1156                         newinfo->entries[cpu] = kmalloc_node(size,
1157                                 GFP_KERNEL,
1158                                 cpu_to_node(cpu));
1159                 else
1160                         newinfo->entries[cpu] = vmalloc_node(size, cpu_to_node(cpu));
1161                 if (newinfo->entries[cpu] == NULL) {
1162                         free_table_info(newinfo);
1163                         return NULL;
1164                 }
1165         }
1166
1167         return newinfo;
1168 }
1169
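/* IPT_SO_SET_REPLACE: copy the replacement table in from userspace,
   translate and check it, swap it in under the table lock, and return the
   old table's counters to the caller. */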
1170 static int
1171 do_replace(void __user *user, unsigned int len)
1172 {
1173         int ret;
1174         struct ipt_replace tmp;
1175         struct ipt_table *t;
1176         struct ipt_table_info *newinfo, *oldinfo;
1177         struct ipt_counters *counters;
1178         void *loc_cpu_entry, *loc_cpu_old_entry;
1179
1180         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1181                 return -EFAULT;
1182
1183         /* Hack: Causes ipchains to give correct error msg --RR */
1184         if (len != sizeof(tmp) + tmp.size)
1185                 return -ENOPROTOOPT;
1186
1187         /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */
1188         if ((SMP_ALIGN(tmp.size) >> PAGE_SHIFT) + 2 > num_physpages)
1189                 return -ENOMEM;
1190
1191         newinfo = alloc_table_info(tmp.size);
1192         if (!newinfo)
1193                 return -ENOMEM;
1194
1195         /* choose the copy that is our node/cpu */
1196         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1197         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1198                            tmp.size) != 0) {
1199                 ret = -EFAULT;
1200                 goto free_newinfo;
1201         }
1202
1203         counters = vmalloc(tmp.num_counters * sizeof(struct ipt_counters));
1204         if (!counters) {
1205                 ret = -ENOMEM;
1206                 goto free_newinfo;
1207         }
1208
1209         ret = translate_table(tmp.name, tmp.valid_hooks,
1210                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1211                               tmp.hook_entry, tmp.underflow);
1212         if (ret != 0)
1213                 goto free_newinfo_counters;
1214
1215         duprintf("ip_tables: Translated table\n");
1216
1217         t = try_then_request_module(find_table_lock(tmp.name),
1218                                     "iptable_%s", tmp.name);
1219         if (!t || IS_ERR(t)) {
1220                 ret = t ? PTR_ERR(t) : -ENOENT;
1221                 goto free_newinfo_counters_untrans;
1222         }
1223
1224         /* You lied! */
1225         if (tmp.valid_hooks != t->valid_hooks) {
1226                 duprintf("Valid hook crap: %08X vs %08X\n",
1227                          tmp.valid_hooks, t->valid_hooks);
1228                 ret = -EINVAL;
1229                 goto put_module;
1230         }
1231
1232         oldinfo = replace_table(t, tmp.num_counters, newinfo, &ret);
1233         if (!oldinfo)
1234                 goto put_module;
1235
1236         /* Update module usage count based on number of rules */
1237         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1238                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1239         if ((oldinfo->number > oldinfo->initial_entries) || 
1240             (newinfo->number <= oldinfo->initial_entries)) 
1241                 module_put(t->me);
1242         if ((oldinfo->number > oldinfo->initial_entries) &&
1243             (newinfo->number <= oldinfo->initial_entries))
1244                 module_put(t->me);
1245
1246         /* Get the old counters. */
1247         get_counters(oldinfo, counters);
1248         /* Decrease module usage counts and free resource */
1249         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1250         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,NULL);
1251         free_table_info(oldinfo);
1252         if (copy_to_user(tmp.counters, counters,
1253                          sizeof(struct ipt_counters) * tmp.num_counters) != 0)
1254                 ret = -EFAULT;
1255         vfree(counters);
1256         up(&ipt_mutex);
1257         return ret;
1258
1259  put_module:
1260         module_put(t->me);
1261         up(&ipt_mutex);
1262  free_newinfo_counters_untrans:
1263         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry,NULL);
1264  free_newinfo_counters:
1265         vfree(counters);
1266  free_newinfo:
1267         free_table_info(newinfo);
1268         return ret;
1269 }
1270
1271 /* We're lazy, and add to the current CPU's copy; overflow works its fey
1272  * magic and everything is OK. */
1273 static inline int
1274 add_counter_to_entry(struct ipt_entry *e,
1275                      const struct ipt_counters addme[],
1276                      unsigned int *i)
1277 {
1278 #if 0
1279         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1280                  *i,
1281                  (long unsigned int)e->counters.pcnt,
1282                  (long unsigned int)e->counters.bcnt,
1283                  (long unsigned int)addme[*i].pcnt,
1284                  (long unsigned int)addme[*i].bcnt);
1285 #endif
1286
1287         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1288
1289         (*i)++;
1290         return 0;
1291 }
1292
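/* IPT_SO_SET_ADD_COUNTERS: add user-supplied byte/packet counts to the
   current CPU's copy of each rule. */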
1293 static int
1294 do_add_counters(void __user *user, unsigned int len)
1295 {
1296         unsigned int i;
1297         struct ipt_counters_info tmp, *paddc;
1298         struct ipt_table *t;
1299         int ret = 0;
1300         void *loc_cpu_entry;
1301
1302         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1303                 return -EFAULT;
1304
1305         if (len != sizeof(tmp) + tmp.num_counters*sizeof(struct ipt_counters))
1306                 return -EINVAL;
1307
1308         paddc = vmalloc_node(len, numa_node_id());
1309         if (!paddc)
1310                 return -ENOMEM;
1311
1312         if (copy_from_user(paddc, user, len) != 0) {
1313                 ret = -EFAULT;
1314                 goto free;
1315         }
1316
1317         t = find_table_lock(tmp.name);
1318         if (!t || IS_ERR(t)) {
1319                 ret = t ? PTR_ERR(t) : -ENOENT;
1320                 goto free;
1321         }
1322
1323         write_lock_bh(&t->lock);
1324         if (t->private->number != paddc->num_counters) {
1325                 ret = -EINVAL;
1326                 goto unlock_up_free;
1327         }
1328
1329         i = 0;
1330         /* Choose the copy that is on our node */
1331         loc_cpu_entry = t->private->entries[raw_smp_processor_id()];
1332         IPT_ENTRY_ITERATE(loc_cpu_entry,
1333                           t->private->size,
1334                           add_counter_to_entry,
1335                           paddc->counters,
1336                           &i);
1337  unlock_up_free:
1338         write_unlock_bh(&t->lock);
1339         up(&ipt_mutex);
1340         module_put(t->me);
1341  free:
1342         vfree(paddc);
1343
1344         return ret;
1345 }
1346
1347 static int
1348 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1349 {
1350         int ret;
1351
1352         if (!capable(CAP_NET_ADMIN))
1353                 return -EPERM;
1354
1355         switch (cmd) {
1356         case IPT_SO_SET_REPLACE:
1357                 ret = do_replace(user, len);
1358                 break;
1359
1360         case IPT_SO_SET_ADD_COUNTERS:
1361                 ret = do_add_counters(user, len);
1362                 break;
1363
1364         default:
1365                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1366                 ret = -EINVAL;
1367         }
1368
1369         return ret;
1370 }
1371
1372 static int
1373 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1374 {
1375         int ret;
1376
1377         if (!capable(CAP_NET_ADMIN))
1378                 return -EPERM;
1379
1380         switch (cmd) {
1381         case IPT_SO_GET_INFO: {
1382                 char name[IPT_TABLE_MAXNAMELEN];
1383                 struct ipt_table *t;
1384
1385                 if (*len != sizeof(struct ipt_getinfo)) {
1386                         duprintf("length %u != %u\n", *len,
1387                                  sizeof(struct ipt_getinfo));
1388                         ret = -EINVAL;
1389                         break;
1390                 }
1391
1392                 if (copy_from_user(name, user, sizeof(name)) != 0) {
1393                         ret = -EFAULT;
1394                         break;
1395                 }
1396                 name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1397
1398                 t = try_then_request_module(find_table_lock(name),
1399                                             "iptable_%s", name);
1400                 if (t && !IS_ERR(t)) {
1401                         struct ipt_getinfo info;
1402
1403                         info.valid_hooks = t->valid_hooks;
1404                         memcpy(info.hook_entry, t->private->hook_entry,
1405                                sizeof(info.hook_entry));
1406                         memcpy(info.underflow, t->private->underflow,
1407                                sizeof(info.underflow));
1408                         info.num_entries = t->private->number;
1409                         info.size = t->private->size;
1410                         memcpy(info.name, name, sizeof(info.name));
1411
1412                         if (copy_to_user(user, &info, *len) != 0)
1413                                 ret = -EFAULT;
1414                         else
1415                                 ret = 0;
1416                         up(&ipt_mutex);
1417                         module_put(t->me);
1418                 } else
1419                         ret = t ? PTR_ERR(t) : -ENOENT;
1420         }
1421         break;
1422
1423         case IPT_SO_GET_ENTRIES: {
1424                 struct ipt_get_entries get;
1425
1426                 if (*len < sizeof(get)) {
1427                         duprintf("get_entries: %u < %u\n", *len, sizeof(get));
1428                         ret = -EINVAL;
1429                 } else if (copy_from_user(&get, user, sizeof(get)) != 0) {
1430                         ret = -EFAULT;
1431                 } else if (*len != sizeof(struct ipt_get_entries) + get.size) {
1432                         duprintf("get_entries: %u != %u\n", *len,
1433                                  sizeof(struct ipt_get_entries) + get.size);
1434                         ret = -EINVAL;
1435                 } else
1436                         ret = get_entries(&get, user);
1437                 break;
1438         }
1439
1440         case IPT_SO_GET_REVISION_MATCH:
1441         case IPT_SO_GET_REVISION_TARGET: {
1442                 struct ipt_get_revision rev;
1443                 int (*revfn)(const char *, u8, int *);
1444
1445                 if (*len != sizeof(rev)) {
1446                         ret = -EINVAL;
1447                         break;
1448                 }
1449                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
1450                         ret = -EFAULT;
1451                         break;
1452                 }
1453
1454                 if (cmd == IPT_SO_GET_REVISION_TARGET)
1455                         revfn = target_revfn;
1456                 else
1457                         revfn = match_revfn;
1458
1459                 try_then_request_module(find_revision(rev.name, rev.revision,
1460                                                       revfn, &ret),
1461                                         "ipt_%s", rev.name);
1462                 break;
1463         }
1464
1465         default:
1466                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
1467                 ret = -EINVAL;
1468         }
1469
1470         return ret;
1471 }
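/*
 * Illustrative only, not part of this file: a minimal userspace sketch of
 * how the IPT_SO_GET_INFO branch above is typically exercised (it assumes
 * the usual iptables conventions: an AF_INET raw socket, sockopt level
 * IPPROTO_IP, and CAP_NET_ADMIN).  The "filter" table name is just an
 * example; error handling is omitted.
 *
 *	#include <stdio.h>
 *	#include <string.h>
 *	#include <sys/socket.h>
 *	#include <netinet/in.h>
 *	#include <linux/netfilter_ipv4/ip_tables.h>
 *
 *	int main(void)
 *	{
 *		struct ipt_getinfo info;
 *		socklen_t len = sizeof(info);
 *		int fd = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
 *
 *		memset(&info, 0, sizeof(info));
 *		strcpy(info.name, "filter");
 *		if (getsockopt(fd, IPPROTO_IP, IPT_SO_GET_INFO, &info, &len) == 0)
 *			printf("%s: %u entries, %u bytes\n",
 *			       info.name, info.num_entries, info.size);
 *		return 0;
 *	}
 *
 * IPT_SO_GET_ENTRIES follows the same pattern, except the caller must first
 * learn info.size from IPT_SO_GET_INFO and supply a buffer of
 * sizeof(struct ipt_get_entries) + info.size bytes.
 */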
1472
1473 /* Registration hooks for targets. */
1474 int
1475 ipt_register_target(struct ipt_target *target)
1476 {
1477         int ret;
1478
1479         ret = down_interruptible(&ipt_mutex);
1480         if (ret != 0)
1481                 return ret;
1482         list_add(&target->list, &ipt_target);
1483         up(&ipt_mutex);
1484         return ret;
1485 }
1486
1487 void
1488 ipt_unregister_target(struct ipt_target *target)
1489 {
1490         down(&ipt_mutex);
1491         LIST_DELETE(&ipt_target, target);
1492         up(&ipt_mutex);
1493 }
1494
1495 int
1496 ipt_register_match(struct ipt_match *match)
1497 {
1498         int ret;
1499
1500         ret = down_interruptible(&ipt_mutex);
1501         if (ret != 0)
1502                 return ret;
1503
1504         list_add(&match->list, &ipt_match);
1505         up(&ipt_mutex);
1506
1507         return ret;
1508 }
1509
1510 void
1511 ipt_unregister_match(struct ipt_match *match)
1512 {
1513         down(&ipt_mutex);
1514         LIST_DELETE(&ipt_match, match);
1515         up(&ipt_mutex);
1516 }
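/*
 * For reference, the four registration hooks above are what extension
 * modules call from their init/exit routines.  A hypothetical "foo" match
 * (the names and callbacks here are placeholders, not a real extension)
 * would do roughly this:
 *
 *	static struct ipt_match foo_match = {
 *		.name		= "foo",
 *		.match		= foo_match_fn,
 *		.checkentry	= foo_checkentry,
 *		.me		= THIS_MODULE,
 *	};
 *
 *	static int __init foo_init(void)
 *	{
 *		return ipt_register_match(&foo_match);
 *	}
 *
 *	static void __exit foo_exit(void)
 *	{
 *		ipt_unregister_match(&foo_match);
 *	}
 *
 * Targets are registered the same way via ipt_register_target() /
 * ipt_unregister_target().
 */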
1517
1518 int ipt_register_table(struct ipt_table *table, const struct ipt_replace *repl)
1519 {
1520         int ret;
1521         struct ipt_table_info *newinfo;
1522         static struct ipt_table_info bootstrap
1523                 = { 0, 0, 0, { 0 }, { 0 }, { } };
1524         void *loc_cpu_entry;
1525
1526         newinfo = alloc_table_info(repl->size);
1527         if (!newinfo)
1528                 return -ENOMEM;
1529
1530         /* Choose the copy on our node/cpu,
1531          * but don't worry about preemption.
1532          */
1533         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1534         memcpy(loc_cpu_entry, repl->entries, repl->size);
1535
1536         ret = translate_table(table->name, table->valid_hooks,
1537                               newinfo, loc_cpu_entry, repl->size,
1538                               repl->num_entries,
1539                               repl->hook_entry,
1540                               repl->underflow);
1541         if (ret != 0) {
1542                 free_table_info(newinfo);
1543                 return ret;
1544         }
1545
1546         ret = down_interruptible(&ipt_mutex);
1547         if (ret != 0) {
1548                 free_table_info(newinfo);
1549                 return ret;
1550         }
1551
1552         /* Don't autoload: we'd eat our tail... */
1553         if (list_named_find(&ipt_tables, table->name)) {
1554                 ret = -EEXIST;
1555                 goto free_unlock;
1556         }
1557
1558         /* Simplifies replace_table code. */
1559         table->private = &bootstrap;
1560         if (!replace_table(table, 0, newinfo, &ret))
1561                 goto free_unlock;
1562
1563         duprintf("table->private->number = %u\n",
1564                  table->private->number);
1565         
1566         /* save number of initial entries */
1567         table->private->initial_entries = table->private->number;
1568
1569         rwlock_init(&table->lock);
1570         list_prepend(&ipt_tables, table);
1571
1572  unlock:
1573         up(&ipt_mutex);
1574         return ret;
1575
1576  free_unlock:
1577         free_table_info(newinfo);
1578         goto unlock;
1579 }
1580
1581 void ipt_unregister_table(struct ipt_table *table)
1582 {
1583         void *loc_cpu_entry;
1584
1585         down(&ipt_mutex);
1586         LIST_DELETE(&ipt_tables, table);
1587         up(&ipt_mutex);
1588
1589         /* Decrease module usage counts and free resources */
1590         loc_cpu_entry = table->private->entries[raw_smp_processor_id()];
1591         IPT_ENTRY_ITERATE(loc_cpu_entry, table->private->size,
1592                           cleanup_entry, NULL);
1593         free_table_info(table->private);
1594 }
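/*
 * Table modules (iptable_filter and friends) drive the two functions above.
 * They build a static initial table image - a struct ipt_replace header
 * followed by one empty policy entry per hooked chain plus a terminating
 * ERROR entry - and hand it to ipt_register_table() at module init.  A
 * condensed sketch along those lines (the entry layout itself is omitted
 * here; see iptable_filter.c for the real thing):
 *
 *	static struct ipt_table packet_filter = {
 *		.name		= "filter",
 *		.valid_hooks	= (1 << NF_IP_LOCAL_IN) |
 *				  (1 << NF_IP_FORWARD) |
 *				  (1 << NF_IP_LOCAL_OUT),
 *		.lock		= RW_LOCK_UNLOCKED,
 *		.me		= THIS_MODULE,
 *	};
 *
 *	static int __init filter_init(void)
 *	{
 *		return ipt_register_table(&packet_filter, &initial_table.repl);
 *	}
 *
 *	static void __exit filter_exit(void)
 *	{
 *		ipt_unregister_table(&packet_filter);
 *	}
 */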
1595
1596 /* Returns 1 if the port is matched by the range, 0 otherwise */
1597 static inline int
1598 port_match(u_int16_t min, u_int16_t max, u_int16_t port, int invert)
1599 {
1600         int ret;
1601
1602         ret = (port >= min && port <= max) ^ invert;
1603         return ret;
1604 }
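/*
 * Worked example of the inversion above: a rule with "--sport 1024:65535"
 * calls port_match(1024, 65535, sport, 0), which is 0 for sport 80 and 1
 * for sport 2048; the negated "! --sport 1024:65535" passes invert == 1
 * and the XOR flips both results.
 */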
1605
1606 static int
1607 tcp_find_option(u_int8_t option,
1608                 const struct sk_buff *skb,
1609                 unsigned int optlen,
1610                 int invert,
1611                 int *hotdrop)
1612 {
1613         /* tcp.doff is only 4 bits, i.e. max 15 * 4 bytes */
1614         u_int8_t _opt[60 - sizeof(struct tcphdr)], *op;
1615         unsigned int i;
1616
1617         duprintf("tcp_match: finding option\n");
1618
1619         if (!optlen)
1620                 return invert;
1621
1622         /* If we don't have the whole header, drop packet. */
1623         op = skb_header_pointer(skb,
1624                                 skb->nh.iph->ihl*4 + sizeof(struct tcphdr),
1625                                 optlen, _opt);
1626         if (op == NULL) {
1627                 *hotdrop = 1;
1628                 return 0;
1629         }
1630
1631         for (i = 0; i < optlen; ) {
1632                 if (op[i] == option) return !invert;
1633                 if (op[i] < 2) i++;
1634                 else i += op[i+1]?:1;
1635         }
1636
1637         return invert;
1638 }
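/*
 * The walk above follows the TCP option kind/length encoding: kinds 0 (EOL)
 * and 1 (NOP) are single bytes, every other option is kind, length, data
 * with the length byte counting all three.  E.g. with an MSS option the
 * buffer starts 0x02 0x04 <mss>, so a search for option 2 matches at i == 0
 * and any other search skips ahead by op[1] == 4.
 */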
1639
1640 static int
1641 tcp_match(const struct sk_buff *skb,
1642           const struct net_device *in,
1643           const struct net_device *out,
1644           const void *matchinfo,
1645           int offset,
1646           int *hotdrop)
1647 {
1648         struct tcphdr _tcph, *th;
1649         const struct ipt_tcp *tcpinfo = matchinfo;
1650
1651         if (offset) {
1652                 /* To quote Alan:
1653
1654                    Don't allow a fragment of TCP 8 bytes in. Nobody normal
1655                    causes this. It's a cracker trying to break in by doing a
1656                    flag overwrite to pass the direction checks.
1657                 */
1658                 if (offset == 1) {
1659                         duprintf("Dropping evil TCP offset=1 frag.\n");
1660                         *hotdrop = 1;
1661                 }
1662                 /* Must not be a fragment. */
1663                 return 0;
1664         }
1665
1666 #define FWINVTCP(bool,invflg) ((bool) ^ !!(tcpinfo->invflags & invflg))
1667
1668         th = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1669                                 sizeof(_tcph), &_tcph);
1670         if (th == NULL) {
1671                 /* We've been asked to examine this packet, and we
1672                    can't.  Hence, no choice but to drop. */
1673                 duprintf("Dropping evil TCP offset=0 tinygram.\n");
1674                 *hotdrop = 1;
1675                 return 0;
1676         }
1677
1678         if (!port_match(tcpinfo->spts[0], tcpinfo->spts[1],
1679                         ntohs(th->source),
1680                         !!(tcpinfo->invflags & IPT_TCP_INV_SRCPT)))
1681                 return 0;
1682         if (!port_match(tcpinfo->dpts[0], tcpinfo->dpts[1],
1683                         ntohs(th->dest),
1684                         !!(tcpinfo->invflags & IPT_TCP_INV_DSTPT)))
1685                 return 0;
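	/* Byte 13 of the TCP header is the flags octet (CWR/ECE/URG/ACK/
	 * PSH/RST/SYN/FIN), so masking and comparing it below implements
	 * the --tcp-flags test directly. */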
1686         if (!FWINVTCP((((unsigned char *)th)[13] & tcpinfo->flg_mask)
1687                       == tcpinfo->flg_cmp,
1688                       IPT_TCP_INV_FLAGS))
1689                 return 0;
1690         if (tcpinfo->option) {
1691                 if (th->doff * 4 < sizeof(_tcph)) {
1692                         *hotdrop = 1;
1693                         return 0;
1694                 }
1695                 if (!tcp_find_option(tcpinfo->option, skb,
1696                                      th->doff*4 - sizeof(_tcph),
1697                                      tcpinfo->invflags & IPT_TCP_INV_OPTION,
1698                                      hotdrop))
1699                         return 0;
1700         }
1701         return 1;
1702 }
1703
1704 /* Called when user tries to insert an entry of this type. */
1705 static int
1706 tcp_checkentry(const char *tablename,
1707                const struct ipt_ip *ip,
1708                void *matchinfo,
1709                unsigned int matchsize,
1710                unsigned int hook_mask)
1711 {
1712         const struct ipt_tcp *tcpinfo = matchinfo;
1713
1714         /* Must specify proto == TCP, and no unknown invflags */
1715         return ip->proto == IPPROTO_TCP
1716                 && !(ip->invflags & IPT_INV_PROTO)
1717                 && matchsize == IPT_ALIGN(sizeof(struct ipt_tcp))
1718                 && !(tcpinfo->invflags & ~IPT_TCP_INV_MASK);
1719 }
1720
1721 static int
1722 udp_match(const struct sk_buff *skb,
1723           const struct net_device *in,
1724           const struct net_device *out,
1725           const void *matchinfo,
1726           int offset,
1727           int *hotdrop)
1728 {
1729         struct udphdr _udph, *uh;
1730         const struct ipt_udp *udpinfo = matchinfo;
1731
1732         /* Must not be a fragment. */
1733         if (offset)
1734                 return 0;
1735
1736         uh = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1737                                 sizeof(_udph), &_udph);
1738         if (uh == NULL) {
1739                 /* We've been asked to examine this packet, and we
1740                    can't.  Hence, no choice but to drop. */
1741                 duprintf("Dropping evil UDP tinygram.\n");
1742                 *hotdrop = 1;
1743                 return 0;
1744         }
1745
1746         return port_match(udpinfo->spts[0], udpinfo->spts[1],
1747                           ntohs(uh->source),
1748                           !!(udpinfo->invflags & IPT_UDP_INV_SRCPT))
1749                 && port_match(udpinfo->dpts[0], udpinfo->dpts[1],
1750                               ntohs(uh->dest),
1751                               !!(udpinfo->invflags & IPT_UDP_INV_DSTPT));
1752 }
1753
1754 /* Called when user tries to insert an entry of this type. */
1755 static int
1756 udp_checkentry(const char *tablename,
1757                const struct ipt_ip *ip,
1758                void *matchinfo,
1759                unsigned int matchinfosize,
1760                unsigned int hook_mask)
1761 {
1762         const struct ipt_udp *udpinfo = matchinfo;
1763
1764         /* Must specify proto == UDP, and no unknown invflags */
1765         if (ip->proto != IPPROTO_UDP || (ip->invflags & IPT_INV_PROTO)) {
1766                 duprintf("ipt_udp: Protocol %u != %u\n", ip->proto,
1767                          IPPROTO_UDP);
1768                 return 0;
1769         }
1770         if (matchinfosize != IPT_ALIGN(sizeof(struct ipt_udp))) {
1771                 duprintf("ipt_udp: matchsize %u != %Zu\n",
1772                          matchinfosize, IPT_ALIGN(sizeof(struct ipt_udp)));
1773                 return 0;
1774         }
1775         if (udpinfo->invflags & ~IPT_UDP_INV_MASK) {
1776                 duprintf("ipt_udp: unknown flags %X\n",
1777                          udpinfo->invflags);
1778                 return 0;
1779         }
1780
1781         return 1;
1782 }
1783
1784 /* Returns 1 if the type and code is matched by the range, 0 otherwise */
1785 static inline int
1786 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
1787                      u_int8_t type, u_int8_t code,
1788                      int invert)
1789 {
1790         return ((test_type == 0xFF) || (type == test_type && code >= min_code && code <= max_code))
1791                 ^ invert;
1792 }
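/*
 * Example: iptables encodes "any ICMP" as test_type 0xFF, which matches
 * every type/code via the short-circuit above, while "--icmp-type
 * echo-request" becomes test_type 8 with a code range of 0-255.
 */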
1793
1794 static int
1795 icmp_match(const struct sk_buff *skb,
1796            const struct net_device *in,
1797            const struct net_device *out,
1798            const void *matchinfo,
1799            int offset,
1800            int *hotdrop)
1801 {
1802         struct icmphdr _icmph, *ic;
1803         const struct ipt_icmp *icmpinfo = matchinfo;
1804
1805         /* Must not be a fragment. */
1806         if (offset)
1807                 return 0;
1808
1809         ic = skb_header_pointer(skb, skb->nh.iph->ihl*4,
1810                                 sizeof(_icmph), &_icmph);
1811         if (ic == NULL) {
1812                 /* We've been asked to examine this packet, and we
1813                  * can't.  Hence, no choice but to drop.
1814                  */
1815                 duprintf("Dropping evil ICMP tinygram.\n");
1816                 *hotdrop = 1;
1817                 return 0;
1818         }
1819
1820         return icmp_type_code_match(icmpinfo->type,
1821                                     icmpinfo->code[0],
1822                                     icmpinfo->code[1],
1823                                     ic->type, ic->code,
1824                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
1825 }
1826
1827 /* Called when user tries to insert an entry of this type. */
1828 static int
1829 icmp_checkentry(const char *tablename,
1830            const struct ipt_ip *ip,
1831            void *matchinfo,
1832            unsigned int matchsize,
1833            unsigned int hook_mask)
1834 {
1835         const struct ipt_icmp *icmpinfo = matchinfo;
1836
1837         /* Must specify proto == ICMP, and no unknown invflags */
1838         return ip->proto == IPPROTO_ICMP
1839                 && !(ip->invflags & IPT_INV_PROTO)
1840                 && matchsize == IPT_ALIGN(sizeof(struct ipt_icmp))
1841                 && !(icmpinfo->invflags & ~IPT_ICMP_INV);
1842 }
1843
1844 /* The built-in targets: standard (NULL) and error. */
1845 static struct ipt_target ipt_standard_target = {
1846         .name           = IPT_STANDARD_TARGET,
1847 };
1848
1849 static struct ipt_target ipt_error_target = {
1850         .name           = IPT_ERROR_TARGET,
1851         .target         = ipt_error,
1852 };
1853
1854 static struct nf_sockopt_ops ipt_sockopts = {
1855         .pf             = PF_INET,
1856         .set_optmin     = IPT_BASE_CTL,
1857         .set_optmax     = IPT_SO_SET_MAX+1,
1858         .set            = do_ipt_set_ctl,
1859         .get_optmin     = IPT_BASE_CTL,
1860         .get_optmax     = IPT_SO_GET_MAX+1,
1861         .get            = do_ipt_get_ctl,
1862 };
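/* Wired up via nf_register_sockopt() in init() below: get/setsockopt calls
 * on PF_INET sockets whose optname falls inside the ranges above are routed
 * by the netfilter sockopt core to do_ipt_get_ctl()/do_ipt_set_ctl(). */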
1863
1864 static struct ipt_match tcp_matchstruct = {
1865         .name           = "tcp",
1866         .match          = &tcp_match,
1867         .checkentry     = &tcp_checkentry,
1868 };
1869
1870 static struct ipt_match udp_matchstruct = {
1871         .name           = "udp",
1872         .match          = &udp_match,
1873         .checkentry     = &udp_checkentry,
1874 };
1875
1876 static struct ipt_match icmp_matchstruct = {
1877         .name           = "icmp",
1878         .match          = &icmp_match,
1879         .checkentry     = &icmp_checkentry,
1880 };
1881
1882 #ifdef CONFIG_PROC_FS
1883 static inline int print_name(const char *i,
1884                              off_t start_offset, char *buffer, int length,
1885                              off_t *pos, unsigned int *count)
1886 {
1887         if ((*count)++ >= start_offset) {
1888                 unsigned int namelen;
1889
1890                 namelen = sprintf(buffer + *pos, "%s\n",
1891                                   i + sizeof(struct list_head));
1892                 if (*pos + namelen > length) {
1893                         /* Stop iterating */
1894                         return 1;
1895                 }
1896                 *pos += namelen;
1897         }
1898         return 0;
1899 }
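/* The sizeof(struct list_head) offset above relies on struct ipt_table,
 * struct ipt_target and struct ipt_match all beginning with their list_head
 * immediately followed by the name[] field, so the same printer can be
 * reused for all three /proc listings below. */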
1900
1901 static inline int print_target(const struct ipt_target *t,
1902                                off_t start_offset, char *buffer, int length,
1903                                off_t *pos, unsigned int *count)
1904 {
1905         if (t == &ipt_standard_target || t == &ipt_error_target)
1906                 return 0;
1907         return print_name((char *)t, start_offset, buffer, length, pos, count);
1908 }
1909
1910 static int ipt_get_tables(char *buffer, char **start, off_t offset, int length)
1911 {
1912         off_t pos = 0;
1913         unsigned int count = 0;
1914
1915         if (down_interruptible(&ipt_mutex) != 0)
1916                 return 0;
1917
1918         LIST_FIND(&ipt_tables, print_name, void *,
1919                   offset, buffer, length, &pos, &count);
1920
1921         up(&ipt_mutex);
1922
1923         /* `start' hack - see fs/proc/generic.c line ~105 */
1924         *start = (char *)((unsigned long)count - offset);
1925         return pos;
1926 }
1927
1928 static int ipt_get_targets(char *buffer, char **start, off_t offset, int length)
1929 {
1930         off_t pos = 0;
1931         unsigned int count = 0;
1932
1933         if (down_interruptible(&ipt_mutex) != 0)
1934                 return 0;
1935
1936         LIST_FIND(&ipt_target, print_target, struct ipt_target *,
1937                   offset, buffer, length, &pos, &count);
1938         
1939         up(&ipt_mutex);
1940
1941         *start = (char *)((unsigned long)count - offset);
1942         return pos;
1943 }
1944
1945 static int ipt_get_matches(char *buffer, char **start, off_t offset, int length)
1946 {
1947         off_t pos = 0;
1948         unsigned int count = 0;
1949
1950         if (down_interruptible(&ipt_mutex) != 0)
1951                 return 0;
1952         
1953         LIST_FIND(&ipt_match, print_name, void *,
1954                   offset, buffer, length, &pos, &count);
1955
1956         up(&ipt_mutex);
1957
1958         *start = (char *)((unsigned long)count - offset);
1959         return pos;
1960 }
1961
1962 static const struct { char *name; get_info_t *get_info; } ipt_proc_entry[] =
1963 { { "ip_tables_names", ipt_get_tables },
1964   { "ip_tables_targets", ipt_get_targets },
1965   { "ip_tables_matches", ipt_get_matches },
1966   { NULL, NULL} };
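/*
 * These appear as /proc/net/ip_tables_names, /proc/net/ip_tables_targets and
 * /proc/net/ip_tables_matches, each listing one registered name per line;
 * e.g. "filter" shows up in ip_tables_names once iptable_filter is loaded.
 */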
1967 #endif /*CONFIG_PROC_FS*/
1968
1969 static int __init init(void)
1970 {
1971         int ret;
1972
1973         /* No one else will be downing the sem now, so we won't sleep */
1974         down(&ipt_mutex);
1975         list_append(&ipt_target, &ipt_standard_target);
1976         list_append(&ipt_target, &ipt_error_target);
1977         list_append(&ipt_match, &tcp_matchstruct);
1978         list_append(&ipt_match, &udp_matchstruct);
1979         list_append(&ipt_match, &icmp_matchstruct);
1980         up(&ipt_mutex);
1981
1982         /* Register setsockopt */
1983         ret = nf_register_sockopt(&ipt_sockopts);
1984         if (ret < 0) {
1985                 duprintf("Unable to register sockopts.\n");
1986                 return ret;
1987         }
1988
1989 #ifdef CONFIG_PROC_FS
1990         {
1991         struct proc_dir_entry *proc;
1992         int i;
1993
1994         for (i = 0; ipt_proc_entry[i].name; i++) {
1995                 proc = proc_net_create(ipt_proc_entry[i].name, 0,
1996                                        ipt_proc_entry[i].get_info);
1997                 if (!proc) {
1998                         while (--i >= 0)
1999                                 proc_net_remove(ipt_proc_entry[i].name);
2000                         nf_unregister_sockopt(&ipt_sockopts);
2001                         return -ENOMEM;
2002                 }
2003                 proc->owner = THIS_MODULE;
2004         }
2005         }
2006 #endif
2007
2008         printk(KERN_INFO "ip_tables: (C) 2000-2002 Netfilter core team\n");
2009         return 0;
2010 }
2011
2012 static void __exit fini(void)
2013 {
2014         nf_unregister_sockopt(&ipt_sockopts);
2015 #ifdef CONFIG_PROC_FS
2016         {
2017         int i;
2018         for (i = 0; ipt_proc_entry[i].name; i++)
2019                 proc_net_remove(ipt_proc_entry[i].name);
2020         }
2021 #endif
2022 }
2023
2024 EXPORT_SYMBOL(ipt_register_table);
2025 EXPORT_SYMBOL(ipt_unregister_table);
2026 EXPORT_SYMBOL(ipt_register_match);
2027 EXPORT_SYMBOL(ipt_unregister_match);
2028 EXPORT_SYMBOL(ipt_do_table);
2029 EXPORT_SYMBOL(ipt_register_target);
2030 EXPORT_SYMBOL(ipt_unregister_target);
2031 EXPORT_SYMBOL(ipt_find_target);
2032
2033 module_init(init);
2034 module_exit(fini);