net/ipv4/netfilter/ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/cache.h>
12 #include <linux/capability.h>
13 #include <linux/skbuff.h>
14 #include <linux/kmod.h>
15 #include <linux/vmalloc.h>
16 #include <linux/netdevice.h>
17 #include <linux/module.h>
18 #include <linux/icmp.h>
19 #include <net/ip.h>
20 #include <net/compat.h>
21 #include <asm/uaccess.h>
22 #include <linux/mutex.h>
23 #include <linux/proc_fs.h>
24 #include <linux/err.h>
25 #include <linux/cpumask.h>
26
27 #include <linux/netfilter/x_tables.h>
28 #include <linux/netfilter_ipv4/ip_tables.h>
29 #include <net/netfilter/nf_log.h>
30
31 MODULE_LICENSE("GPL");
32 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
33 MODULE_DESCRIPTION("IPv4 packet filter");
34
35 /*#define DEBUG_IP_FIREWALL*/
36 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
37 /*#define DEBUG_IP_FIREWALL_USER*/
38
39 #ifdef DEBUG_IP_FIREWALL
40 #define dprintf(format, args...)  printk(format , ## args)
41 #else
42 #define dprintf(format, args...)
43 #endif
44
45 #ifdef DEBUG_IP_FIREWALL_USER
46 #define duprintf(format, args...) printk(format , ## args)
47 #else
48 #define duprintf(format, args...)
49 #endif
50
51 #ifdef CONFIG_NETFILTER_DEBUG
52 #define IP_NF_ASSERT(x)                                         \
53 do {                                                            \
54         if (!(x))                                               \
55                 printk("IP_NF_ASSERT: %s:%s:%u\n",              \
56                        __func__, __FILE__, __LINE__);   \
57 } while(0)
58 #else
59 #define IP_NF_ASSERT(x)
60 #endif
61
62 #if 0
63 /* All the better to debug you with... */
64 #define static
65 #define inline
66 #endif
67
68 /*
69    We keep a set of rules for each CPU, so we can avoid write-locking
70    them in the softirq when updating the counters and therefore
71    only need to read-lock in the softirq; doing a write_lock_bh() in user
72    context stops packets coming through and allows user context to read
73    the counters or update the rules.
74
75    Hence the start of any table is given by get_entry() below.  */
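/*
 * Condensed sketch of the read side as implemented by ipt_do_table()
 * further down (illustration only, not compiled; the identifiers are the
 * ones used there): take the BH-safe RCU read lock, dereference the
 * table's private data, and walk this CPU's private copy of the entries
 * without any write lock.
 */
#if 0
	rcu_read_lock_bh();
	private    = rcu_dereference(table->private);
	table_base = rcu_dereference(private->entries[smp_processor_id()]);
	e = get_entry(table_base, private->hook_entry[hook]);
	/* ... match entries and update e->counters ... */
	rcu_read_unlock_bh();
#endif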
76
77 /* Returns whether the packet matches the rule or not. */
78 /* Performance critical - called for every packet */
79 static inline bool
80 ip_packet_match(const struct iphdr *ip,
81                 const char *indev,
82                 const char *outdev,
83                 const struct ipt_ip *ipinfo,
84                 int isfrag)
85 {
86         unsigned long ret;
87
88 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
89
90         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
91                   IPT_INV_SRCIP)
92             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
93                      IPT_INV_DSTIP)) {
94                 dprintf("Source or dest mismatch.\n");
95
96                 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
97                         &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
98                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
99                 dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
100                         &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
101                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
102                 return false;
103         }
104
105         ret = ifname_compare_aligned(indev, ipinfo->iniface, ipinfo->iniface_mask);
106
107         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
108                 dprintf("VIA in mismatch (%s vs %s).%s\n",
109                         indev, ipinfo->iniface,
110                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
111                 return false;
112         }
113
114         ret = ifname_compare_aligned(outdev, ipinfo->outiface, ipinfo->outiface_mask);
115
116         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
117                 dprintf("VIA out mismatch (%s vs %s).%s\n",
118                         outdev, ipinfo->outiface,
119                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
120                 return false;
121         }
122
123         /* Check specific protocol */
124         if (ipinfo->proto
125             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
126                 dprintf("Packet protocol %hi does not match %hi.%s\n",
127                         ip->protocol, ipinfo->proto,
128                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
129                 return false;
130         }
131
132         /* If we have a fragment rule but the packet is not a fragment
133          * then we return false */
134         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
135                 dprintf("Fragment rule but not fragment.%s\n",
136                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
137                 return false;
138         }
139
140         return true;
141 }
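/*
 * Minimal illustration of the FWINV() idiom used above (not compiled;
 * the rule and addresses are hypothetical): with IPT_INV_SRCIP set, a
 * rule written as "! -s 10.0.0.0/8" matches exactly when the masked
 * source comparison fails, because the mismatch result is XORed with the
 * inversion bit before being used as a reason to reject the packet.
 */
#if 0
	/* rule "! -s 10.0.0.0/8": ipinfo->invflags has IPT_INV_SRCIP set */
	bool mismatch = (ip->saddr & ipinfo->smsk.s_addr) != ipinfo->src.s_addr;
	/* FWINV(mismatch, IPT_INV_SRCIP) == !mismatch here, so the packet
	 * is rejected only when its source *does* fall inside 10.0.0.0/8. */
#endif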
142
143 static bool
144 ip_checkentry(const struct ipt_ip *ip)
145 {
146         if (ip->flags & ~IPT_F_MASK) {
147                 duprintf("Unknown flag bits set: %08X\n",
148                          ip->flags & ~IPT_F_MASK);
149                 return false;
150         }
151         if (ip->invflags & ~IPT_INV_MASK) {
152                 duprintf("Unknown invflag bits set: %08X\n",
153                          ip->invflags & ~IPT_INV_MASK);
154                 return false;
155         }
156         return true;
157 }
158
159 static unsigned int
160 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
161 {
162         if (net_ratelimit())
163                 printk("ip_tables: error: `%s'\n",
164                        (const char *)par->targinfo);
165
166         return NF_DROP;
167 }
168
169 /* Performance critical - called for every packet */
170 static inline bool
171 do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
172          struct xt_match_param *par)
173 {
174         par->match     = m->u.kernel.match;
175         par->matchinfo = m->data;
176
177         /* Stop iteration if it doesn't match */
178         if (!m->u.kernel.match->match(skb, par))
179                 return true;
180         else
181                 return false;
182 }
183
184 /* Performance critical */
185 static inline struct ipt_entry *
186 get_entry(void *base, unsigned int offset)
187 {
188         return (struct ipt_entry *)(base + offset);
189 }
190
191 /* All zeroes == unconditional rule. */
192 /* Mildly perf critical (only if packet tracing is on) */
193 static inline int
194 unconditional(const struct ipt_ip *ip)
195 {
196         unsigned int i;
197
198         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
199                 if (((__u32 *)ip)[i])
200                         return 0;
201
202         return 1;
203 #undef FWINV
204 }
205
206 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
207     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
208 static const char *const hooknames[] = {
209         [NF_INET_PRE_ROUTING]           = "PREROUTING",
210         [NF_INET_LOCAL_IN]              = "INPUT",
211         [NF_INET_FORWARD]               = "FORWARD",
212         [NF_INET_LOCAL_OUT]             = "OUTPUT",
213         [NF_INET_POST_ROUTING]          = "POSTROUTING",
214 };
215
216 enum nf_ip_trace_comments {
217         NF_IP_TRACE_COMMENT_RULE,
218         NF_IP_TRACE_COMMENT_RETURN,
219         NF_IP_TRACE_COMMENT_POLICY,
220 };
221
222 static const char *const comments[] = {
223         [NF_IP_TRACE_COMMENT_RULE]      = "rule",
224         [NF_IP_TRACE_COMMENT_RETURN]    = "return",
225         [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
226 };
227
228 static struct nf_loginfo trace_loginfo = {
229         .type = NF_LOG_TYPE_LOG,
230         .u = {
231                 .log = {
232                         .level = 4,
233                         .logflags = NF_LOG_MASK,
234                 },
235         },
236 };
237
238 /* Mildly perf critical (only if packet tracing is on) */
239 static inline int
240 get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
241                       char *hookname, char **chainname,
242                       char **comment, unsigned int *rulenum)
243 {
244         struct ipt_standard_target *t = (void *)ipt_get_target(s);
245
246         if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
247                 /* Head of user chain: ERROR target with chainname */
248                 *chainname = t->target.data;
249                 (*rulenum) = 0;
250         } else if (s == e) {
251                 (*rulenum)++;
252
253                 if (s->target_offset == sizeof(struct ipt_entry)
254                    && strcmp(t->target.u.kernel.target->name,
255                              IPT_STANDARD_TARGET) == 0
256                    && t->verdict < 0
257                    && unconditional(&s->ip)) {
258                         /* Tail of chains: STANDARD target (return/policy) */
259                         *comment = *chainname == hookname
260                                 ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
261                                 : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
262                 }
263                 return 1;
264         } else
265                 (*rulenum)++;
266
267         return 0;
268 }
269
270 static void trace_packet(struct sk_buff *skb,
271                          unsigned int hook,
272                          const struct net_device *in,
273                          const struct net_device *out,
274                          const char *tablename,
275                          struct xt_table_info *private,
276                          struct ipt_entry *e)
277 {
278         void *table_base;
279         const struct ipt_entry *root;
280         char *hookname, *chainname, *comment;
281         unsigned int rulenum = 0;
282
283         table_base = (void *)private->entries[smp_processor_id()];
284         root = get_entry(table_base, private->hook_entry[hook]);
285
286         hookname = chainname = (char *)hooknames[hook];
287         comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
288
289         IPT_ENTRY_ITERATE(root,
290                           private->size - private->hook_entry[hook],
291                           get_chainname_rulenum,
292                           e, hookname, &chainname, &comment, &rulenum);
293
294         nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
295                       "TRACE: %s:%s:%s:%u ",
296                       tablename, chainname, comment, rulenum);
297 }
298 #endif
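/*
 * For orientation, a made-up example of the prefix emitted by
 * trace_packet() above ("table:chain:comment:rulenum", from the format
 * string passed to nf_log_packet()):
 *
 *     TRACE: filter:INPUT:rule:3
 *
 * The registered logging backend appends its own packet dump after this
 * prefix; the table name, chain name and rule number shown here are
 * purely illustrative.
 */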
299
300 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
301 unsigned int
302 ipt_do_table(struct sk_buff *skb,
303              unsigned int hook,
304              const struct net_device *in,
305              const struct net_device *out,
306              struct xt_table *table)
307 {
308         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
309         const struct iphdr *ip;
310         u_int16_t datalen;
311         bool hotdrop = false;
312         /* Initializing verdict to NF_DROP keeps gcc happy. */
313         unsigned int verdict = NF_DROP;
314         const char *indev, *outdev;
315         void *table_base;
316         struct ipt_entry *e, *back;
317         struct xt_table_info *private;
318         struct xt_match_param mtpar;
319         struct xt_target_param tgpar;
320
321         /* Initialization */
322         ip = ip_hdr(skb);
323         datalen = skb->len - ip->ihl * 4;
324         indev = in ? in->name : nulldevname;
325         outdev = out ? out->name : nulldevname;
326         /* We handle fragments by dealing with the first fragment as
327          * if it was a normal packet.  All other fragments are treated
328          * normally, except that they will NEVER match rules that ask
329          * for things we don't know (e.g. the TCP SYN flag or ports).
330          * If the rule is also a fragment-specific rule, non-fragments
331          * won't match it. */
332         mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
333         mtpar.thoff   = ip_hdrlen(skb);
334         mtpar.hotdrop = &hotdrop;
335         mtpar.in      = tgpar.in  = in;
336         mtpar.out     = tgpar.out = out;
337         mtpar.family  = tgpar.family = NFPROTO_IPV4;
338         tgpar.hooknum = hook;
339
340         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
341
342         rcu_read_lock_bh();
343         private = rcu_dereference(table->private);
344         table_base = rcu_dereference(private->entries[smp_processor_id()]);
345
346         e = get_entry(table_base, private->hook_entry[hook]);
347
348         /* For return from builtin chain */
349         back = get_entry(table_base, private->underflow[hook]);
350
351         do {
352                 IP_NF_ASSERT(e);
353                 IP_NF_ASSERT(back);
354                 if (ip_packet_match(ip, indev, outdev,
355                     &e->ip, mtpar.fragoff)) {
356                         struct ipt_entry_target *t;
357
358                         if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
359                                 goto no_match;
360
361                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
362
363                         t = ipt_get_target(e);
364                         IP_NF_ASSERT(t->u.kernel.target);
365
366 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
367     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
368                         /* The packet is traced: log it */
369                         if (unlikely(skb->nf_trace))
370                                 trace_packet(skb, hook, in, out,
371                                              table->name, private, e);
372 #endif
373                         /* Standard target? */
374                         if (!t->u.kernel.target->target) {
375                                 int v;
376
377                                 v = ((struct ipt_standard_target *)t)->verdict;
378                                 if (v < 0) {
379                                         /* Pop from stack? */
380                                         if (v != IPT_RETURN) {
381                                                 verdict = (unsigned)(-v) - 1;
382                                                 break;
383                                         }
384                                         e = back;
385                                         back = get_entry(table_base,
386                                                          back->comefrom);
387                                         continue;
388                                 }
389                                 if (table_base + v != (void *)e + e->next_offset
390                                     && !(e->ip.flags & IPT_F_GOTO)) {
391                                         /* Save old back ptr in next entry */
392                                         struct ipt_entry *next
393                                                 = (void *)e + e->next_offset;
394                                         next->comefrom
395                                                 = (void *)back - table_base;
396                                         /* set back pointer to next entry */
397                                         back = next;
398                                 }
399
400                                 e = get_entry(table_base, v);
401                         } else {
402                                 /* Targets which reenter must return
403                                    abs. verdicts */
404                                 tgpar.target   = t->u.kernel.target;
405                                 tgpar.targinfo = t->data;
406 #ifdef CONFIG_NETFILTER_DEBUG
407                                 ((struct ipt_entry *)table_base)->comefrom
408                                         = 0xeeeeeeec;
409 #endif
410                                 verdict = t->u.kernel.target->target(skb,
411                                                                      &tgpar);
412 #ifdef CONFIG_NETFILTER_DEBUG
413                                 if (((struct ipt_entry *)table_base)->comefrom
414                                     != 0xeeeeeeec
415                                     && verdict == IPT_CONTINUE) {
416                                         printk("Target %s reentered!\n",
417                                                t->u.kernel.target->name);
418                                         verdict = NF_DROP;
419                                 }
420                                 ((struct ipt_entry *)table_base)->comefrom
421                                         = 0x57acc001;
422 #endif
423                                 /* Target might have changed stuff. */
424                                 ip = ip_hdr(skb);
425                                 datalen = skb->len - ip->ihl * 4;
426
427                                 if (verdict == IPT_CONTINUE)
428                                         e = (void *)e + e->next_offset;
429                                 else
430                                         /* Verdict */
431                                         break;
432                         }
433                 } else {
434
435                 no_match:
436                         e = (void *)e + e->next_offset;
437                 }
438         } while (!hotdrop);
439
440         rcu_read_unlock_bh();
441
442 #ifdef DEBUG_ALLOW_ALL
443         return NF_ACCEPT;
444 #else
445         if (hotdrop)
446                 return NF_DROP;
447         else return verdict;
448 #endif
449 }
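/*
 * Note on the verdict decoding above (a sketch of the usual iptables
 * convention): a standard target stores absolute verdicts as
 * -(NF_verdict + 1), so v == -2 decodes via "(unsigned)(-v) - 1" to
 * NF_ACCEPT (1) and v == -1 to NF_DROP (0).  IPT_RETURN is the one
 * negative value treated differently: it pops the saved back pointer
 * instead of ending traversal, and non-negative values are jump offsets
 * into the table.
 */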
450
451 /* Figures out from what hook each rule can be called: returns 0 if
452    there are loops.  Puts hook bitmask in comefrom. */
453 static int
454 mark_source_chains(struct xt_table_info *newinfo,
455                    unsigned int valid_hooks, void *entry0)
456 {
457         unsigned int hook;
458
459         /* No recursion; use packet counter to save back ptrs (reset
460            to 0 as we leave), and comefrom to save source hook bitmask */
461         for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
462                 unsigned int pos = newinfo->hook_entry[hook];
463                 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
464
465                 if (!(valid_hooks & (1 << hook)))
466                         continue;
467
468                 /* Set initial back pointer. */
469                 e->counters.pcnt = pos;
470
471                 for (;;) {
472                         struct ipt_standard_target *t
473                                 = (void *)ipt_get_target(e);
474                         int visited = e->comefrom & (1 << hook);
475
476                         if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
477                                 printk("iptables: loop hook %u pos %u %08X.\n",
478                                        hook, pos, e->comefrom);
479                                 return 0;
480                         }
481                         e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
482
483                         /* Unconditional return/END. */
484                         if ((e->target_offset == sizeof(struct ipt_entry)
485                             && (strcmp(t->target.u.user.name,
486                                        IPT_STANDARD_TARGET) == 0)
487                             && t->verdict < 0
488                             && unconditional(&e->ip)) || visited) {
489                                 unsigned int oldpos, size;
490
491                                 if ((strcmp(t->target.u.user.name,
492                                             IPT_STANDARD_TARGET) == 0) &&
493                                     t->verdict < -NF_MAX_VERDICT - 1) {
494                                         duprintf("mark_source_chains: bad "
495                                                 "negative verdict (%i)\n",
496                                                                 t->verdict);
497                                         return 0;
498                                 }
499
500                                 /* Return: backtrack through the last
501                                    big jump. */
502                                 do {
503                                         e->comefrom ^= (1<<NF_INET_NUMHOOKS);
504 #ifdef DEBUG_IP_FIREWALL_USER
505                                         if (e->comefrom
506                                             & (1 << NF_INET_NUMHOOKS)) {
507                                                 duprintf("Back unset "
508                                                          "on hook %u "
509                                                          "rule %u\n",
510                                                          hook, pos);
511                                         }
512 #endif
513                                         oldpos = pos;
514                                         pos = e->counters.pcnt;
515                                         e->counters.pcnt = 0;
516
517                                         /* We're at the start. */
518                                         if (pos == oldpos)
519                                                 goto next;
520
521                                         e = (struct ipt_entry *)
522                                                 (entry0 + pos);
523                                 } while (oldpos == pos + e->next_offset);
524
525                                 /* Move along one */
526                                 size = e->next_offset;
527                                 e = (struct ipt_entry *)
528                                         (entry0 + pos + size);
529                                 e->counters.pcnt = pos;
530                                 pos += size;
531                         } else {
532                                 int newpos = t->verdict;
533
534                                 if (strcmp(t->target.u.user.name,
535                                            IPT_STANDARD_TARGET) == 0
536                                     && newpos >= 0) {
537                                         if (newpos > newinfo->size -
538                                                 sizeof(struct ipt_entry)) {
539                                                 duprintf("mark_source_chains: "
540                                                         "bad verdict (%i)\n",
541                                                                 newpos);
542                                                 return 0;
543                                         }
544                                         /* This a jump; chase it. */
545                                         duprintf("Jump rule %u -> %u\n",
546                                                  pos, newpos);
547                                 } else {
548                                         /* ... this is a fallthru */
549                                         newpos = pos + e->next_offset;
550                                 }
551                                 e = (struct ipt_entry *)
552                                         (entry0 + newpos);
553                                 e->counters.pcnt = pos;
554                                 pos = newpos;
555                         }
556                 }
557                 next:
558                 duprintf("Finished chain %u\n", hook);
559         }
560         return 1;
561 }
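/*
 * Rough walkthrough of the bookkeeping above (descriptive only): while a
 * hook's chain is being walked, each visited entry gets the hook bit plus
 * the NF_INET_NUMHOOKS "in progress" bit set in comefrom, and
 * counters.pcnt temporarily stores the offset we arrived from.  Reaching
 * an unconditional RETURN/policy (or an already-visited entry) backtracks
 * through those saved offsets, clearing the in-progress bit as it goes;
 * finding the in-progress bit already set on entry means the ruleset
 * jumps back into itself, and the function reports the loop by
 * returning 0.
 */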
562
563 static int
564 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
565 {
566         struct xt_mtdtor_param par;
567
568         if (i && (*i)-- == 0)
569                 return 1;
570
571         par.match     = m->u.kernel.match;
572         par.matchinfo = m->data;
573         par.family    = NFPROTO_IPV4;
574         if (par.match->destroy != NULL)
575                 par.match->destroy(&par);
576         module_put(par.match->me);
577         return 0;
578 }
579
580 static int
581 check_entry(struct ipt_entry *e, const char *name)
582 {
583         struct ipt_entry_target *t;
584
585         if (!ip_checkentry(&e->ip)) {
586                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
587                 return -EINVAL;
588         }
589
590         if (e->target_offset + sizeof(struct ipt_entry_target) >
591             e->next_offset)
592                 return -EINVAL;
593
594         t = ipt_get_target(e);
595         if (e->target_offset + t->u.target_size > e->next_offset)
596                 return -EINVAL;
597
598         return 0;
599 }
600
601 static int
602 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
603             unsigned int *i)
604 {
605         const struct ipt_ip *ip = par->entryinfo;
606         int ret;
607
608         par->match     = m->u.kernel.match;
609         par->matchinfo = m->data;
610
611         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
612               ip->proto, ip->invflags & IPT_INV_PROTO);
613         if (ret < 0) {
614                 duprintf("ip_tables: check failed for `%s'.\n",
615                          par->match->name);
616                 return ret;
617         }
618         ++*i;
619         return 0;
620 }
621
622 static int
623 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
624                  unsigned int *i)
625 {
626         struct xt_match *match;
627         int ret;
628
629         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
630                                                       m->u.user.revision),
631                                         "ipt_%s", m->u.user.name);
632         if (IS_ERR(match) || !match) {
633                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
634                 return match ? PTR_ERR(match) : -ENOENT;
635         }
636         m->u.kernel.match = match;
637
638         ret = check_match(m, par, i);
639         if (ret)
640                 goto err;
641
642         return 0;
643 err:
644         module_put(m->u.kernel.match->me);
645         return ret;
646 }
647
648 static int check_target(struct ipt_entry *e, const char *name)
649 {
650         struct ipt_entry_target *t = ipt_get_target(e);
651         struct xt_tgchk_param par = {
652                 .table     = name,
653                 .entryinfo = e,
654                 .target    = t->u.kernel.target,
655                 .targinfo  = t->data,
656                 .hook_mask = e->comefrom,
657                 .family    = NFPROTO_IPV4,
658         };
659         int ret;
660
661         ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
662               e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
663         if (ret < 0) {
664                 duprintf("ip_tables: check failed for `%s'.\n",
665                          t->u.kernel.target->name);
666                 return ret;
667         }
668         return 0;
669 }
670
671 static int
672 find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
673                  unsigned int *i)
674 {
675         struct ipt_entry_target *t;
676         struct xt_target *target;
677         int ret;
678         unsigned int j;
679         struct xt_mtchk_param mtpar;
680
681         ret = check_entry(e, name);
682         if (ret)
683                 return ret;
684
685         j = 0;
686         mtpar.table     = name;
687         mtpar.entryinfo = &e->ip;
688         mtpar.hook_mask = e->comefrom;
689         mtpar.family    = NFPROTO_IPV4;
690         ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
691         if (ret != 0)
692                 goto cleanup_matches;
693
694         t = ipt_get_target(e);
695         target = try_then_request_module(xt_find_target(AF_INET,
696                                                         t->u.user.name,
697                                                         t->u.user.revision),
698                                          "ipt_%s", t->u.user.name);
699         if (IS_ERR(target) || !target) {
700                 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
701                 ret = target ? PTR_ERR(target) : -ENOENT;
702                 goto cleanup_matches;
703         }
704         t->u.kernel.target = target;
705
706         ret = check_target(e, name);
707         if (ret)
708                 goto err;
709
710         (*i)++;
711         return 0;
712  err:
713         module_put(t->u.kernel.target->me);
714  cleanup_matches:
715         IPT_MATCH_ITERATE(e, cleanup_match, &j);
716         return ret;
717 }
718
719 static int
720 check_entry_size_and_hooks(struct ipt_entry *e,
721                            struct xt_table_info *newinfo,
722                            unsigned char *base,
723                            unsigned char *limit,
724                            const unsigned int *hook_entries,
725                            const unsigned int *underflows,
726                            unsigned int *i)
727 {
728         unsigned int h;
729
730         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
731             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
732                 duprintf("Bad offset %p\n", e);
733                 return -EINVAL;
734         }
735
736         if (e->next_offset
737             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
738                 duprintf("checking: element %p size %u\n",
739                          e, e->next_offset);
740                 return -EINVAL;
741         }
742
743         /* Check hooks & underflows */
744         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
745                 if ((unsigned char *)e - base == hook_entries[h])
746                         newinfo->hook_entry[h] = hook_entries[h];
747                 if ((unsigned char *)e - base == underflows[h])
748                         newinfo->underflow[h] = underflows[h];
749         }
750
751         /* FIXME: underflows must be unconditional, standard verdicts
752            < 0 (not IPT_RETURN). --RR */
753
754         /* Clear counters and comefrom */
755         e->counters = ((struct xt_counters) { 0, 0 });
756         e->comefrom = 0;
757
758         (*i)++;
759         return 0;
760 }
761
762 static int
763 cleanup_entry(struct ipt_entry *e, unsigned int *i)
764 {
765         struct xt_tgdtor_param par;
766         struct ipt_entry_target *t;
767
768         if (i && (*i)-- == 0)
769                 return 1;
770
771         /* Cleanup all matches */
772         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
773         t = ipt_get_target(e);
774
775         par.target   = t->u.kernel.target;
776         par.targinfo = t->data;
777         par.family   = NFPROTO_IPV4;
778         if (par.target->destroy != NULL)
779                 par.target->destroy(&par);
780         module_put(par.target->me);
781         return 0;
782 }
783
784 /* Checks and translates the user-supplied table segment (held in
785    newinfo) */
786 static int
787 translate_table(const char *name,
788                 unsigned int valid_hooks,
789                 struct xt_table_info *newinfo,
790                 void *entry0,
791                 unsigned int size,
792                 unsigned int number,
793                 const unsigned int *hook_entries,
794                 const unsigned int *underflows)
795 {
796         unsigned int i;
797         int ret;
798
799         newinfo->size = size;
800         newinfo->number = number;
801
802         /* Init all hooks to impossible value. */
803         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
804                 newinfo->hook_entry[i] = 0xFFFFFFFF;
805                 newinfo->underflow[i] = 0xFFFFFFFF;
806         }
807
808         duprintf("translate_table: size %u\n", newinfo->size);
809         i = 0;
810         /* Walk through entries, checking offsets. */
811         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
812                                 check_entry_size_and_hooks,
813                                 newinfo,
814                                 entry0,
815                                 entry0 + size,
816                                 hook_entries, underflows, &i);
817         if (ret != 0)
818                 return ret;
819
820         if (i != number) {
821                 duprintf("translate_table: %u not %u entries\n",
822                          i, number);
823                 return -EINVAL;
824         }
825
826         /* Check hooks all assigned */
827         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
828                 /* Only hooks which are valid */
829                 if (!(valid_hooks & (1 << i)))
830                         continue;
831                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
832                         duprintf("Invalid hook entry %u %u\n",
833                                  i, hook_entries[i]);
834                         return -EINVAL;
835                 }
836                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
837                         duprintf("Invalid underflow %u %u\n",
838                                  i, underflows[i]);
839                         return -EINVAL;
840                 }
841         }
842
843         if (!mark_source_chains(newinfo, valid_hooks, entry0))
844                 return -ELOOP;
845
846         /* Finally, each sanity check must pass */
847         i = 0;
848         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
849                                 find_check_entry, name, size, &i);
850
851         if (ret != 0) {
852                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
853                                 cleanup_entry, &i);
854                 return ret;
855         }
856
857         /* And one copy for every other CPU */
858         for_each_possible_cpu(i) {
859                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
860                         memcpy(newinfo->entries[i], entry0, newinfo->size);
861         }
862
863         return ret;
864 }
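/*
 * Hedged example of the hook_entries/underflows convention validated
 * above: for a filter-style table with three built-in chains and no user
 * rules, userspace might pass hook_entries[] and underflows[] both as
 * { LOCAL_IN: 0, FORWARD: 152, OUTPUT: 304 }, i.e. byte offsets from the
 * start of the entry blob to each chain's first rule and to its trailing
 * policy rule respectively (the 152/304 values here are illustrative,
 * assuming 152-byte standard entries).
 */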
865
866 /* Gets counters. */
867 static inline int
868 add_entry_to_counter(const struct ipt_entry *e,
869                      struct xt_counters total[],
870                      unsigned int *i)
871 {
872         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
873
874         (*i)++;
875         return 0;
876 }
877
878 static inline int
879 set_entry_to_counter(const struct ipt_entry *e,
880                      struct ipt_counters total[],
881                      unsigned int *i)
882 {
883         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
884
885         (*i)++;
886         return 0;
887 }
888
889 static void
890 get_counters(const struct xt_table_info *t,
891              struct xt_counters counters[])
892 {
893         unsigned int cpu;
894         unsigned int i;
895         unsigned int curcpu;
896
897         /* Instead of clearing (by a previous call to memset())
898          * the counters and using adds, we set the counters
899          * with the data from the 'current' CPU.
900          * We don't care about preemption here.
901          */
902         curcpu = raw_smp_processor_id();
903
904         i = 0;
905         IPT_ENTRY_ITERATE(t->entries[curcpu],
906                           t->size,
907                           set_entry_to_counter,
908                           counters,
909                           &i);
910
911         for_each_possible_cpu(cpu) {
912                 if (cpu == curcpu)
913                         continue;
914                 i = 0;
915                 IPT_ENTRY_ITERATE(t->entries[cpu],
916                                   t->size,
917                                   add_entry_to_counter,
918                                   counters,
919                                   &i);
920         }
921
922 }
923
924 /* We're lazy, and add to the first CPU; overflow works its fey magic
925  * and everything is OK. */
926 static int
927 add_counter_to_entry(struct ipt_entry *e,
928                      const struct xt_counters addme[],
929                      unsigned int *i)
930 {
931         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
932
933         (*i)++;
934         return 0;
935 }
936
937 /* Take values from counters and add them back onto the current cpu */
938 static void put_counters(struct xt_table_info *t,
939                          const struct xt_counters counters[])
940 {
941         unsigned int i, cpu;
942
943         local_bh_disable();
944         cpu = smp_processor_id();
945         i = 0;
946         IPT_ENTRY_ITERATE(t->entries[cpu],
947                           t->size,
948                           add_counter_to_entry,
949                           counters,
950                           &i);
951         local_bh_enable();
952 }
953
954
955 static inline int
956 zero_entry_counter(struct ipt_entry *e, void *arg)
957 {
958         e->counters.bcnt = 0;
959         e->counters.pcnt = 0;
960         return 0;
961 }
962
963 static void
964 clone_counters(struct xt_table_info *newinfo, const struct xt_table_info *info)
965 {
966         unsigned int cpu;
967         const void *loc_cpu_entry = info->entries[raw_smp_processor_id()];
968
969         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
970         for_each_possible_cpu(cpu) {
971                 memcpy(newinfo->entries[cpu], loc_cpu_entry, info->size);
972                 IPT_ENTRY_ITERATE(newinfo->entries[cpu], newinfo->size,
973                                   zero_entry_counter, NULL);
974         }
975 }
976
977 static struct xt_counters * alloc_counters(struct xt_table *table)
978 {
979         unsigned int countersize;
980         struct xt_counters *counters;
981         struct xt_table_info *private = table->private;
982         struct xt_table_info *info;
983
984         /* We need an atomic snapshot of the counters: the rest doesn't
985            change (other than comefrom, which userspace doesn't care
986            about). */
987         countersize = sizeof(struct xt_counters) * private->number;
988         counters = vmalloc_node(countersize, numa_node_id());
989
990         if (counters == NULL)
991                 goto nomem;
992
993         info = xt_alloc_table_info(private->size);
994         if (!info)
995                 goto free_counters;
996
997         clone_counters(info, private);
998
999         mutex_lock(&table->lock);
1000         xt_table_entry_swap_rcu(private, info);
1001         synchronize_net();      /* Wait until smoke has cleared */
1002
1003         get_counters(info, counters);
1004         put_counters(private, counters);
1005         mutex_unlock(&table->lock);
1006
1007         xt_free_table_info(info);
1008
1009         return counters;
1010
1011  free_counters:
1012         vfree(counters);
1013  nomem:
1014         return ERR_PTR(-ENOMEM);
1015 }
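/*
 * The sequence above, spelled out: a clone with zeroed counters is
 * swapped in under RCU so packet processing continues on fresh counters,
 * synchronize_net() waits out readers still using the old entries, the
 * old copy's totals are collected into the snapshot returned to the
 * caller, and put_counters() adds that snapshot back onto the live table
 * so the cumulative totals are preserved.
 */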
1016
1017 static int
1018 copy_entries_to_user(unsigned int total_size,
1019                      struct xt_table *table,
1020                      void __user *userptr)
1021 {
1022         unsigned int off, num;
1023         struct ipt_entry *e;
1024         struct xt_counters *counters;
1025         const struct xt_table_info *private = table->private;
1026         int ret = 0;
1027         const void *loc_cpu_entry;
1028
1029         counters = alloc_counters(table);
1030         if (IS_ERR(counters))
1031                 return PTR_ERR(counters);
1032
1033         /* choose the copy that is on our node/cpu, ...
1034          * This choice is lazy (because current thread is
1035          * allowed to migrate to another cpu)
1036          */
1037         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1038         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
1039                 ret = -EFAULT;
1040                 goto free_counters;
1041         }
1042
1043         /* FIXME: use iterator macros --RR */
1044         /* ... then go back and fix counters and names */
1045         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
1046                 unsigned int i;
1047                 const struct ipt_entry_match *m;
1048                 const struct ipt_entry_target *t;
1049
1050                 e = (struct ipt_entry *)(loc_cpu_entry + off);
1051                 if (copy_to_user(userptr + off
1052                                  + offsetof(struct ipt_entry, counters),
1053                                  &counters[num],
1054                                  sizeof(counters[num])) != 0) {
1055                         ret = -EFAULT;
1056                         goto free_counters;
1057                 }
1058
1059                 for (i = sizeof(struct ipt_entry);
1060                      i < e->target_offset;
1061                      i += m->u.match_size) {
1062                         m = (void *)e + i;
1063
1064                         if (copy_to_user(userptr + off + i
1065                                          + offsetof(struct ipt_entry_match,
1066                                                     u.user.name),
1067                                          m->u.kernel.match->name,
1068                                          strlen(m->u.kernel.match->name)+1)
1069                             != 0) {
1070                                 ret = -EFAULT;
1071                                 goto free_counters;
1072                         }
1073                 }
1074
1075                 t = ipt_get_target(e);
1076                 if (copy_to_user(userptr + off + e->target_offset
1077                                  + offsetof(struct ipt_entry_target,
1078                                             u.user.name),
1079                                  t->u.kernel.target->name,
1080                                  strlen(t->u.kernel.target->name)+1) != 0) {
1081                         ret = -EFAULT;
1082                         goto free_counters;
1083                 }
1084         }
1085
1086  free_counters:
1087         vfree(counters);
1088         return ret;
1089 }
1090
1091 #ifdef CONFIG_COMPAT
1092 static void compat_standard_from_user(void *dst, void *src)
1093 {
1094         int v = *(compat_int_t *)src;
1095
1096         if (v > 0)
1097                 v += xt_compat_calc_jump(AF_INET, v);
1098         memcpy(dst, &v, sizeof(v));
1099 }
1100
1101 static int compat_standard_to_user(void __user *dst, void *src)
1102 {
1103         compat_int_t cv = *(int *)src;
1104
1105         if (cv > 0)
1106                 cv -= xt_compat_calc_jump(AF_INET, cv);
1107         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1108 }
1109
1110 static inline int
1111 compat_calc_match(struct ipt_entry_match *m, int *size)
1112 {
1113         *size += xt_compat_match_offset(m->u.kernel.match);
1114         return 0;
1115 }
1116
1117 static int compat_calc_entry(struct ipt_entry *e,
1118                              const struct xt_table_info *info,
1119                              void *base, struct xt_table_info *newinfo)
1120 {
1121         struct ipt_entry_target *t;
1122         unsigned int entry_offset;
1123         int off, i, ret;
1124
1125         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1126         entry_offset = (void *)e - base;
1127         IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1128         t = ipt_get_target(e);
1129         off += xt_compat_target_offset(t->u.kernel.target);
1130         newinfo->size -= off;
1131         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1132         if (ret)
1133                 return ret;
1134
1135         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1136                 if (info->hook_entry[i] &&
1137                     (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1138                         newinfo->hook_entry[i] -= off;
1139                 if (info->underflow[i] &&
1140                     (e < (struct ipt_entry *)(base + info->underflow[i])))
1141                         newinfo->underflow[i] -= off;
1142         }
1143         return 0;
1144 }
1145
1146 static int compat_table_info(const struct xt_table_info *info,
1147                              struct xt_table_info *newinfo)
1148 {
1149         void *loc_cpu_entry;
1150
1151         if (!newinfo || !info)
1152                 return -EINVAL;
1153
1154         /* we don't care about newinfo->entries[] */
1155         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1156         newinfo->initial_entries = 0;
1157         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1158         return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1159                                  compat_calc_entry, info, loc_cpu_entry,
1160                                  newinfo);
1161 }
1162 #endif
1163
1164 static int get_info(struct net *net, void __user *user, int *len, int compat)
1165 {
1166         char name[IPT_TABLE_MAXNAMELEN];
1167         struct xt_table *t;
1168         int ret;
1169
1170         if (*len != sizeof(struct ipt_getinfo)) {
1171                 duprintf("length %u != %zu\n", *len,
1172                          sizeof(struct ipt_getinfo));
1173                 return -EINVAL;
1174         }
1175
1176         if (copy_from_user(name, user, sizeof(name)) != 0)
1177                 return -EFAULT;
1178
1179         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1180 #ifdef CONFIG_COMPAT
1181         if (compat)
1182                 xt_compat_lock(AF_INET);
1183 #endif
1184         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1185                                     "iptable_%s", name);
1186         if (t && !IS_ERR(t)) {
1187                 struct ipt_getinfo info;
1188                 const struct xt_table_info *private = t->private;
1189
1190 #ifdef CONFIG_COMPAT
1191                 if (compat) {
1192                         struct xt_table_info tmp;
1193                         ret = compat_table_info(private, &tmp);
1194                         xt_compat_flush_offsets(AF_INET);
1195                         private = &tmp;
1196                 }
1197 #endif
1198                 info.valid_hooks = t->valid_hooks;
1199                 memcpy(info.hook_entry, private->hook_entry,
1200                        sizeof(info.hook_entry));
1201                 memcpy(info.underflow, private->underflow,
1202                        sizeof(info.underflow));
1203                 info.num_entries = private->number;
1204                 info.size = private->size;
1205                 strcpy(info.name, name);
1206
1207                 if (copy_to_user(user, &info, *len) != 0)
1208                         ret = -EFAULT;
1209                 else
1210                         ret = 0;
1211
1212                 xt_table_unlock(t);
1213                 module_put(t->me);
1214         } else
1215                 ret = t ? PTR_ERR(t) : -ENOENT;
1216 #ifdef CONFIG_COMPAT
1217         if (compat)
1218                 xt_compat_unlock(AF_INET);
1219 #endif
1220         return ret;
1221 }
1222
1223 static int
1224 get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1225 {
1226         int ret;
1227         struct ipt_get_entries get;
1228         struct xt_table *t;
1229
1230         if (*len < sizeof(get)) {
1231                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1232                 return -EINVAL;
1233         }
1234         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1235                 return -EFAULT;
1236         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1237                 duprintf("get_entries: %u != %zu\n",
1238                          *len, sizeof(get) + get.size);
1239                 return -EINVAL;
1240         }
1241
1242         t = xt_find_table_lock(net, AF_INET, get.name);
1243         if (t && !IS_ERR(t)) {
1244                 const struct xt_table_info *private = t->private;
1245                 duprintf("t->private->number = %u\n", private->number);
1246                 if (get.size == private->size)
1247                         ret = copy_entries_to_user(private->size,
1248                                                    t, uptr->entrytable);
1249                 else {
1250                         duprintf("get_entries: I've got %u not %u!\n",
1251                                  private->size, get.size);
1252                         ret = -EAGAIN;
1253                 }
1254                 module_put(t->me);
1255                 xt_table_unlock(t);
1256         } else
1257                 ret = t ? PTR_ERR(t) : -ENOENT;
1258
1259         return ret;
1260 }
1261
1262 static int
1263 __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1264              struct xt_table_info *newinfo, unsigned int num_counters,
1265              void __user *counters_ptr)
1266 {
1267         int ret;
1268         struct xt_table *t;
1269         struct xt_table_info *oldinfo;
1270         struct xt_counters *counters;
1271         void *loc_cpu_old_entry;
1272
1273         ret = 0;
1274         counters = vmalloc(num_counters * sizeof(struct xt_counters));
1275         if (!counters) {
1276                 ret = -ENOMEM;
1277                 goto out;
1278         }
1279
1280         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1281                                     "iptable_%s", name);
1282         if (!t || IS_ERR(t)) {
1283                 ret = t ? PTR_ERR(t) : -ENOENT;
1284                 goto free_newinfo_counters_untrans;
1285         }
1286
1287         /* You lied! */
1288         if (valid_hooks != t->valid_hooks) {
1289                 duprintf("Valid hook crap: %08X vs %08X\n",
1290                          valid_hooks, t->valid_hooks);
1291                 ret = -EINVAL;
1292                 goto put_module;
1293         }
1294
1295         oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1296         if (!oldinfo)
1297                 goto put_module;
1298
1299         /* Update module usage count based on number of rules */
1300         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1301                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1302         if ((oldinfo->number > oldinfo->initial_entries) ||
1303             (newinfo->number <= oldinfo->initial_entries))
1304                 module_put(t->me);
1305         if ((oldinfo->number > oldinfo->initial_entries) &&
1306             (newinfo->number <= oldinfo->initial_entries))
1307                 module_put(t->me);
1308
1309         /* Get the old counters. */
1310         get_counters(oldinfo, counters);
1311         /* Decrease module usage counts and free resource */
1312         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1313         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1314                           NULL);
1315         xt_free_table_info(oldinfo);
1316         if (copy_to_user(counters_ptr, counters,
1317                          sizeof(struct xt_counters) * num_counters) != 0)
1318                 ret = -EFAULT;
1319         vfree(counters);
1320         xt_table_unlock(t);
1321         return ret;
1322
1323  put_module:
1324         module_put(t->me);
1325         xt_table_unlock(t);
1326  free_newinfo_counters_untrans:
1327         vfree(counters);
1328  out:
1329         return ret;
1330 }
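/*
 * Worked note on the module_put() pair in __do_replace() above: together
 * they dispose of the reference taken by xt_find_table_lock() and, when
 * the old table had more than its initial entries, the reference held for
 * those user-defined rules, while leaving one reference held whenever the
 * new table still carries more than the initial entries.
 */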
1331
1332 static int
1333 do_replace(struct net *net, void __user *user, unsigned int len)
1334 {
1335         int ret;
1336         struct ipt_replace tmp;
1337         struct xt_table_info *newinfo;
1338         void *loc_cpu_entry;
1339
1340         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1341                 return -EFAULT;
1342
1343         /* overflow check */
1344         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1345                 return -ENOMEM;
1346
1347         newinfo = xt_alloc_table_info(tmp.size);
1348         if (!newinfo)
1349                 return -ENOMEM;
1350
1351         /* choose the copy that is on our node/cpu */
1352         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1353         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1354                            tmp.size) != 0) {
1355                 ret = -EFAULT;
1356                 goto free_newinfo;
1357         }
1358
1359         ret = translate_table(tmp.name, tmp.valid_hooks,
1360                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1361                               tmp.hook_entry, tmp.underflow);
1362         if (ret != 0)
1363                 goto free_newinfo;
1364
1365         duprintf("ip_tables: Translated table\n");
1366
1367         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1368                            tmp.num_counters, tmp.counters);
1369         if (ret)
1370                 goto free_newinfo_untrans;
1371         return 0;
1372
1373  free_newinfo_untrans:
1374         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1375  free_newinfo:
1376         xt_free_table_info(newinfo);
1377         return ret;
1378 }
1379
1380
1381 static int
1382 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1383 {
1384         unsigned int i;
1385         struct xt_counters_info tmp;
1386         struct xt_counters *paddc;
1387         unsigned int num_counters;
1388         const char *name;
1389         int size;
1390         void *ptmp;
1391         struct xt_table *t;
1392         const struct xt_table_info *private;
1393         int ret = 0;
1394         void *loc_cpu_entry;
1395 #ifdef CONFIG_COMPAT
1396         struct compat_xt_counters_info compat_tmp;
1397
1398         if (compat) {
1399                 ptmp = &compat_tmp;
1400                 size = sizeof(struct compat_xt_counters_info);
1401         } else
1402 #endif
1403         {
1404                 ptmp = &tmp;
1405                 size = sizeof(struct xt_counters_info);
1406         }
1407
1408         if (copy_from_user(ptmp, user, size) != 0)
1409                 return -EFAULT;
1410
1411 #ifdef CONFIG_COMPAT
1412         if (compat) {
1413                 num_counters = compat_tmp.num_counters;
1414                 name = compat_tmp.name;
1415         } else
1416 #endif
1417         {
1418                 num_counters = tmp.num_counters;
1419                 name = tmp.name;
1420         }
1421
1422         if (len != size + num_counters * sizeof(struct xt_counters))
1423                 return -EINVAL;
1424
1425         paddc = vmalloc_node(len - size, numa_node_id());
1426         if (!paddc)
1427                 return -ENOMEM;
1428
1429         if (copy_from_user(paddc, user + size, len - size) != 0) {
1430                 ret = -EFAULT;
1431                 goto free;
1432         }
1433
1434         t = xt_find_table_lock(net, AF_INET, name);
1435         if (!t || IS_ERR(t)) {
1436                 ret = t ? PTR_ERR(t) : -ENOENT;
1437                 goto free;
1438         }
1439
1440         mutex_lock(&t->lock);
1441         private = t->private;
1442         if (private->number != num_counters) {
1443                 ret = -EINVAL;
1444                 goto unlock_up_free;
1445         }
1446
1447         preempt_disable();
1448         i = 0;
1449         /* Choose the copy that is on our node */
1450         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1451         IPT_ENTRY_ITERATE(loc_cpu_entry,
1452                           private->size,
1453                           add_counter_to_entry,
1454                           paddc,
1455                           &i);
1456         preempt_enable();
1457  unlock_up_free:
1458         mutex_unlock(&t->lock);
1459         xt_table_unlock(t);
1460         module_put(t->me);
1461  free:
1462         vfree(paddc);
1463
1464         return ret;
1465 }
1466
1467 #ifdef CONFIG_COMPAT
1468 struct compat_ipt_replace {
1469         char                    name[IPT_TABLE_MAXNAMELEN];
1470         u32                     valid_hooks;
1471         u32                     num_entries;
1472         u32                     size;
1473         u32                     hook_entry[NF_INET_NUMHOOKS];
1474         u32                     underflow[NF_INET_NUMHOOKS];
1475         u32                     num_counters;
1476         compat_uptr_t           counters;       /* struct ipt_counters * */
1477         struct compat_ipt_entry entries[0];
1478 };
1479
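/* Convert one kernel entry (plus its current counters) back to the
 * 32-bit layout and copy it to userspace, advancing *dstptr.
 */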
1480 static int
1481 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1482                           unsigned int *size, struct xt_counters *counters,
1483                           unsigned int *i)
1484 {
1485         struct ipt_entry_target *t;
1486         struct compat_ipt_entry __user *ce;
1487         u_int16_t target_offset, next_offset;
1488         compat_uint_t origsize;
1489         int ret;
1490
1491         ret = -EFAULT;
1492         origsize = *size;
1493         ce = (struct compat_ipt_entry __user *)*dstptr;
1494         if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
1495                 goto out;
1496
1497         if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1498                 goto out;
1499
1500         *dstptr += sizeof(struct compat_ipt_entry);
1501         *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1502
1503         ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1504         target_offset = e->target_offset - (origsize - *size);
1505         if (ret)
1506                 goto out;
1507         t = ipt_get_target(e);
1508         ret = xt_compat_target_to_user(t, dstptr, size);
1509         if (ret)
1510                 goto out;
1511         ret = -EFAULT;
1512         next_offset = e->next_offset - (origsize - *size);
1513         if (put_user(target_offset, &ce->target_offset))
1514                 goto out;
1515         if (put_user(next_offset, &ce->next_offset))
1516                 goto out;
1517
1518         (*i)++;
1519         return 0;
1520 out:
1521         return ret;
1522 }
1523
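/* Look up (and pin) the match module for a compat entry and add its
 * kernel/compat size difference to *size.
 */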
1524 static int
1525 compat_find_calc_match(struct ipt_entry_match *m,
1526                        const char *name,
1527                        const struct ipt_ip *ip,
1528                        unsigned int hookmask,
1529                        int *size, unsigned int *i)
1530 {
1531         struct xt_match *match;
1532
1533         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1534                                                       m->u.user.revision),
1535                                         "ipt_%s", m->u.user.name);
1536         if (IS_ERR(match) || !match) {
1537                 duprintf("compat_find_calc_match: `%s' not found\n",
1538                          m->u.user.name);
1539                 return match ? PTR_ERR(match) : -ENOENT;
1540         }
1541         m->u.kernel.match = match;
1542         *size += xt_compat_match_offset(match);
1543
1544         (*i)++;
1545         return 0;
1546 }
1547
1548 static int
1549 compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1550 {
1551         if (i && (*i)-- == 0)
1552                 return 1;
1553
1554         module_put(m->u.kernel.match->me);
1555         return 0;
1556 }
1557
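/* Drop the module references taken for one compat entry's matches and target. */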
1558 static int
1559 compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1560 {
1561         struct ipt_entry_target *t;
1562
1563         if (i && (*i)-- == 0)
1564                 return 1;
1565
1566         /* Cleanup all matches */
1567         COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1568         t = compat_ipt_get_target(e);
1569         module_put(t->u.kernel.target->me);
1570         return 0;
1571 }
1572
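/* Validate a single compat entry: bounds and alignment, match and
 * target lookup, and per-hook entry/underflow offsets.  Records the
 * kernel/compat size delta for later offset translation.
 */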
1573 static int
1574 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1575                                   struct xt_table_info *newinfo,
1576                                   unsigned int *size,
1577                                   unsigned char *base,
1578                                   unsigned char *limit,
1579                                   unsigned int *hook_entries,
1580                                   unsigned int *underflows,
1581                                   unsigned int *i,
1582                                   const char *name)
1583 {
1584         struct ipt_entry_target *t;
1585         struct xt_target *target;
1586         unsigned int entry_offset;
1587         unsigned int j;
1588         int ret, off, h;
1589
1590         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1591         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
1592             || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1593                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1594                 return -EINVAL;
1595         }
1596
1597         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1598                              sizeof(struct compat_xt_entry_target)) {
1599                 duprintf("checking: element %p size %u\n",
1600                          e, e->next_offset);
1601                 return -EINVAL;
1602         }
1603
1604         /* For the purposes of check_entry(), casting the compat entry is fine */
1605         ret = check_entry((struct ipt_entry *)e, name);
1606         if (ret)
1607                 return ret;
1608
1609         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1610         entry_offset = (void *)e - (void *)base;
1611         j = 0;
1612         ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1613                                        &e->ip, e->comefrom, &off, &j);
1614         if (ret != 0)
1615                 goto release_matches;
1616
1617         t = compat_ipt_get_target(e);
1618         target = try_then_request_module(xt_find_target(AF_INET,
1619                                                         t->u.user.name,
1620                                                         t->u.user.revision),
1621                                          "ipt_%s", t->u.user.name);
1622         if (IS_ERR(target) || !target) {
1623                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1624                          t->u.user.name);
1625                 ret = target ? PTR_ERR(target) : -ENOENT;
1626                 goto release_matches;
1627         }
1628         t->u.kernel.target = target;
1629
1630         off += xt_compat_target_offset(target);
1631         *size += off;
1632         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1633         if (ret)
1634                 goto out;
1635
1636         /* Check hooks & underflows */
1637         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1638                 if ((unsigned char *)e - base == hook_entries[h])
1639                         newinfo->hook_entry[h] = hook_entries[h];
1640                 if ((unsigned char *)e - base == underflows[h])
1641                         newinfo->underflow[h] = underflows[h];
1642         }
1643
1644         /* Clear counters and comefrom */
1645         memset(&e->counters, 0, sizeof(e->counters));
1646         e->comefrom = 0;
1647
1648         (*i)++;
1649         return 0;
1650
1651 out:
1652         module_put(t->u.kernel.target->me);
1653 release_matches:
1654         IPT_MATCH_ITERATE(e, compat_release_match, &j);
1655         return ret;
1656 }
1657
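/* Expand one compat entry into native layout at *dstptr and adjust the
 * hook entry points and underflows that lie beyond it.
 */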
1658 static int
1659 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1660                             unsigned int *size, const char *name,
1661                             struct xt_table_info *newinfo, unsigned char *base)
1662 {
1663         struct ipt_entry_target *t;
1664         struct xt_target *target;
1665         struct ipt_entry *de;
1666         unsigned int origsize;
1667         int ret, h;
1668
1669         ret = 0;
1670         origsize = *size;
1671         de = (struct ipt_entry *)*dstptr;
1672         memcpy(de, e, sizeof(struct ipt_entry));
1673         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1674
1675         *dstptr += sizeof(struct ipt_entry);
1676         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1677
1678         ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1679                                        dstptr, size);
1680         if (ret)
1681                 return ret;
1682         de->target_offset = e->target_offset - (origsize - *size);
1683         t = compat_ipt_get_target(e);
1684         target = t->u.kernel.target;
1685         xt_compat_target_from_user(t, dstptr, size);
1686
1687         de->next_offset = e->next_offset - (origsize - *size);
1688         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1689                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1690                         newinfo->hook_entry[h] -= origsize - *size;
1691                 if ((unsigned char *)de - base < newinfo->underflow[h])
1692                         newinfo->underflow[h] -= origsize - *size;
1693         }
1694         return ret;
1695 }
1696
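/* Run the normal match and target checkentry hooks on an already
 * translated (native layout) entry.
 */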
1697 static int
1698 compat_check_entry(struct ipt_entry *e, const char *name,
1699                                      unsigned int *i)
1700 {
1701         struct xt_mtchk_param mtpar;
1702         unsigned int j;
1703         int ret;
1704
1705         j = 0;
1706         mtpar.table     = name;
1707         mtpar.entryinfo = &e->ip;
1708         mtpar.hook_mask = e->comefrom;
1709         mtpar.family    = NFPROTO_IPV4;
1710         ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
1711         if (ret)
1712                 goto cleanup_matches;
1713
1714         ret = check_target(e, name);
1715         if (ret)
1716                 goto cleanup_matches;
1717
1718         (*i)++;
1719         return 0;
1720
1721  cleanup_matches:
1722         IPT_MATCH_ITERATE(e, cleanup_match, &j);
1723         return ret;
1724 }
1725
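/* Compat counterpart of translate_table(): check the 32-bit ruleset,
 * convert it into a freshly allocated native-layout table and duplicate
 * it for every possible CPU.
 */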
1726 static int
1727 translate_compat_table(const char *name,
1728                        unsigned int valid_hooks,
1729                        struct xt_table_info **pinfo,
1730                        void **pentry0,
1731                        unsigned int total_size,
1732                        unsigned int number,
1733                        unsigned int *hook_entries,
1734                        unsigned int *underflows)
1735 {
1736         unsigned int i, j;
1737         struct xt_table_info *newinfo, *info;
1738         void *pos, *entry0, *entry1;
1739         unsigned int size;
1740         int ret;
1741
1742         info = *pinfo;
1743         entry0 = *pentry0;
1744         size = total_size;
1745         info->number = number;
1746
1747         /* Init all hooks to an impossible value. */
1748         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1749                 info->hook_entry[i] = 0xFFFFFFFF;
1750                 info->underflow[i] = 0xFFFFFFFF;
1751         }
1752
1753         duprintf("translate_compat_table: size %u\n", info->size);
1754         j = 0;
1755         xt_compat_lock(AF_INET);
1756         /* Walk through entries, checking offsets. */
1757         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1758                                        check_compat_entry_size_and_hooks,
1759                                        info, &size, entry0,
1760                                        entry0 + total_size,
1761                                        hook_entries, underflows, &j, name);
1762         if (ret != 0)
1763                 goto out_unlock;
1764
1765         ret = -EINVAL;
1766         if (j != number) {
1767                 duprintf("translate_compat_table: %u not %u entries\n",
1768                          j, number);
1769                 goto out_unlock;
1770         }
1771
1772         /* Check hooks all assigned */
1773         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1774                 /* Only hooks which are valid */
1775                 if (!(valid_hooks & (1 << i)))
1776                         continue;
1777                 if (info->hook_entry[i] == 0xFFFFFFFF) {
1778                         duprintf("Invalid hook entry %u %u\n",
1779                                  i, hook_entries[i]);
1780                         goto out_unlock;
1781                 }
1782                 if (info->underflow[i] == 0xFFFFFFFF) {
1783                         duprintf("Invalid underflow %u %u\n",
1784                                  i, underflows[i]);
1785                         goto out_unlock;
1786                 }
1787         }
1788
1789         ret = -ENOMEM;
1790         newinfo = xt_alloc_table_info(size);
1791         if (!newinfo)
1792                 goto out_unlock;
1793
1794         newinfo->number = number;
1795         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1796                 newinfo->hook_entry[i] = info->hook_entry[i];
1797                 newinfo->underflow[i] = info->underflow[i];
1798         }
1799         entry1 = newinfo->entries[raw_smp_processor_id()];
1800         pos = entry1;
1801         size = total_size;
1802         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1803                                        compat_copy_entry_from_user,
1804                                        &pos, &size, name, newinfo, entry1);
1805         xt_compat_flush_offsets(AF_INET);
1806         xt_compat_unlock(AF_INET);
1807         if (ret)
1808                 goto free_newinfo;
1809
1810         ret = -ELOOP;
1811         if (!mark_source_chains(newinfo, valid_hooks, entry1))
1812                 goto free_newinfo;
1813
1814         i = 0;
1815         ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1816                                 name, &i);
1817         if (ret) {
1818                 j -= i;
1819                 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1820                                                   compat_release_entry, &j);
1821                 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1822                 xt_free_table_info(newinfo);
1823                 return ret;
1824         }
1825
1826         /* And one copy for every other CPU */
1827         for_each_possible_cpu(i)
1828                 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1829                         memcpy(newinfo->entries[i], entry1, newinfo->size);
1830
1831         *pinfo = newinfo;
1832         *pentry0 = entry1;
1833         xt_free_table_info(info);
1834         return 0;
1835
1836 free_newinfo:
1837         xt_free_table_info(newinfo);
1838 out:
1839         COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1840         return ret;
1841 out_unlock:
1842         xt_compat_flush_offsets(AF_INET);
1843         xt_compat_unlock(AF_INET);
1844         goto out;
1845 }
1846
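/* IPT_SO_SET_REPLACE from a 32-bit process: copy in the compat blob,
 * translate it and swap it in via __do_replace().
 */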
1847 static int
1848 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1849 {
1850         int ret;
1851         struct compat_ipt_replace tmp;
1852         struct xt_table_info *newinfo;
1853         void *loc_cpu_entry;
1854
1855         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1856                 return -EFAULT;
1857
1858         /* overflow check */
1859         if (tmp.size >= INT_MAX / num_possible_cpus())
1860                 return -ENOMEM;
1861         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1862                 return -ENOMEM;
1863
1864         newinfo = xt_alloc_table_info(tmp.size);
1865         if (!newinfo)
1866                 return -ENOMEM;
1867
1868         /* choose the copy that is on our node/cpu */
1869         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1870         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1871                            tmp.size) != 0) {
1872                 ret = -EFAULT;
1873                 goto free_newinfo;
1874         }
1875
1876         ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1877                                      &newinfo, &loc_cpu_entry, tmp.size,
1878                                      tmp.num_entries, tmp.hook_entry,
1879                                      tmp.underflow);
1880         if (ret != 0)
1881                 goto free_newinfo;
1882
1883         duprintf("compat_do_replace: Translated table\n");
1884
1885         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1886                            tmp.num_counters, compat_ptr(tmp.counters));
1887         if (ret)
1888                 goto free_newinfo_untrans;
1889         return 0;
1890
1891  free_newinfo_untrans:
1892         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1893  free_newinfo:
1894         xt_free_table_info(newinfo);
1895         return ret;
1896 }
1897
1898 static int
1899 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1900                       unsigned int len)
1901 {
1902         int ret;
1903
1904         if (!capable(CAP_NET_ADMIN))
1905                 return -EPERM;
1906
1907         switch (cmd) {
1908         case IPT_SO_SET_REPLACE:
1909                 ret = compat_do_replace(sock_net(sk), user, len);
1910                 break;
1911
1912         case IPT_SO_SET_ADD_COUNTERS:
1913                 ret = do_add_counters(sock_net(sk), user, len, 1);
1914                 break;
1915
1916         default:
1917                 duprintf("compat_do_ipt_set_ctl: unknown request %i\n", cmd);
1918                 ret = -EINVAL;
1919         }
1920
1921         return ret;
1922 }
1923
1924 struct compat_ipt_get_entries {
1925         char name[IPT_TABLE_MAXNAMELEN];
1926         compat_uint_t size;
1927         struct compat_ipt_entry entrytable[0];
1928 };
1929
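/* Dump the whole table to a 32-bit process, converting each entry with
 * compat_copy_entry_to_user().
 */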
1930 static int
1931 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1932                             void __user *userptr)
1933 {
1934         struct xt_counters *counters;
1935         const struct xt_table_info *private = table->private;
1936         void __user *pos;
1937         unsigned int size;
1938         int ret = 0;
1939         const void *loc_cpu_entry;
1940         unsigned int i = 0;
1941
1942         counters = alloc_counters(table);
1943         if (IS_ERR(counters))
1944                 return PTR_ERR(counters);
1945
1946         /* choose the copy that is on our node/cpu, ...
1947          * This choice is lazy (because the current thread is
1948          * allowed to migrate to another cpu)
1949          */
1950         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1951         pos = userptr;
1952         size = total_size;
1953         ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1954                                 compat_copy_entry_to_user,
1955                                 &pos, &size, counters, &i);
1956
1957         vfree(counters);
1958         return ret;
1959 }
1960
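/* IPT_SO_GET_ENTRIES for a 32-bit process: size-check the request and
 * return the table contents in compat layout.
 */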
1961 static int
1962 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1963                    int *len)
1964 {
1965         int ret;
1966         struct compat_ipt_get_entries get;
1967         struct xt_table *t;
1968
1969         if (*len < sizeof(get)) {
1970                 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1971                 return -EINVAL;
1972         }
1973
1974         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1975                 return -EFAULT;
1976
1977         if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1978                 duprintf("compat_get_entries: %u != %zu\n",
1979                          *len, sizeof(get) + get.size);
1980                 return -EINVAL;
1981         }
1982
1983         xt_compat_lock(AF_INET);
1984         t = xt_find_table_lock(net, AF_INET, get.name);
1985         if (t && !IS_ERR(t)) {
1986                 const struct xt_table_info *private = t->private;
1987                 struct xt_table_info info;
1988                 duprintf("t->private->number = %u\n", private->number);
1989                 ret = compat_table_info(private, &info);
1990                 if (!ret && get.size == info.size) {
1991                         ret = compat_copy_entries_to_user(private->size,
1992                                                           t, uptr->entrytable);
1993                 } else if (!ret) {
1994                         duprintf("compat_get_entries: I've got %u not %u!\n",
1995                                  private->size, get.size);
1996                         ret = -EAGAIN;
1997                 }
1998                 xt_compat_flush_offsets(AF_INET);
1999                 module_put(t->me);
2000                 xt_table_unlock(t);
2001         } else
2002                 ret = t ? PTR_ERR(t) : -ENOENT;
2003
2004         xt_compat_unlock(AF_INET);
2005         return ret;
2006 }
2007
2008 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
2009
2010 static int
2011 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2012 {
2013         int ret;
2014
2015         if (!capable(CAP_NET_ADMIN))
2016                 return -EPERM;
2017
2018         switch (cmd) {
2019         case IPT_SO_GET_INFO:
2020                 ret = get_info(sock_net(sk), user, len, 1);
2021                 break;
2022         case IPT_SO_GET_ENTRIES:
2023                 ret = compat_get_entries(sock_net(sk), user, len);
2024                 break;
2025         default:
2026                 ret = do_ipt_get_ctl(sk, cmd, user, len);
2027         }
2028         return ret;
2029 }
2030 #endif
2031
2032 static int
2033 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
2034 {
2035         int ret;
2036
2037         if (!capable(CAP_NET_ADMIN))
2038                 return -EPERM;
2039
2040         switch (cmd) {
2041         case IPT_SO_SET_REPLACE:
2042                 ret = do_replace(sock_net(sk), user, len);
2043                 break;
2044
2045         case IPT_SO_SET_ADD_COUNTERS:
2046                 ret = do_add_counters(sock_net(sk), user, len, 0);
2047                 break;
2048
2049         default:
2050                 duprintf("do_ipt_set_ctl: unknown request %i\n", cmd);
2051                 ret = -EINVAL;
2052         }
2053
2054         return ret;
2055 }
2056
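/* Native getsockopt() handler: table info, entry dump and match/target
 * revision queries.
 */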
2057 static int
2058 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2059 {
2060         int ret;
2061
2062         if (!capable(CAP_NET_ADMIN))
2063                 return -EPERM;
2064
2065         switch (cmd) {
2066         case IPT_SO_GET_INFO:
2067                 ret = get_info(sock_net(sk), user, len, 0);
2068                 break;
2069
2070         case IPT_SO_GET_ENTRIES:
2071                 ret = get_entries(sock_net(sk), user, len);
2072                 break;
2073
2074         case IPT_SO_GET_REVISION_MATCH:
2075         case IPT_SO_GET_REVISION_TARGET: {
2076                 struct ipt_get_revision rev;
2077                 int target;
2078
2079                 if (*len != sizeof(rev)) {
2080                         ret = -EINVAL;
2081                         break;
2082                 }
2083                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
2084                         ret = -EFAULT;
2085                         break;
2086                 }
2087
2088                 if (cmd == IPT_SO_GET_REVISION_TARGET)
2089                         target = 1;
2090                 else
2091                         target = 0;
2092
2093                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
2094                                                          rev.revision,
2095                                                          target, &ret),
2096                                         "ipt_%s", rev.name);
2097                 break;
2098         }
2099
2100         default:
2101                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
2102                 ret = -EINVAL;
2103         }
2104
2105         return ret;
2106 }
2107
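/* Translate the initial ruleset supplied by a table module and register
 * the table with x_tables.
 */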
2108 struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
2109                                     const struct ipt_replace *repl)
2110 {
2111         int ret;
2112         struct xt_table_info *newinfo;
2113         struct xt_table_info bootstrap
2114                 = { 0, 0, 0, { 0 }, { 0 }, { } };
2115         void *loc_cpu_entry;
2116         struct xt_table *new_table;
2117
2118         newinfo = xt_alloc_table_info(repl->size);
2119         if (!newinfo) {
2120                 ret = -ENOMEM;
2121                 goto out;
2122         }
2123
2124         /* choose the copy on our node/cpu, but don't care about preemption */
2125         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2126         memcpy(loc_cpu_entry, repl->entries, repl->size);
2127
2128         ret = translate_table(table->name, table->valid_hooks,
2129                               newinfo, loc_cpu_entry, repl->size,
2130                               repl->num_entries,
2131                               repl->hook_entry,
2132                               repl->underflow);
2133         if (ret != 0)
2134                 goto out_free;
2135
2136         new_table = xt_register_table(net, table, &bootstrap, newinfo);
2137         if (IS_ERR(new_table)) {
2138                 ret = PTR_ERR(new_table);
2139                 goto out_free;
2140         }
2141
2142         return new_table;
2143
2144 out_free:
2145         xt_free_table_info(newinfo);
2146 out:
2147         return ERR_PTR(ret);
2148 }
2149
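/* Unregister a table and release its entries, counters and module refs. */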
2150 void ipt_unregister_table(struct xt_table *table)
2151 {
2152         struct xt_table_info *private;
2153         void *loc_cpu_entry;
2154         struct module *table_owner = table->me;
2155
2156         private = xt_unregister_table(table);
2157
2158         /* Decrease module usage counts and free resources */
2159         loc_cpu_entry = private->entries[raw_smp_processor_id()];
2160         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2161         if (private->number > private->initial_entries)
2162                 module_put(table_owner);
2163         xt_free_table_info(private);
2164 }
2165
2166 /* Returns true if the type and code are matched by the range, false otherwise */
2167 static inline bool
2168 icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
2169                      u_int8_t type, u_int8_t code,
2170                      bool invert)
2171 {
2172         return ((test_type == 0xFF) ||
2173                 (type == test_type && code >= min_code && code <= max_code))
2174                 ^ invert;
2175 }
2176
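/* Built-in "icmp" match: compare the ICMP type/code of a non-fragment
 * packet against the configured range.
 */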
2177 static bool
2178 icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2179 {
2180         const struct icmphdr *ic;
2181         struct icmphdr _icmph;
2182         const struct ipt_icmp *icmpinfo = par->matchinfo;
2183
2184         /* Must not be a fragment. */
2185         if (par->fragoff != 0)
2186                 return false;
2187
2188         ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
2189         if (ic == NULL) {
2190                 /* We've been asked to examine this packet, and we
2191                  * can't.  Hence, no choice but to drop.
2192                  */
2193                 duprintf("Dropping evil ICMP tinygram.\n");
2194                 *par->hotdrop = true;
2195                 return false;
2196         }
2197
2198         return icmp_type_code_match(icmpinfo->type,
2199                                     icmpinfo->code[0],
2200                                     icmpinfo->code[1],
2201                                     ic->type, ic->code,
2202                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
2203 }
2204
2205 static bool icmp_checkentry(const struct xt_mtchk_param *par)
2206 {
2207         const struct ipt_icmp *icmpinfo = par->matchinfo;
2208
2209         /* Must specify no unknown invflags */
2210         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2211 }
2212
2213 /* The built-in targets: standard (NULL) and error. */
2214 static struct xt_target ipt_standard_target __read_mostly = {
2215         .name           = IPT_STANDARD_TARGET,
2216         .targetsize     = sizeof(int),
2217         .family         = AF_INET,
2218 #ifdef CONFIG_COMPAT
2219         .compatsize     = sizeof(compat_int_t),
2220         .compat_from_user = compat_standard_from_user,
2221         .compat_to_user = compat_standard_to_user,
2222 #endif
2223 };
2224
2225 static struct xt_target ipt_error_target __read_mostly = {
2226         .name           = IPT_ERROR_TARGET,
2227         .target         = ipt_error,
2228         .targetsize     = IPT_FUNCTION_MAXNAMELEN,
2229         .family         = AF_INET,
2230 };
2231
2232 static struct nf_sockopt_ops ipt_sockopts = {
2233         .pf             = PF_INET,
2234         .set_optmin     = IPT_BASE_CTL,
2235         .set_optmax     = IPT_SO_SET_MAX+1,
2236         .set            = do_ipt_set_ctl,
2237 #ifdef CONFIG_COMPAT
2238         .compat_set     = compat_do_ipt_set_ctl,
2239 #endif
2240         .get_optmin     = IPT_BASE_CTL,
2241         .get_optmax     = IPT_SO_GET_MAX+1,
2242         .get            = do_ipt_get_ctl,
2243 #ifdef CONFIG_COMPAT
2244         .compat_get     = compat_do_ipt_get_ctl,
2245 #endif
2246         .owner          = THIS_MODULE,
2247 };
2248
2249 static struct xt_match icmp_matchstruct __read_mostly = {
2250         .name           = "icmp",
2251         .match          = icmp_match,
2252         .matchsize      = sizeof(struct ipt_icmp),
2253         .checkentry     = icmp_checkentry,
2254         .proto          = IPPROTO_ICMP,
2255         .family         = AF_INET,
2256 };
2257
2258 static int __net_init ip_tables_net_init(struct net *net)
2259 {
2260         return xt_proto_init(net, AF_INET);
2261 }
2262
2263 static void __net_exit ip_tables_net_exit(struct net *net)
2264 {
2265         xt_proto_fini(net, AF_INET);
2266 }
2267
2268 static struct pernet_operations ip_tables_net_ops = {
2269         .init = ip_tables_net_init,
2270         .exit = ip_tables_net_exit,
2271 };
2272
2273 static int __init ip_tables_init(void)
2274 {
2275         int ret;
2276
2277         ret = register_pernet_subsys(&ip_tables_net_ops);
2278         if (ret < 0)
2279                 goto err1;
2280
2281         /* No one else will be downing the sem now, so we won't sleep */
2282         ret = xt_register_target(&ipt_standard_target);
2283         if (ret < 0)
2284                 goto err2;
2285         ret = xt_register_target(&ipt_error_target);
2286         if (ret < 0)
2287                 goto err3;
2288         ret = xt_register_match(&icmp_matchstruct);
2289         if (ret < 0)
2290                 goto err4;
2291
2292         /* Register setsockopt */
2293         ret = nf_register_sockopt(&ipt_sockopts);
2294         if (ret < 0)
2295                 goto err5;
2296
2297         printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
2298         return 0;
2299
2300 err5:
2301         xt_unregister_match(&icmp_matchstruct);
2302 err4:
2303         xt_unregister_target(&ipt_error_target);
2304 err3:
2305         xt_unregister_target(&ipt_standard_target);
2306 err2:
2307         unregister_pernet_subsys(&ip_tables_net_ops);
2308 err1:
2309         return ret;
2310 }
2311
2312 static void __exit ip_tables_fini(void)
2313 {
2314         nf_unregister_sockopt(&ipt_sockopts);
2315
2316         xt_unregister_match(&icmp_matchstruct);
2317         xt_unregister_target(&ipt_error_target);
2318         xt_unregister_target(&ipt_standard_target);
2319
2320         unregister_pernet_subsys(&ip_tables_net_ops);
2321 }
2322
2323 EXPORT_SYMBOL(ipt_register_table);
2324 EXPORT_SYMBOL(ipt_unregister_table);
2325 EXPORT_SYMBOL(ipt_do_table);
2326 module_init(ip_tables_init);
2327 module_exit(ip_tables_fini);