Merge commit 'v2.6.29-rc7' into sched/core
[linux-2.6] / net / ipv4 / netfilter / ip_tables.c
1 /*
2  * Packet matching code.
3  *
4  * Copyright (C) 1999 Paul `Rusty' Russell & Michael J. Neuling
5  * Copyright (C) 2000-2005 Netfilter Core Team <coreteam@netfilter.org>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License version 2 as
9  * published by the Free Software Foundation.
10  */
11 #include <linux/cache.h>
12 #include <linux/capability.h>
13 #include <linux/skbuff.h>
14 #include <linux/kmod.h>
15 #include <linux/vmalloc.h>
16 #include <linux/netdevice.h>
17 #include <linux/module.h>
18 #include <linux/icmp.h>
19 #include <net/ip.h>
20 #include <net/compat.h>
21 #include <asm/uaccess.h>
22 #include <linux/mutex.h>
23 #include <linux/proc_fs.h>
24 #include <linux/err.h>
25 #include <linux/cpumask.h>
26
27 #include <linux/netfilter/x_tables.h>
28 #include <linux/netfilter_ipv4/ip_tables.h>
29 #include <net/netfilter/nf_log.h>
30
31 MODULE_LICENSE("GPL");
32 MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>");
33 MODULE_DESCRIPTION("IPv4 packet filter");
34
35 /*#define DEBUG_IP_FIREWALL*/
36 /*#define DEBUG_ALLOW_ALL*/ /* Useful for remote debugging */
37 /*#define DEBUG_IP_FIREWALL_USER*/
38
/* Packet-path debug output; expands to nothing unless DEBUG_IP_FIREWALL
   is defined. */
#ifndef DEBUG_IP_FIREWALL
#define dprintf(format, args...)
#else
#define dprintf(format, args...)  printk(format , ## args)
#endif

/* Debug output for the table checking/setup code; expands to nothing
   unless DEBUG_IP_FIREWALL_USER is defined. */
#ifndef DEBUG_IP_FIREWALL_USER
#define duprintf(format, args...)
#else
#define duprintf(format, args...) printk(format , ## args)
#endif

/* Non-fatal assertion: with CONFIG_NETFILTER_DEBUG it only prints the
   location of the failed check; otherwise it expands to nothing. */
#ifndef CONFIG_NETFILTER_DEBUG
#define IP_NF_ASSERT(x)
#else
#define IP_NF_ASSERT(x)                                         \
do {                                                            \
        if (!(x))                                               \
                printk("IP_NF_ASSERT: %s:%s:%u\n",              \
                       __func__, __FILE__, __LINE__);   \
} while(0)
#endif
61
62 #if 0
63 /* All the better to debug you with... */
64 #define static
65 #define inline
66 #endif
67
68 /*
69    We keep a set of rules for each CPU, so we can avoid write-locking
70    them in the softirq when updating the counters and therefore
71    only need to read-lock in the softirq; doing a write_lock_bh() in user
72    context stops packets coming through and allows user context to read
73    the counters or update the rules.
74
75    Hence the start of any table is given by get_table() below.  */
76
77 /* Returns whether matches rule or not. */
78 /* Performance critical - called for every packet */
79 static inline bool
80 ip_packet_match(const struct iphdr *ip,
81                 const char *indev,
82                 const char *outdev,
83                 const struct ipt_ip *ipinfo,
84                 int isfrag)
85 {
86         size_t i;
87         unsigned long ret;
88
89 #define FWINV(bool, invflg) ((bool) ^ !!(ipinfo->invflags & (invflg)))
90
91         if (FWINV((ip->saddr&ipinfo->smsk.s_addr) != ipinfo->src.s_addr,
92                   IPT_INV_SRCIP)
93             || FWINV((ip->daddr&ipinfo->dmsk.s_addr) != ipinfo->dst.s_addr,
94                      IPT_INV_DSTIP)) {
95                 dprintf("Source or dest mismatch.\n");
96
97                 dprintf("SRC: %pI4. Mask: %pI4. Target: %pI4.%s\n",
98                         &ip->saddr, &ipinfo->smsk.s_addr, &ipinfo->src.s_addr,
99                         ipinfo->invflags & IPT_INV_SRCIP ? " (INV)" : "");
100                 dprintf("DST: %pI4 Mask: %pI4 Target: %pI4.%s\n",
101                         &ip->daddr, &ipinfo->dmsk.s_addr, &ipinfo->dst.s_addr,
102                         ipinfo->invflags & IPT_INV_DSTIP ? " (INV)" : "");
103                 return false;
104         }
105
106         /* Look for ifname matches; this should unroll nicely. */
107         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
108                 ret |= (((const unsigned long *)indev)[i]
109                         ^ ((const unsigned long *)ipinfo->iniface)[i])
110                         & ((const unsigned long *)ipinfo->iniface_mask)[i];
111         }
112
113         if (FWINV(ret != 0, IPT_INV_VIA_IN)) {
114                 dprintf("VIA in mismatch (%s vs %s).%s\n",
115                         indev, ipinfo->iniface,
116                         ipinfo->invflags&IPT_INV_VIA_IN ?" (INV)":"");
117                 return false;
118         }
119
120         for (i = 0, ret = 0; i < IFNAMSIZ/sizeof(unsigned long); i++) {
121                 ret |= (((const unsigned long *)outdev)[i]
122                         ^ ((const unsigned long *)ipinfo->outiface)[i])
123                         & ((const unsigned long *)ipinfo->outiface_mask)[i];
124         }
125
126         if (FWINV(ret != 0, IPT_INV_VIA_OUT)) {
127                 dprintf("VIA out mismatch (%s vs %s).%s\n",
128                         outdev, ipinfo->outiface,
129                         ipinfo->invflags&IPT_INV_VIA_OUT ?" (INV)":"");
130                 return false;
131         }
132
133         /* Check specific protocol */
134         if (ipinfo->proto
135             && FWINV(ip->protocol != ipinfo->proto, IPT_INV_PROTO)) {
136                 dprintf("Packet protocol %hi does not match %hi.%s\n",
137                         ip->protocol, ipinfo->proto,
138                         ipinfo->invflags&IPT_INV_PROTO ? " (INV)":"");
139                 return false;
140         }
141
142         /* If we have a fragment rule but the packet is not a fragment
143          * then we return zero */
144         if (FWINV((ipinfo->flags&IPT_F_FRAG) && !isfrag, IPT_INV_FRAG)) {
145                 dprintf("Fragment rule but not fragment.%s\n",
146                         ipinfo->invflags & IPT_INV_FRAG ? " (INV)" : "");
147                 return false;
148         }
149
150         return true;
151 }
152
153 static bool
154 ip_checkentry(const struct ipt_ip *ip)
155 {
156         if (ip->flags & ~IPT_F_MASK) {
157                 duprintf("Unknown flag bits set: %08X\n",
158                          ip->flags & ~IPT_F_MASK);
159                 return false;
160         }
161         if (ip->invflags & ~IPT_INV_MASK) {
162                 duprintf("Unknown invflag bits set: %08X\n",
163                          ip->invflags & ~IPT_INV_MASK);
164                 return false;
165         }
166         return true;
167 }
168
169 static unsigned int
170 ipt_error(struct sk_buff *skb, const struct xt_target_param *par)
171 {
172         if (net_ratelimit())
173                 printk("ip_tables: error: `%s'\n",
174                        (const char *)par->targinfo);
175
176         return NF_DROP;
177 }
178
179 /* Performance critical - called for every packet */
180 static inline bool
181 do_match(struct ipt_entry_match *m, const struct sk_buff *skb,
182          struct xt_match_param *par)
183 {
184         par->match     = m->u.kernel.match;
185         par->matchinfo = m->data;
186
187         /* Stop iteration if it doesn't match */
188         if (!m->u.kernel.match->match(skb, par))
189                 return true;
190         else
191                 return false;
192 }
193
/* Translate a byte offset from the start of a table blob into a
   pointer to the entry located there. */
/* Performance critical */
static inline struct ipt_entry *
get_entry(void *base, unsigned int offset)
{
        char *where = (char *)base + offset;

        return (struct ipt_entry *)where;
}
200
201 /* All zeroes == unconditional rule. */
202 /* Mildly perf critical (only if packet tracing is on) */
203 static inline int
204 unconditional(const struct ipt_ip *ip)
205 {
206         unsigned int i;
207
208         for (i = 0; i < sizeof(*ip)/sizeof(__u32); i++)
209                 if (((__u32 *)ip)[i])
210                         return 0;
211
212         return 1;
213 #undef FWINV
214 }
215
216 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
217     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
218 static const char *const hooknames[] = {
219         [NF_INET_PRE_ROUTING]           = "PREROUTING",
220         [NF_INET_LOCAL_IN]              = "INPUT",
221         [NF_INET_FORWARD]               = "FORWARD",
222         [NF_INET_LOCAL_OUT]             = "OUTPUT",
223         [NF_INET_POST_ROUTING]          = "POSTROUTING",
224 };
225
226 enum nf_ip_trace_comments {
227         NF_IP_TRACE_COMMENT_RULE,
228         NF_IP_TRACE_COMMENT_RETURN,
229         NF_IP_TRACE_COMMENT_POLICY,
230 };
231
232 static const char *const comments[] = {
233         [NF_IP_TRACE_COMMENT_RULE]      = "rule",
234         [NF_IP_TRACE_COMMENT_RETURN]    = "return",
235         [NF_IP_TRACE_COMMENT_POLICY]    = "policy",
236 };
237
238 static struct nf_loginfo trace_loginfo = {
239         .type = NF_LOG_TYPE_LOG,
240         .u = {
241                 .log = {
242                         .level = 4,
243                         .logflags = NF_LOG_MASK,
244                 },
245         },
246 };
247
248 /* Mildly perf critical (only if packet tracing is on) */
249 static inline int
250 get_chainname_rulenum(struct ipt_entry *s, struct ipt_entry *e,
251                       char *hookname, char **chainname,
252                       char **comment, unsigned int *rulenum)
253 {
254         struct ipt_standard_target *t = (void *)ipt_get_target(s);
255
256         if (strcmp(t->target.u.kernel.target->name, IPT_ERROR_TARGET) == 0) {
257                 /* Head of user chain: ERROR target with chainname */
258                 *chainname = t->target.data;
259                 (*rulenum) = 0;
260         } else if (s == e) {
261                 (*rulenum)++;
262
263                 if (s->target_offset == sizeof(struct ipt_entry)
264                    && strcmp(t->target.u.kernel.target->name,
265                              IPT_STANDARD_TARGET) == 0
266                    && t->verdict < 0
267                    && unconditional(&s->ip)) {
268                         /* Tail of chains: STANDARD target (return/policy) */
269                         *comment = *chainname == hookname
270                                 ? (char *)comments[NF_IP_TRACE_COMMENT_POLICY]
271                                 : (char *)comments[NF_IP_TRACE_COMMENT_RETURN];
272                 }
273                 return 1;
274         } else
275                 (*rulenum)++;
276
277         return 0;
278 }
279
280 static void trace_packet(struct sk_buff *skb,
281                          unsigned int hook,
282                          const struct net_device *in,
283                          const struct net_device *out,
284                          const char *tablename,
285                          struct xt_table_info *private,
286                          struct ipt_entry *e)
287 {
288         void *table_base;
289         const struct ipt_entry *root;
290         char *hookname, *chainname, *comment;
291         unsigned int rulenum = 0;
292
293         table_base = (void *)private->entries[smp_processor_id()];
294         root = get_entry(table_base, private->hook_entry[hook]);
295
296         hookname = chainname = (char *)hooknames[hook];
297         comment = (char *)comments[NF_IP_TRACE_COMMENT_RULE];
298
299         IPT_ENTRY_ITERATE(root,
300                           private->size - private->hook_entry[hook],
301                           get_chainname_rulenum,
302                           e, hookname, &chainname, &comment, &rulenum);
303
304         nf_log_packet(AF_INET, hook, skb, in, out, &trace_loginfo,
305                       "TRACE: %s:%s:%s:%u ",
306                       tablename, chainname, comment, rulenum);
307 }
308 #endif
309
310 /* Returns one of the generic firewall policies, like NF_ACCEPT. */
311 unsigned int
312 ipt_do_table(struct sk_buff *skb,
313              unsigned int hook,
314              const struct net_device *in,
315              const struct net_device *out,
316              struct xt_table *table)
317 {
318         static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long))));
319         const struct iphdr *ip;
320         u_int16_t datalen;
321         bool hotdrop = false;
322         /* Initializing verdict to NF_DROP keeps gcc happy. */
323         unsigned int verdict = NF_DROP;
324         const char *indev, *outdev;
325         void *table_base;
326         struct ipt_entry *e, *back;
327         struct xt_table_info *private;
328         struct xt_match_param mtpar;
329         struct xt_target_param tgpar;
330
331         /* Initialization */
332         ip = ip_hdr(skb);
333         datalen = skb->len - ip->ihl * 4;
334         indev = in ? in->name : nulldevname;
335         outdev = out ? out->name : nulldevname;
336         /* We handle fragments by dealing with the first fragment as
337          * if it was a normal packet.  All other fragments are treated
338          * normally, except that they will NEVER match rules that ask
339          * things we don't know, ie. tcp syn flag or ports).  If the
340          * rule is also a fragment-specific rule, non-fragments won't
341          * match it. */
342         mtpar.fragoff = ntohs(ip->frag_off) & IP_OFFSET;
343         mtpar.thoff   = ip_hdrlen(skb);
344         mtpar.hotdrop = &hotdrop;
345         mtpar.in      = tgpar.in  = in;
346         mtpar.out     = tgpar.out = out;
347         mtpar.family  = tgpar.family = NFPROTO_IPV4;
348         tgpar.hooknum = hook;
349
350         read_lock_bh(&table->lock);
351         IP_NF_ASSERT(table->valid_hooks & (1 << hook));
352         private = table->private;
353         table_base = (void *)private->entries[smp_processor_id()];
354         e = get_entry(table_base, private->hook_entry[hook]);
355
356         /* For return from builtin chain */
357         back = get_entry(table_base, private->underflow[hook]);
358
359         do {
360                 IP_NF_ASSERT(e);
361                 IP_NF_ASSERT(back);
362                 if (ip_packet_match(ip, indev, outdev,
363                     &e->ip, mtpar.fragoff)) {
364                         struct ipt_entry_target *t;
365
366                         if (IPT_MATCH_ITERATE(e, do_match, skb, &mtpar) != 0)
367                                 goto no_match;
368
369                         ADD_COUNTER(e->counters, ntohs(ip->tot_len), 1);
370
371                         t = ipt_get_target(e);
372                         IP_NF_ASSERT(t->u.kernel.target);
373
374 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
375     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
376                         /* The packet is traced: log it */
377                         if (unlikely(skb->nf_trace))
378                                 trace_packet(skb, hook, in, out,
379                                              table->name, private, e);
380 #endif
381                         /* Standard target? */
382                         if (!t->u.kernel.target->target) {
383                                 int v;
384
385                                 v = ((struct ipt_standard_target *)t)->verdict;
386                                 if (v < 0) {
387                                         /* Pop from stack? */
388                                         if (v != IPT_RETURN) {
389                                                 verdict = (unsigned)(-v) - 1;
390                                                 break;
391                                         }
392                                         e = back;
393                                         back = get_entry(table_base,
394                                                          back->comefrom);
395                                         continue;
396                                 }
397                                 if (table_base + v != (void *)e + e->next_offset
398                                     && !(e->ip.flags & IPT_F_GOTO)) {
399                                         /* Save old back ptr in next entry */
400                                         struct ipt_entry *next
401                                                 = (void *)e + e->next_offset;
402                                         next->comefrom
403                                                 = (void *)back - table_base;
404                                         /* set back pointer to next entry */
405                                         back = next;
406                                 }
407
408                                 e = get_entry(table_base, v);
409                         } else {
410                                 /* Targets which reenter must return
411                                    abs. verdicts */
412                                 tgpar.target   = t->u.kernel.target;
413                                 tgpar.targinfo = t->data;
414 #ifdef CONFIG_NETFILTER_DEBUG
415                                 ((struct ipt_entry *)table_base)->comefrom
416                                         = 0xeeeeeeec;
417 #endif
418                                 verdict = t->u.kernel.target->target(skb,
419                                                                      &tgpar);
420 #ifdef CONFIG_NETFILTER_DEBUG
421                                 if (((struct ipt_entry *)table_base)->comefrom
422                                     != 0xeeeeeeec
423                                     && verdict == IPT_CONTINUE) {
424                                         printk("Target %s reentered!\n",
425                                                t->u.kernel.target->name);
426                                         verdict = NF_DROP;
427                                 }
428                                 ((struct ipt_entry *)table_base)->comefrom
429                                         = 0x57acc001;
430 #endif
431                                 /* Target might have changed stuff. */
432                                 ip = ip_hdr(skb);
433                                 datalen = skb->len - ip->ihl * 4;
434
435                                 if (verdict == IPT_CONTINUE)
436                                         e = (void *)e + e->next_offset;
437                                 else
438                                         /* Verdict */
439                                         break;
440                         }
441                 } else {
442
443                 no_match:
444                         e = (void *)e + e->next_offset;
445                 }
446         } while (!hotdrop);
447
448         read_unlock_bh(&table->lock);
449
450 #ifdef DEBUG_ALLOW_ALL
451         return NF_ACCEPT;
452 #else
453         if (hotdrop)
454                 return NF_DROP;
455         else return verdict;
456 #endif
457 }
458
459 /* Figures out from what hook each rule can be called: returns 0 if
460    there are loops.  Puts hook bitmask in comefrom. */
461 static int
462 mark_source_chains(struct xt_table_info *newinfo,
463                    unsigned int valid_hooks, void *entry0)
464 {
465         unsigned int hook;
466
467         /* No recursion; use packet counter to save back ptrs (reset
468            to 0 as we leave), and comefrom to save source hook bitmask */
469         for (hook = 0; hook < NF_INET_NUMHOOKS; hook++) {
470                 unsigned int pos = newinfo->hook_entry[hook];
471                 struct ipt_entry *e = (struct ipt_entry *)(entry0 + pos);
472
473                 if (!(valid_hooks & (1 << hook)))
474                         continue;
475
476                 /* Set initial back pointer. */
477                 e->counters.pcnt = pos;
478
479                 for (;;) {
480                         struct ipt_standard_target *t
481                                 = (void *)ipt_get_target(e);
482                         int visited = e->comefrom & (1 << hook);
483
484                         if (e->comefrom & (1 << NF_INET_NUMHOOKS)) {
485                                 printk("iptables: loop hook %u pos %u %08X.\n",
486                                        hook, pos, e->comefrom);
487                                 return 0;
488                         }
489                         e->comefrom |= ((1 << hook) | (1 << NF_INET_NUMHOOKS));
490
491                         /* Unconditional return/END. */
492                         if ((e->target_offset == sizeof(struct ipt_entry)
493                             && (strcmp(t->target.u.user.name,
494                                        IPT_STANDARD_TARGET) == 0)
495                             && t->verdict < 0
496                             && unconditional(&e->ip)) || visited) {
497                                 unsigned int oldpos, size;
498
499                                 if (t->verdict < -NF_MAX_VERDICT - 1) {
500                                         duprintf("mark_source_chains: bad "
501                                                 "negative verdict (%i)\n",
502                                                                 t->verdict);
503                                         return 0;
504                                 }
505
506                                 /* Return: backtrack through the last
507                                    big jump. */
508                                 do {
509                                         e->comefrom ^= (1<<NF_INET_NUMHOOKS);
510 #ifdef DEBUG_IP_FIREWALL_USER
511                                         if (e->comefrom
512                                             & (1 << NF_INET_NUMHOOKS)) {
513                                                 duprintf("Back unset "
514                                                          "on hook %u "
515                                                          "rule %u\n",
516                                                          hook, pos);
517                                         }
518 #endif
519                                         oldpos = pos;
520                                         pos = e->counters.pcnt;
521                                         e->counters.pcnt = 0;
522
523                                         /* We're at the start. */
524                                         if (pos == oldpos)
525                                                 goto next;
526
527                                         e = (struct ipt_entry *)
528                                                 (entry0 + pos);
529                                 } while (oldpos == pos + e->next_offset);
530
531                                 /* Move along one */
532                                 size = e->next_offset;
533                                 e = (struct ipt_entry *)
534                                         (entry0 + pos + size);
535                                 e->counters.pcnt = pos;
536                                 pos += size;
537                         } else {
538                                 int newpos = t->verdict;
539
540                                 if (strcmp(t->target.u.user.name,
541                                            IPT_STANDARD_TARGET) == 0
542                                     && newpos >= 0) {
543                                         if (newpos > newinfo->size -
544                                                 sizeof(struct ipt_entry)) {
545                                                 duprintf("mark_source_chains: "
546                                                         "bad verdict (%i)\n",
547                                                                 newpos);
548                                                 return 0;
549                                         }
550                                         /* This a jump; chase it. */
551                                         duprintf("Jump rule %u -> %u\n",
552                                                  pos, newpos);
553                                 } else {
554                                         /* ... this is a fallthru */
555                                         newpos = pos + e->next_offset;
556                                 }
557                                 e = (struct ipt_entry *)
558                                         (entry0 + newpos);
559                                 e->counters.pcnt = pos;
560                                 pos = newpos;
561                         }
562                 }
563                 next:
564                 duprintf("Finished chain %u\n", hook);
565         }
566         return 1;
567 }
568
569 static int
570 cleanup_match(struct ipt_entry_match *m, unsigned int *i)
571 {
572         struct xt_mtdtor_param par;
573
574         if (i && (*i)-- == 0)
575                 return 1;
576
577         par.match     = m->u.kernel.match;
578         par.matchinfo = m->data;
579         par.family    = NFPROTO_IPV4;
580         if (par.match->destroy != NULL)
581                 par.match->destroy(&par);
582         module_put(par.match->me);
583         return 0;
584 }
585
586 static int
587 check_entry(struct ipt_entry *e, const char *name)
588 {
589         struct ipt_entry_target *t;
590
591         if (!ip_checkentry(&e->ip)) {
592                 duprintf("ip_tables: ip check failed %p %s.\n", e, name);
593                 return -EINVAL;
594         }
595
596         if (e->target_offset + sizeof(struct ipt_entry_target) >
597             e->next_offset)
598                 return -EINVAL;
599
600         t = ipt_get_target(e);
601         if (e->target_offset + t->u.target_size > e->next_offset)
602                 return -EINVAL;
603
604         return 0;
605 }
606
607 static int
608 check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
609             unsigned int *i)
610 {
611         const struct ipt_ip *ip = par->entryinfo;
612         int ret;
613
614         par->match     = m->u.kernel.match;
615         par->matchinfo = m->data;
616
617         ret = xt_check_match(par, m->u.match_size - sizeof(*m),
618               ip->proto, ip->invflags & IPT_INV_PROTO);
619         if (ret < 0) {
620                 duprintf("ip_tables: check failed for `%s'.\n",
621                          par.match->name);
622                 return ret;
623         }
624         ++*i;
625         return 0;
626 }
627
628 static int
629 find_check_match(struct ipt_entry_match *m, struct xt_mtchk_param *par,
630                  unsigned int *i)
631 {
632         struct xt_match *match;
633         int ret;
634
635         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
636                                                       m->u.user.revision),
637                                         "ipt_%s", m->u.user.name);
638         if (IS_ERR(match) || !match) {
639                 duprintf("find_check_match: `%s' not found\n", m->u.user.name);
640                 return match ? PTR_ERR(match) : -ENOENT;
641         }
642         m->u.kernel.match = match;
643
644         ret = check_match(m, par, i);
645         if (ret)
646                 goto err;
647
648         return 0;
649 err:
650         module_put(m->u.kernel.match->me);
651         return ret;
652 }
653
654 static int check_target(struct ipt_entry *e, const char *name)
655 {
656         struct ipt_entry_target *t = ipt_get_target(e);
657         struct xt_tgchk_param par = {
658                 .table     = name,
659                 .entryinfo = e,
660                 .target    = t->u.kernel.target,
661                 .targinfo  = t->data,
662                 .hook_mask = e->comefrom,
663                 .family    = NFPROTO_IPV4,
664         };
665         int ret;
666
667         ret = xt_check_target(&par, t->u.target_size - sizeof(*t),
668               e->ip.proto, e->ip.invflags & IPT_INV_PROTO);
669         if (ret < 0) {
670                 duprintf("ip_tables: check failed for `%s'.\n",
671                          t->u.kernel.target->name);
672                 return ret;
673         }
674         return 0;
675 }
676
677 static int
678 find_check_entry(struct ipt_entry *e, const char *name, unsigned int size,
679                  unsigned int *i)
680 {
681         struct ipt_entry_target *t;
682         struct xt_target *target;
683         int ret;
684         unsigned int j;
685         struct xt_mtchk_param mtpar;
686
687         ret = check_entry(e, name);
688         if (ret)
689                 return ret;
690
691         j = 0;
692         mtpar.table     = name;
693         mtpar.entryinfo = &e->ip;
694         mtpar.hook_mask = e->comefrom;
695         mtpar.family    = NFPROTO_IPV4;
696         ret = IPT_MATCH_ITERATE(e, find_check_match, &mtpar, &j);
697         if (ret != 0)
698                 goto cleanup_matches;
699
700         t = ipt_get_target(e);
701         target = try_then_request_module(xt_find_target(AF_INET,
702                                                         t->u.user.name,
703                                                         t->u.user.revision),
704                                          "ipt_%s", t->u.user.name);
705         if (IS_ERR(target) || !target) {
706                 duprintf("find_check_entry: `%s' not found\n", t->u.user.name);
707                 ret = target ? PTR_ERR(target) : -ENOENT;
708                 goto cleanup_matches;
709         }
710         t->u.kernel.target = target;
711
712         ret = check_target(e, name);
713         if (ret)
714                 goto err;
715
716         (*i)++;
717         return 0;
718  err:
719         module_put(t->u.kernel.target->me);
720  cleanup_matches:
721         IPT_MATCH_ITERATE(e, cleanup_match, &j);
722         return ret;
723 }
724
725 static int
726 check_entry_size_and_hooks(struct ipt_entry *e,
727                            struct xt_table_info *newinfo,
728                            unsigned char *base,
729                            unsigned char *limit,
730                            const unsigned int *hook_entries,
731                            const unsigned int *underflows,
732                            unsigned int *i)
733 {
734         unsigned int h;
735
736         if ((unsigned long)e % __alignof__(struct ipt_entry) != 0
737             || (unsigned char *)e + sizeof(struct ipt_entry) >= limit) {
738                 duprintf("Bad offset %p\n", e);
739                 return -EINVAL;
740         }
741
742         if (e->next_offset
743             < sizeof(struct ipt_entry) + sizeof(struct ipt_entry_target)) {
744                 duprintf("checking: element %p size %u\n",
745                          e, e->next_offset);
746                 return -EINVAL;
747         }
748
749         /* Check hooks & underflows */
750         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
751                 if ((unsigned char *)e - base == hook_entries[h])
752                         newinfo->hook_entry[h] = hook_entries[h];
753                 if ((unsigned char *)e - base == underflows[h])
754                         newinfo->underflow[h] = underflows[h];
755         }
756
757         /* FIXME: underflows must be unconditional, standard verdicts
758            < 0 (not IPT_RETURN). --RR */
759
760         /* Clear counters and comefrom */
761         e->counters = ((struct xt_counters) { 0, 0 });
762         e->comefrom = 0;
763
764         (*i)++;
765         return 0;
766 }
767
768 static int
769 cleanup_entry(struct ipt_entry *e, unsigned int *i)
770 {
771         struct xt_tgdtor_param par;
772         struct ipt_entry_target *t;
773
774         if (i && (*i)-- == 0)
775                 return 1;
776
777         /* Cleanup all matches */
778         IPT_MATCH_ITERATE(e, cleanup_match, NULL);
779         t = ipt_get_target(e);
780
781         par.target   = t->u.kernel.target;
782         par.targinfo = t->data;
783         par.family   = NFPROTO_IPV4;
784         if (par.target->destroy != NULL)
785                 par.target->destroy(&par);
786         module_put(par.target->me);
787         return 0;
788 }
789
790 /* Checks and translates the user-supplied table segment (held in
791    newinfo) */
792 static int
793 translate_table(const char *name,
794                 unsigned int valid_hooks,
795                 struct xt_table_info *newinfo,
796                 void *entry0,
797                 unsigned int size,
798                 unsigned int number,
799                 const unsigned int *hook_entries,
800                 const unsigned int *underflows)
801 {
802         unsigned int i;
803         int ret;
804
805         newinfo->size = size;
806         newinfo->number = number;
807
808         /* Init all hooks to impossible value. */
809         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
810                 newinfo->hook_entry[i] = 0xFFFFFFFF;
811                 newinfo->underflow[i] = 0xFFFFFFFF;
812         }
813
814         duprintf("translate_table: size %u\n", newinfo->size);
815         i = 0;
816         /* Walk through entries, checking offsets. */
817         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
818                                 check_entry_size_and_hooks,
819                                 newinfo,
820                                 entry0,
821                                 entry0 + size,
822                                 hook_entries, underflows, &i);
823         if (ret != 0)
824                 return ret;
825
826         if (i != number) {
827                 duprintf("translate_table: %u not %u entries\n",
828                          i, number);
829                 return -EINVAL;
830         }
831
832         /* Check hooks all assigned */
833         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
834                 /* Only hooks which are valid */
835                 if (!(valid_hooks & (1 << i)))
836                         continue;
837                 if (newinfo->hook_entry[i] == 0xFFFFFFFF) {
838                         duprintf("Invalid hook entry %u %u\n",
839                                  i, hook_entries[i]);
840                         return -EINVAL;
841                 }
842                 if (newinfo->underflow[i] == 0xFFFFFFFF) {
843                         duprintf("Invalid underflow %u %u\n",
844                                  i, underflows[i]);
845                         return -EINVAL;
846                 }
847         }
848
849         if (!mark_source_chains(newinfo, valid_hooks, entry0))
850                 return -ELOOP;
851
852         /* Finally, each sanity check must pass */
853         i = 0;
854         ret = IPT_ENTRY_ITERATE(entry0, newinfo->size,
855                                 find_check_entry, name, size, &i);
856
857         if (ret != 0) {
858                 IPT_ENTRY_ITERATE(entry0, newinfo->size,
859                                 cleanup_entry, &i);
860                 return ret;
861         }
862
863         /* And one copy for every other CPU */
864         for_each_possible_cpu(i) {
865                 if (newinfo->entries[i] && newinfo->entries[i] != entry0)
866                         memcpy(newinfo->entries[i], entry0, newinfo->size);
867         }
868
869         return ret;
870 }
871
872 /* Gets counters. */
873 static inline int
874 add_entry_to_counter(const struct ipt_entry *e,
875                      struct xt_counters total[],
876                      unsigned int *i)
877 {
878         ADD_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
879
880         (*i)++;
881         return 0;
882 }
883
884 static inline int
885 set_entry_to_counter(const struct ipt_entry *e,
886                      struct ipt_counters total[],
887                      unsigned int *i)
888 {
889         SET_COUNTER(total[*i], e->counters.bcnt, e->counters.pcnt);
890
891         (*i)++;
892         return 0;
893 }
894
895 static void
896 get_counters(const struct xt_table_info *t,
897              struct xt_counters counters[])
898 {
899         unsigned int cpu;
900         unsigned int i;
901         unsigned int curcpu;
902
903         /* Instead of clearing (by a previous call to memset())
904          * the counters and using adds, we set the counters
905          * with data used by 'current' CPU
906          * We dont care about preemption here.
907          */
908         curcpu = raw_smp_processor_id();
909
910         i = 0;
911         IPT_ENTRY_ITERATE(t->entries[curcpu],
912                           t->size,
913                           set_entry_to_counter,
914                           counters,
915                           &i);
916
917         for_each_possible_cpu(cpu) {
918                 if (cpu == curcpu)
919                         continue;
920                 i = 0;
921                 IPT_ENTRY_ITERATE(t->entries[cpu],
922                                   t->size,
923                                   add_entry_to_counter,
924                                   counters,
925                                   &i);
926         }
927 }
928
929 static struct xt_counters * alloc_counters(struct xt_table *table)
930 {
931         unsigned int countersize;
932         struct xt_counters *counters;
933         const struct xt_table_info *private = table->private;
934
935         /* We need atomic snapshot of counters: rest doesn't change
936            (other than comefrom, which userspace doesn't care
937            about). */
938         countersize = sizeof(struct xt_counters) * private->number;
939         counters = vmalloc_node(countersize, numa_node_id());
940
941         if (counters == NULL)
942                 return ERR_PTR(-ENOMEM);
943
944         /* First, sum counters... */
945         write_lock_bh(&table->lock);
946         get_counters(private, counters);
947         write_unlock_bh(&table->lock);
948
949         return counters;
950 }
951
952 static int
953 copy_entries_to_user(unsigned int total_size,
954                      struct xt_table *table,
955                      void __user *userptr)
956 {
957         unsigned int off, num;
958         struct ipt_entry *e;
959         struct xt_counters *counters;
960         const struct xt_table_info *private = table->private;
961         int ret = 0;
962         const void *loc_cpu_entry;
963
964         counters = alloc_counters(table);
965         if (IS_ERR(counters))
966                 return PTR_ERR(counters);
967
968         /* choose the copy that is on our node/cpu, ...
969          * This choice is lazy (because current thread is
970          * allowed to migrate to another cpu)
971          */
972         loc_cpu_entry = private->entries[raw_smp_processor_id()];
973         if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) {
974                 ret = -EFAULT;
975                 goto free_counters;
976         }
977
978         /* FIXME: use iterator macros --RR */
979         /* ... then go back and fix counters and names */
980         for (off = 0, num = 0; off < total_size; off += e->next_offset, num++){
981                 unsigned int i;
982                 const struct ipt_entry_match *m;
983                 const struct ipt_entry_target *t;
984
985                 e = (struct ipt_entry *)(loc_cpu_entry + off);
986                 if (copy_to_user(userptr + off
987                                  + offsetof(struct ipt_entry, counters),
988                                  &counters[num],
989                                  sizeof(counters[num])) != 0) {
990                         ret = -EFAULT;
991                         goto free_counters;
992                 }
993
994                 for (i = sizeof(struct ipt_entry);
995                      i < e->target_offset;
996                      i += m->u.match_size) {
997                         m = (void *)e + i;
998
999                         if (copy_to_user(userptr + off + i
1000                                          + offsetof(struct ipt_entry_match,
1001                                                     u.user.name),
1002                                          m->u.kernel.match->name,
1003                                          strlen(m->u.kernel.match->name)+1)
1004                             != 0) {
1005                                 ret = -EFAULT;
1006                                 goto free_counters;
1007                         }
1008                 }
1009
1010                 t = ipt_get_target(e);
1011                 if (copy_to_user(userptr + off + e->target_offset
1012                                  + offsetof(struct ipt_entry_target,
1013                                             u.user.name),
1014                                  t->u.kernel.target->name,
1015                                  strlen(t->u.kernel.target->name)+1) != 0) {
1016                         ret = -EFAULT;
1017                         goto free_counters;
1018                 }
1019         }
1020
1021  free_counters:
1022         vfree(counters);
1023         return ret;
1024 }
1025
1026 #ifdef CONFIG_COMPAT
1027 static void compat_standard_from_user(void *dst, void *src)
1028 {
1029         int v = *(compat_int_t *)src;
1030
1031         if (v > 0)
1032                 v += xt_compat_calc_jump(AF_INET, v);
1033         memcpy(dst, &v, sizeof(v));
1034 }
1035
1036 static int compat_standard_to_user(void __user *dst, void *src)
1037 {
1038         compat_int_t cv = *(int *)src;
1039
1040         if (cv > 0)
1041                 cv -= xt_compat_calc_jump(AF_INET, cv);
1042         return copy_to_user(dst, &cv, sizeof(cv)) ? -EFAULT : 0;
1043 }
1044
1045 static inline int
1046 compat_calc_match(struct ipt_entry_match *m, int *size)
1047 {
1048         *size += xt_compat_match_offset(m->u.kernel.match);
1049         return 0;
1050 }
1051
1052 static int compat_calc_entry(struct ipt_entry *e,
1053                              const struct xt_table_info *info,
1054                              void *base, struct xt_table_info *newinfo)
1055 {
1056         struct ipt_entry_target *t;
1057         unsigned int entry_offset;
1058         int off, i, ret;
1059
1060         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1061         entry_offset = (void *)e - base;
1062         IPT_MATCH_ITERATE(e, compat_calc_match, &off);
1063         t = ipt_get_target(e);
1064         off += xt_compat_target_offset(t->u.kernel.target);
1065         newinfo->size -= off;
1066         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1067         if (ret)
1068                 return ret;
1069
1070         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1071                 if (info->hook_entry[i] &&
1072                     (e < (struct ipt_entry *)(base + info->hook_entry[i])))
1073                         newinfo->hook_entry[i] -= off;
1074                 if (info->underflow[i] &&
1075                     (e < (struct ipt_entry *)(base + info->underflow[i])))
1076                         newinfo->underflow[i] -= off;
1077         }
1078         return 0;
1079 }
1080
1081 static int compat_table_info(const struct xt_table_info *info,
1082                              struct xt_table_info *newinfo)
1083 {
1084         void *loc_cpu_entry;
1085
1086         if (!newinfo || !info)
1087                 return -EINVAL;
1088
1089         /* we dont care about newinfo->entries[] */
1090         memcpy(newinfo, info, offsetof(struct xt_table_info, entries));
1091         newinfo->initial_entries = 0;
1092         loc_cpu_entry = info->entries[raw_smp_processor_id()];
1093         return IPT_ENTRY_ITERATE(loc_cpu_entry, info->size,
1094                                  compat_calc_entry, info, loc_cpu_entry,
1095                                  newinfo);
1096 }
1097 #endif
1098
1099 static int get_info(struct net *net, void __user *user, int *len, int compat)
1100 {
1101         char name[IPT_TABLE_MAXNAMELEN];
1102         struct xt_table *t;
1103         int ret;
1104
1105         if (*len != sizeof(struct ipt_getinfo)) {
1106                 duprintf("length %u != %zu\n", *len,
1107                          sizeof(struct ipt_getinfo));
1108                 return -EINVAL;
1109         }
1110
1111         if (copy_from_user(name, user, sizeof(name)) != 0)
1112                 return -EFAULT;
1113
1114         name[IPT_TABLE_MAXNAMELEN-1] = '\0';
1115 #ifdef CONFIG_COMPAT
1116         if (compat)
1117                 xt_compat_lock(AF_INET);
1118 #endif
1119         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1120                                     "iptable_%s", name);
1121         if (t && !IS_ERR(t)) {
1122                 struct ipt_getinfo info;
1123                 const struct xt_table_info *private = t->private;
1124
1125 #ifdef CONFIG_COMPAT
1126                 if (compat) {
1127                         struct xt_table_info tmp;
1128                         ret = compat_table_info(private, &tmp);
1129                         xt_compat_flush_offsets(AF_INET);
1130                         private = &tmp;
1131                 }
1132 #endif
1133                 info.valid_hooks = t->valid_hooks;
1134                 memcpy(info.hook_entry, private->hook_entry,
1135                        sizeof(info.hook_entry));
1136                 memcpy(info.underflow, private->underflow,
1137                        sizeof(info.underflow));
1138                 info.num_entries = private->number;
1139                 info.size = private->size;
1140                 strcpy(info.name, name);
1141
1142                 if (copy_to_user(user, &info, *len) != 0)
1143                         ret = -EFAULT;
1144                 else
1145                         ret = 0;
1146
1147                 xt_table_unlock(t);
1148                 module_put(t->me);
1149         } else
1150                 ret = t ? PTR_ERR(t) : -ENOENT;
1151 #ifdef CONFIG_COMPAT
1152         if (compat)
1153                 xt_compat_unlock(AF_INET);
1154 #endif
1155         return ret;
1156 }
1157
1158 static int
1159 get_entries(struct net *net, struct ipt_get_entries __user *uptr, int *len)
1160 {
1161         int ret;
1162         struct ipt_get_entries get;
1163         struct xt_table *t;
1164
1165         if (*len < sizeof(get)) {
1166                 duprintf("get_entries: %u < %zu\n", *len, sizeof(get));
1167                 return -EINVAL;
1168         }
1169         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1170                 return -EFAULT;
1171         if (*len != sizeof(struct ipt_get_entries) + get.size) {
1172                 duprintf("get_entries: %u != %zu\n",
1173                          *len, sizeof(get) + get.size);
1174                 return -EINVAL;
1175         }
1176
1177         t = xt_find_table_lock(net, AF_INET, get.name);
1178         if (t && !IS_ERR(t)) {
1179                 const struct xt_table_info *private = t->private;
1180                 duprintf("t->private->number = %u\n", private->number);
1181                 if (get.size == private->size)
1182                         ret = copy_entries_to_user(private->size,
1183                                                    t, uptr->entrytable);
1184                 else {
1185                         duprintf("get_entries: I've got %u not %u!\n",
1186                                  private->size, get.size);
1187                         ret = -EAGAIN;
1188                 }
1189                 module_put(t->me);
1190                 xt_table_unlock(t);
1191         } else
1192                 ret = t ? PTR_ERR(t) : -ENOENT;
1193
1194         return ret;
1195 }
1196
1197 static int
1198 __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
1199              struct xt_table_info *newinfo, unsigned int num_counters,
1200              void __user *counters_ptr)
1201 {
1202         int ret;
1203         struct xt_table *t;
1204         struct xt_table_info *oldinfo;
1205         struct xt_counters *counters;
1206         void *loc_cpu_old_entry;
1207
1208         ret = 0;
1209         counters = vmalloc(num_counters * sizeof(struct xt_counters));
1210         if (!counters) {
1211                 ret = -ENOMEM;
1212                 goto out;
1213         }
1214
1215         t = try_then_request_module(xt_find_table_lock(net, AF_INET, name),
1216                                     "iptable_%s", name);
1217         if (!t || IS_ERR(t)) {
1218                 ret = t ? PTR_ERR(t) : -ENOENT;
1219                 goto free_newinfo_counters_untrans;
1220         }
1221
1222         /* You lied! */
1223         if (valid_hooks != t->valid_hooks) {
1224                 duprintf("Valid hook crap: %08X vs %08X\n",
1225                          valid_hooks, t->valid_hooks);
1226                 ret = -EINVAL;
1227                 goto put_module;
1228         }
1229
1230         oldinfo = xt_replace_table(t, num_counters, newinfo, &ret);
1231         if (!oldinfo)
1232                 goto put_module;
1233
1234         /* Update module usage count based on number of rules */
1235         duprintf("do_replace: oldnum=%u, initnum=%u, newnum=%u\n",
1236                 oldinfo->number, oldinfo->initial_entries, newinfo->number);
1237         if ((oldinfo->number > oldinfo->initial_entries) ||
1238             (newinfo->number <= oldinfo->initial_entries))
1239                 module_put(t->me);
1240         if ((oldinfo->number > oldinfo->initial_entries) &&
1241             (newinfo->number <= oldinfo->initial_entries))
1242                 module_put(t->me);
1243
1244         /* Get the old counters. */
1245         get_counters(oldinfo, counters);
1246         /* Decrease module usage counts and free resource */
1247         loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()];
1248         IPT_ENTRY_ITERATE(loc_cpu_old_entry, oldinfo->size, cleanup_entry,
1249                           NULL);
1250         xt_free_table_info(oldinfo);
1251         if (copy_to_user(counters_ptr, counters,
1252                          sizeof(struct xt_counters) * num_counters) != 0)
1253                 ret = -EFAULT;
1254         vfree(counters);
1255         xt_table_unlock(t);
1256         return ret;
1257
1258  put_module:
1259         module_put(t->me);
1260         xt_table_unlock(t);
1261  free_newinfo_counters_untrans:
1262         vfree(counters);
1263  out:
1264         return ret;
1265 }
1266
1267 static int
1268 do_replace(struct net *net, void __user *user, unsigned int len)
1269 {
1270         int ret;
1271         struct ipt_replace tmp;
1272         struct xt_table_info *newinfo;
1273         void *loc_cpu_entry;
1274
1275         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1276                 return -EFAULT;
1277
1278         /* overflow check */
1279         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1280                 return -ENOMEM;
1281
1282         newinfo = xt_alloc_table_info(tmp.size);
1283         if (!newinfo)
1284                 return -ENOMEM;
1285
1286         /* choose the copy that is on our node/cpu */
1287         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1288         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1289                            tmp.size) != 0) {
1290                 ret = -EFAULT;
1291                 goto free_newinfo;
1292         }
1293
1294         ret = translate_table(tmp.name, tmp.valid_hooks,
1295                               newinfo, loc_cpu_entry, tmp.size, tmp.num_entries,
1296                               tmp.hook_entry, tmp.underflow);
1297         if (ret != 0)
1298                 goto free_newinfo;
1299
1300         duprintf("ip_tables: Translated table\n");
1301
1302         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1303                            tmp.num_counters, tmp.counters);
1304         if (ret)
1305                 goto free_newinfo_untrans;
1306         return 0;
1307
1308  free_newinfo_untrans:
1309         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1310  free_newinfo:
1311         xt_free_table_info(newinfo);
1312         return ret;
1313 }
1314
1315 /* We're lazy, and add to the first CPU; overflow works its fey magic
1316  * and everything is OK. */
1317 static int
1318 add_counter_to_entry(struct ipt_entry *e,
1319                      const struct xt_counters addme[],
1320                      unsigned int *i)
1321 {
1322 #if 0
1323         duprintf("add_counter: Entry %u %lu/%lu + %lu/%lu\n",
1324                  *i,
1325                  (long unsigned int)e->counters.pcnt,
1326                  (long unsigned int)e->counters.bcnt,
1327                  (long unsigned int)addme[*i].pcnt,
1328                  (long unsigned int)addme[*i].bcnt);
1329 #endif
1330
1331         ADD_COUNTER(e->counters, addme[*i].bcnt, addme[*i].pcnt);
1332
1333         (*i)++;
1334         return 0;
1335 }
1336
1337 static int
1338 do_add_counters(struct net *net, void __user *user, unsigned int len, int compat)
1339 {
1340         unsigned int i;
1341         struct xt_counters_info tmp;
1342         struct xt_counters *paddc;
1343         unsigned int num_counters;
1344         const char *name;
1345         int size;
1346         void *ptmp;
1347         struct xt_table *t;
1348         const struct xt_table_info *private;
1349         int ret = 0;
1350         void *loc_cpu_entry;
1351 #ifdef CONFIG_COMPAT
1352         struct compat_xt_counters_info compat_tmp;
1353
1354         if (compat) {
1355                 ptmp = &compat_tmp;
1356                 size = sizeof(struct compat_xt_counters_info);
1357         } else
1358 #endif
1359         {
1360                 ptmp = &tmp;
1361                 size = sizeof(struct xt_counters_info);
1362         }
1363
1364         if (copy_from_user(ptmp, user, size) != 0)
1365                 return -EFAULT;
1366
1367 #ifdef CONFIG_COMPAT
1368         if (compat) {
1369                 num_counters = compat_tmp.num_counters;
1370                 name = compat_tmp.name;
1371         } else
1372 #endif
1373         {
1374                 num_counters = tmp.num_counters;
1375                 name = tmp.name;
1376         }
1377
1378         if (len != size + num_counters * sizeof(struct xt_counters))
1379                 return -EINVAL;
1380
1381         paddc = vmalloc_node(len - size, numa_node_id());
1382         if (!paddc)
1383                 return -ENOMEM;
1384
1385         if (copy_from_user(paddc, user + size, len - size) != 0) {
1386                 ret = -EFAULT;
1387                 goto free;
1388         }
1389
1390         t = xt_find_table_lock(net, AF_INET, name);
1391         if (!t || IS_ERR(t)) {
1392                 ret = t ? PTR_ERR(t) : -ENOENT;
1393                 goto free;
1394         }
1395
1396         write_lock_bh(&t->lock);
1397         private = t->private;
1398         if (private->number != num_counters) {
1399                 ret = -EINVAL;
1400                 goto unlock_up_free;
1401         }
1402
1403         i = 0;
1404         /* Choose the copy that is on our node */
1405         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1406         IPT_ENTRY_ITERATE(loc_cpu_entry,
1407                           private->size,
1408                           add_counter_to_entry,
1409                           paddc,
1410                           &i);
1411  unlock_up_free:
1412         write_unlock_bh(&t->lock);
1413         xt_table_unlock(t);
1414         module_put(t->me);
1415  free:
1416         vfree(paddc);
1417
1418         return ret;
1419 }
1420
1421 #ifdef CONFIG_COMPAT
1422 struct compat_ipt_replace {
1423         char                    name[IPT_TABLE_MAXNAMELEN];
1424         u32                     valid_hooks;
1425         u32                     num_entries;
1426         u32                     size;
1427         u32                     hook_entry[NF_INET_NUMHOOKS];
1428         u32                     underflow[NF_INET_NUMHOOKS];
1429         u32                     num_counters;
1430         compat_uptr_t           counters;       /* struct ipt_counters * */
1431         struct compat_ipt_entry entries[0];
1432 };
1433
1434 static int
1435 compat_copy_entry_to_user(struct ipt_entry *e, void __user **dstptr,
1436                           unsigned int *size, struct xt_counters *counters,
1437                           unsigned int *i)
1438 {
1439         struct ipt_entry_target *t;
1440         struct compat_ipt_entry __user *ce;
1441         u_int16_t target_offset, next_offset;
1442         compat_uint_t origsize;
1443         int ret;
1444
1445         ret = -EFAULT;
1446         origsize = *size;
1447         ce = (struct compat_ipt_entry __user *)*dstptr;
1448         if (copy_to_user(ce, e, sizeof(struct ipt_entry)))
1449                 goto out;
1450
1451         if (copy_to_user(&ce->counters, &counters[*i], sizeof(counters[*i])))
1452                 goto out;
1453
1454         *dstptr += sizeof(struct compat_ipt_entry);
1455         *size -= sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1456
1457         ret = IPT_MATCH_ITERATE(e, xt_compat_match_to_user, dstptr, size);
1458         target_offset = e->target_offset - (origsize - *size);
1459         if (ret)
1460                 goto out;
1461         t = ipt_get_target(e);
1462         ret = xt_compat_target_to_user(t, dstptr, size);
1463         if (ret)
1464                 goto out;
1465         ret = -EFAULT;
1466         next_offset = e->next_offset - (origsize - *size);
1467         if (put_user(target_offset, &ce->target_offset))
1468                 goto out;
1469         if (put_user(next_offset, &ce->next_offset))
1470                 goto out;
1471
1472         (*i)++;
1473         return 0;
1474 out:
1475         return ret;
1476 }
1477
1478 static int
1479 compat_find_calc_match(struct ipt_entry_match *m,
1480                        const char *name,
1481                        const struct ipt_ip *ip,
1482                        unsigned int hookmask,
1483                        int *size, unsigned int *i)
1484 {
1485         struct xt_match *match;
1486
1487         match = try_then_request_module(xt_find_match(AF_INET, m->u.user.name,
1488                                                       m->u.user.revision),
1489                                         "ipt_%s", m->u.user.name);
1490         if (IS_ERR(match) || !match) {
1491                 duprintf("compat_check_calc_match: `%s' not found\n",
1492                          m->u.user.name);
1493                 return match ? PTR_ERR(match) : -ENOENT;
1494         }
1495         m->u.kernel.match = match;
1496         *size += xt_compat_match_offset(match);
1497
1498         (*i)++;
1499         return 0;
1500 }
1501
1502 static int
1503 compat_release_match(struct ipt_entry_match *m, unsigned int *i)
1504 {
1505         if (i && (*i)-- == 0)
1506                 return 1;
1507
1508         module_put(m->u.kernel.match->me);
1509         return 0;
1510 }
1511
1512 static int
1513 compat_release_entry(struct compat_ipt_entry *e, unsigned int *i)
1514 {
1515         struct ipt_entry_target *t;
1516
1517         if (i && (*i)-- == 0)
1518                 return 1;
1519
1520         /* Cleanup all matches */
1521         COMPAT_IPT_MATCH_ITERATE(e, compat_release_match, NULL);
1522         t = compat_ipt_get_target(e);
1523         module_put(t->u.kernel.target->me);
1524         return 0;
1525 }
1526
1527 static int
1528 check_compat_entry_size_and_hooks(struct compat_ipt_entry *e,
1529                                   struct xt_table_info *newinfo,
1530                                   unsigned int *size,
1531                                   unsigned char *base,
1532                                   unsigned char *limit,
1533                                   unsigned int *hook_entries,
1534                                   unsigned int *underflows,
1535                                   unsigned int *i,
1536                                   const char *name)
1537 {
1538         struct ipt_entry_target *t;
1539         struct xt_target *target;
1540         unsigned int entry_offset;
1541         unsigned int j;
1542         int ret, off, h;
1543
1544         duprintf("check_compat_entry_size_and_hooks %p\n", e);
1545         if ((unsigned long)e % __alignof__(struct compat_ipt_entry) != 0
1546             || (unsigned char *)e + sizeof(struct compat_ipt_entry) >= limit) {
1547                 duprintf("Bad offset %p, limit = %p\n", e, limit);
1548                 return -EINVAL;
1549         }
1550
1551         if (e->next_offset < sizeof(struct compat_ipt_entry) +
1552                              sizeof(struct compat_xt_entry_target)) {
1553                 duprintf("checking: element %p size %u\n",
1554                          e, e->next_offset);
1555                 return -EINVAL;
1556         }
1557
1558         /* For purposes of check_entry casting the compat entry is fine */
1559         ret = check_entry((struct ipt_entry *)e, name);
1560         if (ret)
1561                 return ret;
1562
1563         off = sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1564         entry_offset = (void *)e - (void *)base;
1565         j = 0;
1566         ret = COMPAT_IPT_MATCH_ITERATE(e, compat_find_calc_match, name,
1567                                        &e->ip, e->comefrom, &off, &j);
1568         if (ret != 0)
1569                 goto release_matches;
1570
1571         t = compat_ipt_get_target(e);
1572         target = try_then_request_module(xt_find_target(AF_INET,
1573                                                         t->u.user.name,
1574                                                         t->u.user.revision),
1575                                          "ipt_%s", t->u.user.name);
1576         if (IS_ERR(target) || !target) {
1577                 duprintf("check_compat_entry_size_and_hooks: `%s' not found\n",
1578                          t->u.user.name);
1579                 ret = target ? PTR_ERR(target) : -ENOENT;
1580                 goto release_matches;
1581         }
1582         t->u.kernel.target = target;
1583
1584         off += xt_compat_target_offset(target);
1585         *size += off;
1586         ret = xt_compat_add_offset(AF_INET, entry_offset, off);
1587         if (ret)
1588                 goto out;
1589
1590         /* Check hooks & underflows */
1591         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1592                 if ((unsigned char *)e - base == hook_entries[h])
1593                         newinfo->hook_entry[h] = hook_entries[h];
1594                 if ((unsigned char *)e - base == underflows[h])
1595                         newinfo->underflow[h] = underflows[h];
1596         }
1597
1598         /* Clear counters and comefrom */
1599         memset(&e->counters, 0, sizeof(e->counters));
1600         e->comefrom = 0;
1601
1602         (*i)++;
1603         return 0;
1604
1605 out:
1606         module_put(t->u.kernel.target->me);
1607 release_matches:
1608         IPT_MATCH_ITERATE(e, compat_release_match, &j);
1609         return ret;
1610 }
1611
1612 static int
1613 compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr,
1614                             unsigned int *size, const char *name,
1615                             struct xt_table_info *newinfo, unsigned char *base)
1616 {
1617         struct ipt_entry_target *t;
1618         struct xt_target *target;
1619         struct ipt_entry *de;
1620         unsigned int origsize;
1621         int ret, h;
1622
1623         ret = 0;
1624         origsize = *size;
1625         de = (struct ipt_entry *)*dstptr;
1626         memcpy(de, e, sizeof(struct ipt_entry));
1627         memcpy(&de->counters, &e->counters, sizeof(e->counters));
1628
1629         *dstptr += sizeof(struct ipt_entry);
1630         *size += sizeof(struct ipt_entry) - sizeof(struct compat_ipt_entry);
1631
1632         ret = COMPAT_IPT_MATCH_ITERATE(e, xt_compat_match_from_user,
1633                                        dstptr, size);
1634         if (ret)
1635                 return ret;
1636         de->target_offset = e->target_offset - (origsize - *size);
1637         t = compat_ipt_get_target(e);
1638         target = t->u.kernel.target;
1639         xt_compat_target_from_user(t, dstptr, size);
1640
1641         de->next_offset = e->next_offset - (origsize - *size);
1642         for (h = 0; h < NF_INET_NUMHOOKS; h++) {
1643                 if ((unsigned char *)de - base < newinfo->hook_entry[h])
1644                         newinfo->hook_entry[h] -= origsize - *size;
1645                 if ((unsigned char *)de - base < newinfo->underflow[h])
1646                         newinfo->underflow[h] -= origsize - *size;
1647         }
1648         return ret;
1649 }
1650
1651 static int
1652 compat_check_entry(struct ipt_entry *e, const char *name,
1653                                      unsigned int *i)
1654 {
1655         struct xt_mtchk_param mtpar;
1656         unsigned int j;
1657         int ret;
1658
1659         j = 0;
1660         mtpar.table     = name;
1661         mtpar.entryinfo = &e->ip;
1662         mtpar.hook_mask = e->comefrom;
1663         mtpar.family    = NFPROTO_IPV4;
1664         ret = IPT_MATCH_ITERATE(e, check_match, &mtpar, &j);
1665         if (ret)
1666                 goto cleanup_matches;
1667
1668         ret = check_target(e, name);
1669         if (ret)
1670                 goto cleanup_matches;
1671
1672         (*i)++;
1673         return 0;
1674
1675  cleanup_matches:
1676         IPT_MATCH_ITERATE(e, cleanup_match, &j);
1677         return ret;
1678 }
1679
1680 static int
1681 translate_compat_table(const char *name,
1682                        unsigned int valid_hooks,
1683                        struct xt_table_info **pinfo,
1684                        void **pentry0,
1685                        unsigned int total_size,
1686                        unsigned int number,
1687                        unsigned int *hook_entries,
1688                        unsigned int *underflows)
1689 {
1690         unsigned int i, j;
1691         struct xt_table_info *newinfo, *info;
1692         void *pos, *entry0, *entry1;
1693         unsigned int size;
1694         int ret;
1695
1696         info = *pinfo;
1697         entry0 = *pentry0;
1698         size = total_size;
1699         info->number = number;
1700
1701         /* Init all hooks to impossible value. */
1702         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1703                 info->hook_entry[i] = 0xFFFFFFFF;
1704                 info->underflow[i] = 0xFFFFFFFF;
1705         }
1706
1707         duprintf("translate_compat_table: size %u\n", info->size);
1708         j = 0;
1709         xt_compat_lock(AF_INET);
1710         /* Walk through entries, checking offsets. */
1711         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1712                                        check_compat_entry_size_and_hooks,
1713                                        info, &size, entry0,
1714                                        entry0 + total_size,
1715                                        hook_entries, underflows, &j, name);
1716         if (ret != 0)
1717                 goto out_unlock;
1718
1719         ret = -EINVAL;
1720         if (j != number) {
1721                 duprintf("translate_compat_table: %u not %u entries\n",
1722                          j, number);
1723                 goto out_unlock;
1724         }
1725
1726         /* Check hooks all assigned */
1727         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1728                 /* Only hooks which are valid */
1729                 if (!(valid_hooks & (1 << i)))
1730                         continue;
1731                 if (info->hook_entry[i] == 0xFFFFFFFF) {
1732                         duprintf("Invalid hook entry %u %u\n",
1733                                  i, hook_entries[i]);
1734                         goto out_unlock;
1735                 }
1736                 if (info->underflow[i] == 0xFFFFFFFF) {
1737                         duprintf("Invalid underflow %u %u\n",
1738                                  i, underflows[i]);
1739                         goto out_unlock;
1740                 }
1741         }
1742
1743         ret = -ENOMEM;
1744         newinfo = xt_alloc_table_info(size);
1745         if (!newinfo)
1746                 goto out_unlock;
1747
1748         newinfo->number = number;
1749         for (i = 0; i < NF_INET_NUMHOOKS; i++) {
1750                 newinfo->hook_entry[i] = info->hook_entry[i];
1751                 newinfo->underflow[i] = info->underflow[i];
1752         }
1753         entry1 = newinfo->entries[raw_smp_processor_id()];
1754         pos = entry1;
1755         size = total_size;
1756         ret = COMPAT_IPT_ENTRY_ITERATE(entry0, total_size,
1757                                        compat_copy_entry_from_user,
1758                                        &pos, &size, name, newinfo, entry1);
1759         xt_compat_flush_offsets(AF_INET);
1760         xt_compat_unlock(AF_INET);
1761         if (ret)
1762                 goto free_newinfo;
1763
1764         ret = -ELOOP;
1765         if (!mark_source_chains(newinfo, valid_hooks, entry1))
1766                 goto free_newinfo;
1767
1768         i = 0;
1769         ret = IPT_ENTRY_ITERATE(entry1, newinfo->size, compat_check_entry,
1770                                 name, &i);
1771         if (ret) {
1772                 j -= i;
1773                 COMPAT_IPT_ENTRY_ITERATE_CONTINUE(entry0, newinfo->size, i,
1774                                                   compat_release_entry, &j);
1775                 IPT_ENTRY_ITERATE(entry1, newinfo->size, cleanup_entry, &i);
1776                 xt_free_table_info(newinfo);
1777                 return ret;
1778         }
1779
1780         /* And one copy for every other CPU */
1781         for_each_possible_cpu(i)
1782                 if (newinfo->entries[i] && newinfo->entries[i] != entry1)
1783                         memcpy(newinfo->entries[i], entry1, newinfo->size);
1784
1785         *pinfo = newinfo;
1786         *pentry0 = entry1;
1787         xt_free_table_info(info);
1788         return 0;
1789
1790 free_newinfo:
1791         xt_free_table_info(newinfo);
1792 out:
1793         COMPAT_IPT_ENTRY_ITERATE(entry0, total_size, compat_release_entry, &j);
1794         return ret;
1795 out_unlock:
1796         xt_compat_flush_offsets(AF_INET);
1797         xt_compat_unlock(AF_INET);
1798         goto out;
1799 }
1800
1801 static int
1802 compat_do_replace(struct net *net, void __user *user, unsigned int len)
1803 {
1804         int ret;
1805         struct compat_ipt_replace tmp;
1806         struct xt_table_info *newinfo;
1807         void *loc_cpu_entry;
1808
1809         if (copy_from_user(&tmp, user, sizeof(tmp)) != 0)
1810                 return -EFAULT;
1811
1812         /* overflow check */
1813         if (tmp.size >= INT_MAX / num_possible_cpus())
1814                 return -ENOMEM;
1815         if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
1816                 return -ENOMEM;
1817
1818         newinfo = xt_alloc_table_info(tmp.size);
1819         if (!newinfo)
1820                 return -ENOMEM;
1821
1822         /* choose the copy that is on our node/cpu */
1823         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
1824         if (copy_from_user(loc_cpu_entry, user + sizeof(tmp),
1825                            tmp.size) != 0) {
1826                 ret = -EFAULT;
1827                 goto free_newinfo;
1828         }
1829
1830         ret = translate_compat_table(tmp.name, tmp.valid_hooks,
1831                                      &newinfo, &loc_cpu_entry, tmp.size,
1832                                      tmp.num_entries, tmp.hook_entry,
1833                                      tmp.underflow);
1834         if (ret != 0)
1835                 goto free_newinfo;
1836
1837         duprintf("compat_do_replace: Translated table\n");
1838
1839         ret = __do_replace(net, tmp.name, tmp.valid_hooks, newinfo,
1840                            tmp.num_counters, compat_ptr(tmp.counters));
1841         if (ret)
1842                 goto free_newinfo_untrans;
1843         return 0;
1844
1845  free_newinfo_untrans:
1846         IPT_ENTRY_ITERATE(loc_cpu_entry, newinfo->size, cleanup_entry, NULL);
1847  free_newinfo:
1848         xt_free_table_info(newinfo);
1849         return ret;
1850 }
1851
1852 static int
1853 compat_do_ipt_set_ctl(struct sock *sk,  int cmd, void __user *user,
1854                       unsigned int len)
1855 {
1856         int ret;
1857
1858         if (!capable(CAP_NET_ADMIN))
1859                 return -EPERM;
1860
1861         switch (cmd) {
1862         case IPT_SO_SET_REPLACE:
1863                 ret = compat_do_replace(sock_net(sk), user, len);
1864                 break;
1865
1866         case IPT_SO_SET_ADD_COUNTERS:
1867                 ret = do_add_counters(sock_net(sk), user, len, 1);
1868                 break;
1869
1870         default:
1871                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
1872                 ret = -EINVAL;
1873         }
1874
1875         return ret;
1876 }
1877
1878 struct compat_ipt_get_entries {
1879         char name[IPT_TABLE_MAXNAMELEN];
1880         compat_uint_t size;
1881         struct compat_ipt_entry entrytable[0];
1882 };
1883
1884 static int
1885 compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
1886                             void __user *userptr)
1887 {
1888         struct xt_counters *counters;
1889         const struct xt_table_info *private = table->private;
1890         void __user *pos;
1891         unsigned int size;
1892         int ret = 0;
1893         const void *loc_cpu_entry;
1894         unsigned int i = 0;
1895
1896         counters = alloc_counters(table);
1897         if (IS_ERR(counters))
1898                 return PTR_ERR(counters);
1899
1900         /* choose the copy that is on our node/cpu, ...
1901          * This choice is lazy (because current thread is
1902          * allowed to migrate to another cpu)
1903          */
1904         loc_cpu_entry = private->entries[raw_smp_processor_id()];
1905         pos = userptr;
1906         size = total_size;
1907         ret = IPT_ENTRY_ITERATE(loc_cpu_entry, total_size,
1908                                 compat_copy_entry_to_user,
1909                                 &pos, &size, counters, &i);
1910
1911         vfree(counters);
1912         return ret;
1913 }
1914
1915 static int
1916 compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
1917                    int *len)
1918 {
1919         int ret;
1920         struct compat_ipt_get_entries get;
1921         struct xt_table *t;
1922
1923         if (*len < sizeof(get)) {
1924                 duprintf("compat_get_entries: %u < %zu\n", *len, sizeof(get));
1925                 return -EINVAL;
1926         }
1927
1928         if (copy_from_user(&get, uptr, sizeof(get)) != 0)
1929                 return -EFAULT;
1930
1931         if (*len != sizeof(struct compat_ipt_get_entries) + get.size) {
1932                 duprintf("compat_get_entries: %u != %zu\n",
1933                          *len, sizeof(get) + get.size);
1934                 return -EINVAL;
1935         }
1936
1937         xt_compat_lock(AF_INET);
1938         t = xt_find_table_lock(net, AF_INET, get.name);
1939         if (t && !IS_ERR(t)) {
1940                 const struct xt_table_info *private = t->private;
1941                 struct xt_table_info info;
1942                 duprintf("t->private->number = %u\n", private->number);
1943                 ret = compat_table_info(private, &info);
1944                 if (!ret && get.size == info.size) {
1945                         ret = compat_copy_entries_to_user(private->size,
1946                                                           t, uptr->entrytable);
1947                 } else if (!ret) {
1948                         duprintf("compat_get_entries: I've got %u not %u!\n",
1949                                  private->size, get.size);
1950                         ret = -EAGAIN;
1951                 }
1952                 xt_compat_flush_offsets(AF_INET);
1953                 module_put(t->me);
1954                 xt_table_unlock(t);
1955         } else
1956                 ret = t ? PTR_ERR(t) : -ENOENT;
1957
1958         xt_compat_unlock(AF_INET);
1959         return ret;
1960 }
1961
1962 static int do_ipt_get_ctl(struct sock *, int, void __user *, int *);
1963
1964 static int
1965 compat_do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
1966 {
1967         int ret;
1968
1969         if (!capable(CAP_NET_ADMIN))
1970                 return -EPERM;
1971
1972         switch (cmd) {
1973         case IPT_SO_GET_INFO:
1974                 ret = get_info(sock_net(sk), user, len, 1);
1975                 break;
1976         case IPT_SO_GET_ENTRIES:
1977                 ret = compat_get_entries(sock_net(sk), user, len);
1978                 break;
1979         default:
1980                 ret = do_ipt_get_ctl(sk, cmd, user, len);
1981         }
1982         return ret;
1983 }
1984 #endif
1985
1986 static int
1987 do_ipt_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
1988 {
1989         int ret;
1990
1991         if (!capable(CAP_NET_ADMIN))
1992                 return -EPERM;
1993
1994         switch (cmd) {
1995         case IPT_SO_SET_REPLACE:
1996                 ret = do_replace(sock_net(sk), user, len);
1997                 break;
1998
1999         case IPT_SO_SET_ADD_COUNTERS:
2000                 ret = do_add_counters(sock_net(sk), user, len, 0);
2001                 break;
2002
2003         default:
2004                 duprintf("do_ipt_set_ctl:  unknown request %i\n", cmd);
2005                 ret = -EINVAL;
2006         }
2007
2008         return ret;
2009 }
2010
2011 static int
2012 do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
2013 {
2014         int ret;
2015
2016         if (!capable(CAP_NET_ADMIN))
2017                 return -EPERM;
2018
2019         switch (cmd) {
2020         case IPT_SO_GET_INFO:
2021                 ret = get_info(sock_net(sk), user, len, 0);
2022                 break;
2023
2024         case IPT_SO_GET_ENTRIES:
2025                 ret = get_entries(sock_net(sk), user, len);
2026                 break;
2027
2028         case IPT_SO_GET_REVISION_MATCH:
2029         case IPT_SO_GET_REVISION_TARGET: {
2030                 struct ipt_get_revision rev;
2031                 int target;
2032
2033                 if (*len != sizeof(rev)) {
2034                         ret = -EINVAL;
2035                         break;
2036                 }
2037                 if (copy_from_user(&rev, user, sizeof(rev)) != 0) {
2038                         ret = -EFAULT;
2039                         break;
2040                 }
2041
2042                 if (cmd == IPT_SO_GET_REVISION_TARGET)
2043                         target = 1;
2044                 else
2045                         target = 0;
2046
2047                 try_then_request_module(xt_find_revision(AF_INET, rev.name,
2048                                                          rev.revision,
2049                                                          target, &ret),
2050                                         "ipt_%s", rev.name);
2051                 break;
2052         }
2053
2054         default:
2055                 duprintf("do_ipt_get_ctl: unknown request %i\n", cmd);
2056                 ret = -EINVAL;
2057         }
2058
2059         return ret;
2060 }
2061
2062 struct xt_table *ipt_register_table(struct net *net, struct xt_table *table,
2063                                     const struct ipt_replace *repl)
2064 {
2065         int ret;
2066         struct xt_table_info *newinfo;
2067         struct xt_table_info bootstrap
2068                 = { 0, 0, 0, { 0 }, { 0 }, { } };
2069         void *loc_cpu_entry;
2070         struct xt_table *new_table;
2071
2072         newinfo = xt_alloc_table_info(repl->size);
2073         if (!newinfo) {
2074                 ret = -ENOMEM;
2075                 goto out;
2076         }
2077
2078         /* choose the copy on our node/cpu, but dont care about preemption */
2079         loc_cpu_entry = newinfo->entries[raw_smp_processor_id()];
2080         memcpy(loc_cpu_entry, repl->entries, repl->size);
2081
2082         ret = translate_table(table->name, table->valid_hooks,
2083                               newinfo, loc_cpu_entry, repl->size,
2084                               repl->num_entries,
2085                               repl->hook_entry,
2086                               repl->underflow);
2087         if (ret != 0)
2088                 goto out_free;
2089
2090         new_table = xt_register_table(net, table, &bootstrap, newinfo);
2091         if (IS_ERR(new_table)) {
2092                 ret = PTR_ERR(new_table);
2093                 goto out_free;
2094         }
2095
2096         return new_table;
2097
2098 out_free:
2099         xt_free_table_info(newinfo);
2100 out:
2101         return ERR_PTR(ret);
2102 }
2103
2104 void ipt_unregister_table(struct xt_table *table)
2105 {
2106         struct xt_table_info *private;
2107         void *loc_cpu_entry;
2108         struct module *table_owner = table->me;
2109
2110         private = xt_unregister_table(table);
2111
2112         /* Decrease module usage counts and free resources */
2113         loc_cpu_entry = private->entries[raw_smp_processor_id()];
2114         IPT_ENTRY_ITERATE(loc_cpu_entry, private->size, cleanup_entry, NULL);
2115         if (private->number > private->initial_entries)
2116                 module_put(table_owner);
2117         xt_free_table_info(private);
2118 }
2119
/*
 * Test an ICMP type/code pair against a rule.
 *
 * The rule matches when @test_type is the wildcard 0xFF, or when @type
 * equals @test_type and @code lies in [@min_code, @max_code].  The result is
 * flipped when @invert is set.
 */
static inline bool
icmp_type_code_match(u_int8_t test_type, u_int8_t min_code, u_int8_t max_code,
		     u_int8_t type, u_int8_t code,
		     bool invert)
{
	bool hit;

	if (test_type == 0xFF)
		hit = true;
	else
		hit = type == test_type &&
		      code >= min_code && code <= max_code;

	return hit != invert;
}
2130
2131 static bool
2132 icmp_match(const struct sk_buff *skb, const struct xt_match_param *par)
2133 {
2134         const struct icmphdr *ic;
2135         struct icmphdr _icmph;
2136         const struct ipt_icmp *icmpinfo = par->matchinfo;
2137
2138         /* Must not be a fragment. */
2139         if (par->fragoff != 0)
2140                 return false;
2141
2142         ic = skb_header_pointer(skb, par->thoff, sizeof(_icmph), &_icmph);
2143         if (ic == NULL) {
2144                 /* We've been asked to examine this packet, and we
2145                  * can't.  Hence, no choice but to drop.
2146                  */
2147                 duprintf("Dropping evil ICMP tinygram.\n");
2148                 *par->hotdrop = true;
2149                 return false;
2150         }
2151
2152         return icmp_type_code_match(icmpinfo->type,
2153                                     icmpinfo->code[0],
2154                                     icmpinfo->code[1],
2155                                     ic->type, ic->code,
2156                                     !!(icmpinfo->invflags&IPT_ICMP_INV));
2157 }
2158
2159 static bool icmp_checkentry(const struct xt_mtchk_param *par)
2160 {
2161         const struct ipt_icmp *icmpinfo = par->matchinfo;
2162
2163         /* Must specify no unknown invflags */
2164         return !(icmpinfo->invflags & ~IPT_ICMP_INV);
2165 }
2166
2167 /* The built-in targets: standard (NULL) and error. */
2168 static struct xt_target ipt_standard_target __read_mostly = {
2169         .name           = IPT_STANDARD_TARGET,
2170         .targetsize     = sizeof(int),
2171         .family         = AF_INET,
2172 #ifdef CONFIG_COMPAT
2173         .compatsize     = sizeof(compat_int_t),
2174         .compat_from_user = compat_standard_from_user,
2175         .compat_to_user = compat_standard_to_user,
2176 #endif
2177 };
2178
2179 static struct xt_target ipt_error_target __read_mostly = {
2180         .name           = IPT_ERROR_TARGET,
2181         .target         = ipt_error,
2182         .targetsize     = IPT_FUNCTION_MAXNAMELEN,
2183         .family         = AF_INET,
2184 };
2185
2186 static struct nf_sockopt_ops ipt_sockopts = {
2187         .pf             = PF_INET,
2188         .set_optmin     = IPT_BASE_CTL,
2189         .set_optmax     = IPT_SO_SET_MAX+1,
2190         .set            = do_ipt_set_ctl,
2191 #ifdef CONFIG_COMPAT
2192         .compat_set     = compat_do_ipt_set_ctl,
2193 #endif
2194         .get_optmin     = IPT_BASE_CTL,
2195         .get_optmax     = IPT_SO_GET_MAX+1,
2196         .get            = do_ipt_get_ctl,
2197 #ifdef CONFIG_COMPAT
2198         .compat_get     = compat_do_ipt_get_ctl,
2199 #endif
2200         .owner          = THIS_MODULE,
2201 };
2202
2203 static struct xt_match icmp_matchstruct __read_mostly = {
2204         .name           = "icmp",
2205         .match          = icmp_match,
2206         .matchsize      = sizeof(struct ipt_icmp),
2207         .checkentry     = icmp_checkentry,
2208         .proto          = IPPROTO_ICMP,
2209         .family         = AF_INET,
2210 };
2211
2212 static int __net_init ip_tables_net_init(struct net *net)
2213 {
2214         return xt_proto_init(net, AF_INET);
2215 }
2216
2217 static void __net_exit ip_tables_net_exit(struct net *net)
2218 {
2219         xt_proto_fini(net, AF_INET);
2220 }
2221
2222 static struct pernet_operations ip_tables_net_ops = {
2223         .init = ip_tables_net_init,
2224         .exit = ip_tables_net_exit,
2225 };
2226
2227 static int __init ip_tables_init(void)
2228 {
2229         int ret;
2230
2231         ret = register_pernet_subsys(&ip_tables_net_ops);
2232         if (ret < 0)
2233                 goto err1;
2234
2235         /* Noone else will be downing sem now, so we won't sleep */
2236         ret = xt_register_target(&ipt_standard_target);
2237         if (ret < 0)
2238                 goto err2;
2239         ret = xt_register_target(&ipt_error_target);
2240         if (ret < 0)
2241                 goto err3;
2242         ret = xt_register_match(&icmp_matchstruct);
2243         if (ret < 0)
2244                 goto err4;
2245
2246         /* Register setsockopt */
2247         ret = nf_register_sockopt(&ipt_sockopts);
2248         if (ret < 0)
2249                 goto err5;
2250
2251         printk(KERN_INFO "ip_tables: (C) 2000-2006 Netfilter Core Team\n");
2252         return 0;
2253
2254 err5:
2255         xt_unregister_match(&icmp_matchstruct);
2256 err4:
2257         xt_unregister_target(&ipt_error_target);
2258 err3:
2259         xt_unregister_target(&ipt_standard_target);
2260 err2:
2261         unregister_pernet_subsys(&ip_tables_net_ops);
2262 err1:
2263         return ret;
2264 }
2265
2266 static void __exit ip_tables_fini(void)
2267 {
2268         nf_unregister_sockopt(&ipt_sockopts);
2269
2270         xt_unregister_match(&icmp_matchstruct);
2271         xt_unregister_target(&ipt_error_target);
2272         xt_unregister_target(&ipt_standard_target);
2273
2274         unregister_pernet_subsys(&ip_tables_net_ops);
2275 }
2276
/* Symbols exported to other modules. */
EXPORT_SYMBOL(ipt_register_table);
EXPORT_SYMBOL(ipt_unregister_table);
EXPORT_SYMBOL(ipt_do_table);

/* Module entry and exit points. */
module_init(ip_tables_init);
module_exit(ip_tables_fini);