[IPV4/6]: Netfilter IPsec input hooks
[linux-2.6] / net / sched / police.c
1 /*
2  * net/sched/police.c   Input police filter.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *              J Hadi Salim (action changes)
11  */
12
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/module.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
33 #include <net/sock.h>
34 #include <net/act_api.h>
35
36 #define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
37 #define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])
38 #define PRIV(a) ((struct tcf_police *) (a)->priv)
39
40 /* use generic hash table */
41 #define MY_TAB_SIZE     16
42 #define MY_TAB_MASK     15
43 static u32 idx_gen;
44 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
45 /* Policer hash table lock */
46 static DEFINE_RWLOCK(police_lock);
47
48 /* Each policer is serialized by its individual spinlock */
49
50 static __inline__ unsigned tcf_police_hash(u32 index)
51 {
52         return index&0xF;
53 }
54
55 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
56 {
57         struct tcf_police *p;
58
59         read_lock(&police_lock);
60         for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
61                 if (p->index == index)
62                         break;
63         }
64         read_unlock(&police_lock);
65         return p;
66 }
67
68 #ifdef CONFIG_NET_CLS_ACT
69 static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
70                               int type, struct tc_action *a)
71 {
72         struct tcf_police *p;
73         int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
74         struct rtattr *r;
75
76         read_lock(&police_lock);
77
78         s_i = cb->args[0];
79
80         for (i = 0; i < MY_TAB_SIZE; i++) {
81                 p = tcf_police_ht[tcf_police_hash(i)];
82
83                 for (; p; p = p->next) {
84                         index++;
85                         if (index < s_i)
86                                 continue;
87                         a->priv = p;
88                         a->order = index;
89                         r = (struct rtattr*) skb->tail;
90                         RTA_PUT(skb, a->order, 0, NULL);
91                         if (type == RTM_DELACTION)
92                                 err = tcf_action_dump_1(skb, a, 0, 1);
93                         else
94                                 err = tcf_action_dump_1(skb, a, 0, 0);
95                         if (err < 0) {
96                                 index--;
97                                 skb_trim(skb, (u8*)r - skb->data);
98                                 goto done;
99                         }
100                         r->rta_len = skb->tail - (u8*)r;
101                         n_i++;
102                 }
103         }
104 done:
105         read_unlock(&police_lock);
106         if (n_i)
107                 cb->args[0] += n_i;
108         return n_i;
109
110 rtattr_failure:
111         skb_trim(skb, (u8*)r - skb->data);
112         goto done;
113 }
114
115 static inline int
116 tcf_hash_search(struct tc_action *a, u32 index)
117 {
118         struct tcf_police *p = tcf_police_lookup(index);
119
120         if (p != NULL) {
121                 a->priv = p;
122                 return 1;
123         } else {
124                 return 0;
125         }
126 }
127 #endif
128
129 static inline u32 tcf_police_new_index(void)
130 {
131         do {
132                 if (++idx_gen == 0)
133                         idx_gen = 1;
134         } while (tcf_police_lookup(idx_gen));
135
136         return idx_gen;
137 }
138
139 void tcf_police_destroy(struct tcf_police *p)
140 {
141         unsigned h = tcf_police_hash(p->index);
142         struct tcf_police **p1p;
143         
144         for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
145                 if (*p1p == p) {
146                         write_lock_bh(&police_lock);
147                         *p1p = p->next;
148                         write_unlock_bh(&police_lock);
149 #ifdef CONFIG_NET_ESTIMATOR
150                         gen_kill_estimator(&p->bstats, &p->rate_est);
151 #endif
152                         if (p->R_tab)
153                                 qdisc_put_rtab(p->R_tab);
154                         if (p->P_tab)
155                                 qdisc_put_rtab(p->P_tab);
156                         kfree(p);
157                         return;
158                 }
159         }
160         BUG_TRAP(0);
161 }
162
163 #ifdef CONFIG_NET_CLS_ACT
164 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
165                                  struct tc_action *a, int ovr, int bind)
166 {
167         unsigned h;
168         int ret = 0, err;
169         struct rtattr *tb[TCA_POLICE_MAX];
170         struct tc_police *parm;
171         struct tcf_police *p;
172         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
173
174         if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
175                 return -EINVAL;
176
177         if (tb[TCA_POLICE_TBF-1] == NULL ||
178             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
179                 return -EINVAL;
180         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
181
182         if (tb[TCA_POLICE_RESULT-1] != NULL &&
183             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
184                 return -EINVAL;
185         if (tb[TCA_POLICE_RESULT-1] != NULL &&
186             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
187                 return -EINVAL;
188
189         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
190                 a->priv = p;
191                 if (bind) {
192                         p->bindcnt += 1;
193                         p->refcnt += 1;
194                 }
195                 if (ovr)
196                         goto override;
197                 return ret;
198         }
199
200         p = kmalloc(sizeof(*p), GFP_KERNEL);
201         if (p == NULL)
202                 return -ENOMEM;
203         memset(p, 0, sizeof(*p));
204
205         ret = ACT_P_CREATED;
206         p->refcnt = 1;
207         spin_lock_init(&p->lock);
208         p->stats_lock = &p->lock;
209         if (bind)
210                 p->bindcnt = 1;
211 override:
212         if (parm->rate.rate) {
213                 err = -ENOMEM;
214                 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
215                 if (R_tab == NULL)
216                         goto failure;
217                 if (parm->peakrate.rate) {
218                         P_tab = qdisc_get_rtab(&parm->peakrate,
219                                                tb[TCA_POLICE_PEAKRATE-1]);
220                         if (p->P_tab == NULL) {
221                                 qdisc_put_rtab(R_tab);
222                                 goto failure;
223                         }
224                 }
225         }
226         /* No failure allowed after this point */
227         spin_lock_bh(&p->lock);
228         if (R_tab != NULL) {
229                 qdisc_put_rtab(p->R_tab);
230                 p->R_tab = R_tab;
231         }
232         if (P_tab != NULL) {
233                 qdisc_put_rtab(p->P_tab);
234                 p->P_tab = P_tab;
235         }
236
237         if (tb[TCA_POLICE_RESULT-1])
238                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
239         p->toks = p->burst = parm->burst;
240         p->mtu = parm->mtu;
241         if (p->mtu == 0) {
242                 p->mtu = ~0;
243                 if (p->R_tab)
244                         p->mtu = 255<<p->R_tab->rate.cell_log;
245         }
246         if (p->P_tab)
247                 p->ptoks = L2T_P(p, p->mtu);
248         p->action = parm->action;
249
250 #ifdef CONFIG_NET_ESTIMATOR
251         if (tb[TCA_POLICE_AVRATE-1])
252                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
253         if (est)
254                 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
255 #endif
256
257         spin_unlock_bh(&p->lock);
258         if (ret != ACT_P_CREATED)
259                 return ret;
260
261         PSCHED_GET_TIME(p->t_c);
262         p->index = parm->index ? : tcf_police_new_index();
263         h = tcf_police_hash(p->index);
264         write_lock_bh(&police_lock);
265         p->next = tcf_police_ht[h];
266         tcf_police_ht[h] = p;
267         write_unlock_bh(&police_lock);
268
269         a->priv = p;
270         return ret;
271
272 failure:
273         if (ret == ACT_P_CREATED)
274                 kfree(p);
275         return err;
276 }
277
278 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
279 {
280         struct tcf_police *p = PRIV(a);
281
282         if (p != NULL)
283                 return tcf_police_release(p, bind);
284         return 0;
285 }
286
287 static int tcf_act_police(struct sk_buff **pskb, struct tc_action *a,
288                           struct tcf_result *res)
289 {
290         psched_time_t now;
291         struct sk_buff *skb = *pskb;
292         struct tcf_police *p = PRIV(a);
293         long toks;
294         long ptoks = 0;
295
296         spin_lock(&p->lock);
297
298         p->bstats.bytes += skb->len;
299         p->bstats.packets++;
300
301 #ifdef CONFIG_NET_ESTIMATOR
302         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
303                 p->qstats.overlimits++;
304                 spin_unlock(&p->lock);
305                 return p->action;
306         }
307 #endif
308
309         if (skb->len <= p->mtu) {
310                 if (p->R_tab == NULL) {
311                         spin_unlock(&p->lock);
312                         return p->result;
313                 }
314
315                 PSCHED_GET_TIME(now);
316
317                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
318
319                 if (p->P_tab) {
320                         ptoks = toks + p->ptoks;
321                         if (ptoks > (long)L2T_P(p, p->mtu))
322                                 ptoks = (long)L2T_P(p, p->mtu);
323                         ptoks -= L2T_P(p, skb->len);
324                 }
325                 toks += p->toks;
326                 if (toks > (long)p->burst)
327                         toks = p->burst;
328                 toks -= L2T(p, skb->len);
329
330                 if ((toks|ptoks) >= 0) {
331                         p->t_c = now;
332                         p->toks = toks;
333                         p->ptoks = ptoks;
334                         spin_unlock(&p->lock);
335                         return p->result;
336                 }
337         }
338
339         p->qstats.overlimits++;
340         spin_unlock(&p->lock);
341         return p->action;
342 }
343
344 static int
345 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
346 {
347         unsigned char    *b = skb->tail;
348         struct tc_police opt;
349         struct tcf_police *p = PRIV(a);
350
351         opt.index = p->index;
352         opt.action = p->action;
353         opt.mtu = p->mtu;
354         opt.burst = p->burst;
355         opt.refcnt = p->refcnt - ref;
356         opt.bindcnt = p->bindcnt - bind;
357         if (p->R_tab)
358                 opt.rate = p->R_tab->rate;
359         else
360                 memset(&opt.rate, 0, sizeof(opt.rate));
361         if (p->P_tab)
362                 opt.peakrate = p->P_tab->rate;
363         else
364                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
365         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
366         if (p->result)
367                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
368 #ifdef CONFIG_NET_ESTIMATOR
369         if (p->ewma_rate)
370                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
371 #endif
372         return skb->len;
373
374 rtattr_failure:
375         skb_trim(skb, b - skb->data);
376         return -1;
377 }
378
379 MODULE_AUTHOR("Alexey Kuznetsov");
380 MODULE_DESCRIPTION("Policing actions");
381 MODULE_LICENSE("GPL");
382
383 static struct tc_action_ops act_police_ops = {
384         .kind           =       "police",
385         .type           =       TCA_ID_POLICE,
386         .capab          =       TCA_CAP_NONE,
387         .owner          =       THIS_MODULE,
388         .act            =       tcf_act_police,
389         .dump           =       tcf_act_police_dump,
390         .cleanup        =       tcf_act_police_cleanup,
391         .lookup         =       tcf_hash_search,
392         .init           =       tcf_act_police_locate,
393         .walk           =       tcf_generic_walker
394 };
395
396 static int __init
397 police_init_module(void)
398 {
399         return tcf_register_action(&act_police_ops);
400 }
401
402 static void __exit
403 police_cleanup_module(void)
404 {
405         tcf_unregister_action(&act_police_ops);
406 }
407
408 module_init(police_init_module);
409 module_exit(police_cleanup_module);
410
411 #endif
412
413 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
414 {
415         unsigned h;
416         struct tcf_police *p;
417         struct rtattr *tb[TCA_POLICE_MAX];
418         struct tc_police *parm;
419
420         if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
421                 return NULL;
422
423         if (tb[TCA_POLICE_TBF-1] == NULL ||
424             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
425                 return NULL;
426
427         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
428
429         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
430                 p->refcnt++;
431                 return p;
432         }
433
434         p = kmalloc(sizeof(*p), GFP_KERNEL);
435         if (p == NULL)
436                 return NULL;
437
438         memset(p, 0, sizeof(*p));
439         p->refcnt = 1;
440         spin_lock_init(&p->lock);
441         p->stats_lock = &p->lock;
442         if (parm->rate.rate) {
443                 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
444                 if (p->R_tab == NULL)
445                         goto failure;
446                 if (parm->peakrate.rate) {
447                         p->P_tab = qdisc_get_rtab(&parm->peakrate,
448                                                   tb[TCA_POLICE_PEAKRATE-1]);
449                         if (p->P_tab == NULL)
450                                 goto failure;
451                 }
452         }
453         if (tb[TCA_POLICE_RESULT-1]) {
454                 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
455                         goto failure;
456                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
457         }
458 #ifdef CONFIG_NET_ESTIMATOR
459         if (tb[TCA_POLICE_AVRATE-1]) {
460                 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
461                         goto failure;
462                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
463         }
464 #endif
465         p->toks = p->burst = parm->burst;
466         p->mtu = parm->mtu;
467         if (p->mtu == 0) {
468                 p->mtu = ~0;
469                 if (p->R_tab)
470                         p->mtu = 255<<p->R_tab->rate.cell_log;
471         }
472         if (p->P_tab)
473                 p->ptoks = L2T_P(p, p->mtu);
474         PSCHED_GET_TIME(p->t_c);
475         p->index = parm->index ? : tcf_police_new_index();
476         p->action = parm->action;
477 #ifdef CONFIG_NET_ESTIMATOR
478         if (est)
479                 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
480 #endif
481         h = tcf_police_hash(p->index);
482         write_lock_bh(&police_lock);
483         p->next = tcf_police_ht[h];
484         tcf_police_ht[h] = p;
485         write_unlock_bh(&police_lock);
486         return p;
487
488 failure:
489         if (p->R_tab)
490                 qdisc_put_rtab(p->R_tab);
491         kfree(p);
492         return NULL;
493 }
494
495 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
496 {
497         psched_time_t now;
498         long toks;
499         long ptoks = 0;
500
501         spin_lock(&p->lock);
502
503         p->bstats.bytes += skb->len;
504         p->bstats.packets++;
505
506 #ifdef CONFIG_NET_ESTIMATOR
507         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
508                 p->qstats.overlimits++;
509                 spin_unlock(&p->lock);
510                 return p->action;
511         }
512 #endif
513
514         if (skb->len <= p->mtu) {
515                 if (p->R_tab == NULL) {
516                         spin_unlock(&p->lock);
517                         return p->result;
518                 }
519
520                 PSCHED_GET_TIME(now);
521
522                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
523
524                 if (p->P_tab) {
525                         ptoks = toks + p->ptoks;
526                         if (ptoks > (long)L2T_P(p, p->mtu))
527                                 ptoks = (long)L2T_P(p, p->mtu);
528                         ptoks -= L2T_P(p, skb->len);
529                 }
530                 toks += p->toks;
531                 if (toks > (long)p->burst)
532                         toks = p->burst;
533                 toks -= L2T(p, skb->len);
534
535                 if ((toks|ptoks) >= 0) {
536                         p->t_c = now;
537                         p->toks = toks;
538                         p->ptoks = ptoks;
539                         spin_unlock(&p->lock);
540                         return p->result;
541                 }
542         }
543
544         p->qstats.overlimits++;
545         spin_unlock(&p->lock);
546         return p->action;
547 }
548
549 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
550 {
551         unsigned char    *b = skb->tail;
552         struct tc_police opt;
553
554         opt.index = p->index;
555         opt.action = p->action;
556         opt.mtu = p->mtu;
557         opt.burst = p->burst;
558         if (p->R_tab)
559                 opt.rate = p->R_tab->rate;
560         else
561                 memset(&opt.rate, 0, sizeof(opt.rate));
562         if (p->P_tab)
563                 opt.peakrate = p->P_tab->rate;
564         else
565                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
566         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
567         if (p->result)
568                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
569 #ifdef CONFIG_NET_ESTIMATOR
570         if (p->ewma_rate)
571                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
572 #endif
573         return skb->len;
574
575 rtattr_failure:
576         skb_trim(skb, b - skb->data);
577         return -1;
578 }
579
580 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
581 {
582         struct gnet_dump d;
583         
584         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
585                         TCA_XSTATS, p->stats_lock, &d) < 0)
586                 goto errout;
587         
588         if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
589 #ifdef CONFIG_NET_ESTIMATOR
590             gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
591 #endif
592             gnet_stats_copy_queue(&d, &p->qstats) < 0)
593                 goto errout;
594
595         if (gnet_stats_finish_copy(&d) < 0)
596                 goto errout;
597
598         return 0;
599
600 errout:
601         return -1;
602 }
603
604
/* Policer entry points with external linkage. */
EXPORT_SYMBOL(tcf_police);
EXPORT_SYMBOL(tcf_police_locate);
EXPORT_SYMBOL(tcf_police_destroy);
EXPORT_SYMBOL(tcf_police_dump);
EXPORT_SYMBOL(tcf_police_dump_stats);

/*
 * Hash table and its helpers.
 * NOTE(review): all four of these are declared static in this file -
 * confirm that exporting them is intended.
 */
EXPORT_SYMBOL(tcf_police_ht);
EXPORT_SYMBOL(tcf_police_hash);
EXPORT_SYMBOL(tcf_police_lookup);
EXPORT_SYMBOL(tcf_police_new_index);