[SUNGEM]: Make PM of PHYs more reliable (#2)
[linux-2.6] / net / sched / act_police.c
1 /*
2  * net/sched/police.c   Input police filter.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *              J Hadi Salim (action changes)
11  */
12
13 #include <asm/uaccess.h>
14 #include <asm/system.h>
15 #include <linux/bitops.h>
16 #include <linux/config.h>
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/module.h>
31 #include <linux/rtnetlink.h>
32 #include <linux/init.h>
33 #include <net/sock.h>
34 #include <net/act_api.h>
35
/*
 * Rate-table lookups for a policer p and a packet length L: return the
 * entry of the rate table (L2T) or the peak-rate table (L2T_P) selected
 * by L shifted right by that table's cell_log.  The macros dereference
 * the table pointer unconditionally and perform no bounds check.
 */
#define L2T(p,L)   ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log])
#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log])

/* Policer stored in the private pointer of a tc_action. */
#define PRIV(a) ((struct tcf_police *) (a)->priv)

/*
 * use generic hash table
 *
 * MY_TAB_SIZE has to be a power of two.  The mask is derived from the
 * size so the two values cannot drift apart when the table is resized.
 */
#define MY_TAB_SIZE     16
#define MY_TAB_MASK     (MY_TAB_SIZE - 1)
43 static u32 idx_gen;
44 static struct tcf_police *tcf_police_ht[MY_TAB_SIZE];
45 /* Policer hash table lock */
46 static DEFINE_RWLOCK(police_lock);
47
48 /* Each policer is serialized by its individual spinlock */
49
50 static __inline__ unsigned tcf_police_hash(u32 index)
51 {
52         return index&0xF;
53 }
54
55 static __inline__ struct tcf_police * tcf_police_lookup(u32 index)
56 {
57         struct tcf_police *p;
58
59         read_lock(&police_lock);
60         for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) {
61                 if (p->index == index)
62                         break;
63         }
64         read_unlock(&police_lock);
65         return p;
66 }
67
68 #ifdef CONFIG_NET_CLS_ACT
69 static int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb,
70                               int type, struct tc_action *a)
71 {
72         struct tcf_police *p;
73         int err = 0, index = -1, i = 0, s_i = 0, n_i = 0;
74         struct rtattr *r;
75
76         read_lock(&police_lock);
77
78         s_i = cb->args[0];
79
80         for (i = 0; i < MY_TAB_SIZE; i++) {
81                 p = tcf_police_ht[tcf_police_hash(i)];
82
83                 for (; p; p = p->next) {
84                         index++;
85                         if (index < s_i)
86                                 continue;
87                         a->priv = p;
88                         a->order = index;
89                         r = (struct rtattr*) skb->tail;
90                         RTA_PUT(skb, a->order, 0, NULL);
91                         if (type == RTM_DELACTION)
92                                 err = tcf_action_dump_1(skb, a, 0, 1);
93                         else
94                                 err = tcf_action_dump_1(skb, a, 0, 0);
95                         if (err < 0) {
96                                 index--;
97                                 skb_trim(skb, (u8*)r - skb->data);
98                                 goto done;
99                         }
100                         r->rta_len = skb->tail - (u8*)r;
101                         n_i++;
102                 }
103         }
104 done:
105         read_unlock(&police_lock);
106         if (n_i)
107                 cb->args[0] += n_i;
108         return n_i;
109
110 rtattr_failure:
111         skb_trim(skb, (u8*)r - skb->data);
112         goto done;
113 }
114
115 static inline int
116 tcf_hash_search(struct tc_action *a, u32 index)
117 {
118         struct tcf_police *p = tcf_police_lookup(index);
119
120         if (p != NULL) {
121                 a->priv = p;
122                 return 1;
123         } else {
124                 return 0;
125         }
126 }
127 #endif
128
129 static inline u32 tcf_police_new_index(void)
130 {
131         do {
132                 if (++idx_gen == 0)
133                         idx_gen = 1;
134         } while (tcf_police_lookup(idx_gen));
135
136         return idx_gen;
137 }
138
139 void tcf_police_destroy(struct tcf_police *p)
140 {
141         unsigned h = tcf_police_hash(p->index);
142         struct tcf_police **p1p;
143         
144         for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) {
145                 if (*p1p == p) {
146                         write_lock_bh(&police_lock);
147                         *p1p = p->next;
148                         write_unlock_bh(&police_lock);
149 #ifdef CONFIG_NET_ESTIMATOR
150                         gen_kill_estimator(&p->bstats, &p->rate_est);
151 #endif
152                         if (p->R_tab)
153                                 qdisc_put_rtab(p->R_tab);
154                         if (p->P_tab)
155                                 qdisc_put_rtab(p->P_tab);
156                         kfree(p);
157                         return;
158                 }
159         }
160         BUG_TRAP(0);
161 }
162
163 #ifdef CONFIG_NET_CLS_ACT
164 static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est,
165                                  struct tc_action *a, int ovr, int bind)
166 {
167         unsigned h;
168         int ret = 0, err;
169         struct rtattr *tb[TCA_POLICE_MAX];
170         struct tc_police *parm;
171         struct tcf_police *p;
172         struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL;
173
174         if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
175                 return -EINVAL;
176
177         if (tb[TCA_POLICE_TBF-1] == NULL ||
178             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
179                 return -EINVAL;
180         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
181
182         if (tb[TCA_POLICE_RESULT-1] != NULL &&
183             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
184                 return -EINVAL;
185         if (tb[TCA_POLICE_RESULT-1] != NULL &&
186             RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
187                 return -EINVAL;
188
189         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
190                 a->priv = p;
191                 if (bind) {
192                         p->bindcnt += 1;
193                         p->refcnt += 1;
194                 }
195                 if (ovr)
196                         goto override;
197                 return ret;
198         }
199
200         p = kmalloc(sizeof(*p), GFP_KERNEL);
201         if (p == NULL)
202                 return -ENOMEM;
203         memset(p, 0, sizeof(*p));
204
205         ret = ACT_P_CREATED;
206         p->refcnt = 1;
207         spin_lock_init(&p->lock);
208         p->stats_lock = &p->lock;
209         if (bind)
210                 p->bindcnt = 1;
211 override:
212         if (parm->rate.rate) {
213                 err = -ENOMEM;
214                 R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
215                 if (R_tab == NULL)
216                         goto failure;
217                 if (parm->peakrate.rate) {
218                         P_tab = qdisc_get_rtab(&parm->peakrate,
219                                                tb[TCA_POLICE_PEAKRATE-1]);
220                         if (p->P_tab == NULL) {
221                                 qdisc_put_rtab(R_tab);
222                                 goto failure;
223                         }
224                 }
225         }
226         /* No failure allowed after this point */
227         spin_lock_bh(&p->lock);
228         if (R_tab != NULL) {
229                 qdisc_put_rtab(p->R_tab);
230                 p->R_tab = R_tab;
231         }
232         if (P_tab != NULL) {
233                 qdisc_put_rtab(p->P_tab);
234                 p->P_tab = P_tab;
235         }
236
237         if (tb[TCA_POLICE_RESULT-1])
238                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
239         p->toks = p->burst = parm->burst;
240         p->mtu = parm->mtu;
241         if (p->mtu == 0) {
242                 p->mtu = ~0;
243                 if (p->R_tab)
244                         p->mtu = 255<<p->R_tab->rate.cell_log;
245         }
246         if (p->P_tab)
247                 p->ptoks = L2T_P(p, p->mtu);
248         p->action = parm->action;
249
250 #ifdef CONFIG_NET_ESTIMATOR
251         if (tb[TCA_POLICE_AVRATE-1])
252                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
253         if (est)
254                 gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
255 #endif
256
257         spin_unlock_bh(&p->lock);
258         if (ret != ACT_P_CREATED)
259                 return ret;
260
261         PSCHED_GET_TIME(p->t_c);
262         p->index = parm->index ? : tcf_police_new_index();
263         h = tcf_police_hash(p->index);
264         write_lock_bh(&police_lock);
265         p->next = tcf_police_ht[h];
266         tcf_police_ht[h] = p;
267         write_unlock_bh(&police_lock);
268
269         a->priv = p;
270         return ret;
271
272 failure:
273         if (ret == ACT_P_CREATED)
274                 kfree(p);
275         return err;
276 }
277
278 static int tcf_act_police_cleanup(struct tc_action *a, int bind)
279 {
280         struct tcf_police *p = PRIV(a);
281
282         if (p != NULL)
283                 return tcf_police_release(p, bind);
284         return 0;
285 }
286
287 static int tcf_act_police(struct sk_buff *skb, struct tc_action *a,
288                           struct tcf_result *res)
289 {
290         psched_time_t now;
291         struct tcf_police *p = PRIV(a);
292         long toks;
293         long ptoks = 0;
294
295         spin_lock(&p->lock);
296
297         p->bstats.bytes += skb->len;
298         p->bstats.packets++;
299
300 #ifdef CONFIG_NET_ESTIMATOR
301         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
302                 p->qstats.overlimits++;
303                 spin_unlock(&p->lock);
304                 return p->action;
305         }
306 #endif
307
308         if (skb->len <= p->mtu) {
309                 if (p->R_tab == NULL) {
310                         spin_unlock(&p->lock);
311                         return p->result;
312                 }
313
314                 PSCHED_GET_TIME(now);
315
316                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
317
318                 if (p->P_tab) {
319                         ptoks = toks + p->ptoks;
320                         if (ptoks > (long)L2T_P(p, p->mtu))
321                                 ptoks = (long)L2T_P(p, p->mtu);
322                         ptoks -= L2T_P(p, skb->len);
323                 }
324                 toks += p->toks;
325                 if (toks > (long)p->burst)
326                         toks = p->burst;
327                 toks -= L2T(p, skb->len);
328
329                 if ((toks|ptoks) >= 0) {
330                         p->t_c = now;
331                         p->toks = toks;
332                         p->ptoks = ptoks;
333                         spin_unlock(&p->lock);
334                         return p->result;
335                 }
336         }
337
338         p->qstats.overlimits++;
339         spin_unlock(&p->lock);
340         return p->action;
341 }
342
343 static int
344 tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
345 {
346         unsigned char    *b = skb->tail;
347         struct tc_police opt;
348         struct tcf_police *p = PRIV(a);
349
350         opt.index = p->index;
351         opt.action = p->action;
352         opt.mtu = p->mtu;
353         opt.burst = p->burst;
354         opt.refcnt = p->refcnt - ref;
355         opt.bindcnt = p->bindcnt - bind;
356         if (p->R_tab)
357                 opt.rate = p->R_tab->rate;
358         else
359                 memset(&opt.rate, 0, sizeof(opt.rate));
360         if (p->P_tab)
361                 opt.peakrate = p->P_tab->rate;
362         else
363                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
364         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
365         if (p->result)
366                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
367 #ifdef CONFIG_NET_ESTIMATOR
368         if (p->ewma_rate)
369                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
370 #endif
371         return skb->len;
372
373 rtattr_failure:
374         skb_trim(skb, b - skb->data);
375         return -1;
376 }
377
378 MODULE_AUTHOR("Alexey Kuznetsov");
379 MODULE_DESCRIPTION("Policing actions");
380 MODULE_LICENSE("GPL");
381
382 static struct tc_action_ops act_police_ops = {
383         .kind           =       "police",
384         .type           =       TCA_ID_POLICE,
385         .capab          =       TCA_CAP_NONE,
386         .owner          =       THIS_MODULE,
387         .act            =       tcf_act_police,
388         .dump           =       tcf_act_police_dump,
389         .cleanup        =       tcf_act_police_cleanup,
390         .lookup         =       tcf_hash_search,
391         .init           =       tcf_act_police_locate,
392         .walk           =       tcf_generic_walker
393 };
394
395 static int __init
396 police_init_module(void)
397 {
398         return tcf_register_action(&act_police_ops);
399 }
400
401 static void __exit
402 police_cleanup_module(void)
403 {
404         tcf_unregister_action(&act_police_ops);
405 }
406
407 module_init(police_init_module);
408 module_exit(police_cleanup_module);
409
410 #else /* CONFIG_NET_CLS_ACT */
411
412 struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est)
413 {
414         unsigned h;
415         struct tcf_police *p;
416         struct rtattr *tb[TCA_POLICE_MAX];
417         struct tc_police *parm;
418
419         if (rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0)
420                 return NULL;
421
422         if (tb[TCA_POLICE_TBF-1] == NULL ||
423             RTA_PAYLOAD(tb[TCA_POLICE_TBF-1]) != sizeof(*parm))
424                 return NULL;
425
426         parm = RTA_DATA(tb[TCA_POLICE_TBF-1]);
427
428         if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) {
429                 p->refcnt++;
430                 return p;
431         }
432
433         p = kmalloc(sizeof(*p), GFP_KERNEL);
434         if (p == NULL)
435                 return NULL;
436
437         memset(p, 0, sizeof(*p));
438         p->refcnt = 1;
439         spin_lock_init(&p->lock);
440         p->stats_lock = &p->lock;
441         if (parm->rate.rate) {
442                 p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]);
443                 if (p->R_tab == NULL)
444                         goto failure;
445                 if (parm->peakrate.rate) {
446                         p->P_tab = qdisc_get_rtab(&parm->peakrate,
447                                                   tb[TCA_POLICE_PEAKRATE-1]);
448                         if (p->P_tab == NULL)
449                                 goto failure;
450                 }
451         }
452         if (tb[TCA_POLICE_RESULT-1]) {
453                 if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32))
454                         goto failure;
455                 p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]);
456         }
457 #ifdef CONFIG_NET_ESTIMATOR
458         if (tb[TCA_POLICE_AVRATE-1]) {
459                 if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32))
460                         goto failure;
461                 p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]);
462         }
463 #endif
464         p->toks = p->burst = parm->burst;
465         p->mtu = parm->mtu;
466         if (p->mtu == 0) {
467                 p->mtu = ~0;
468                 if (p->R_tab)
469                         p->mtu = 255<<p->R_tab->rate.cell_log;
470         }
471         if (p->P_tab)
472                 p->ptoks = L2T_P(p, p->mtu);
473         PSCHED_GET_TIME(p->t_c);
474         p->index = parm->index ? : tcf_police_new_index();
475         p->action = parm->action;
476 #ifdef CONFIG_NET_ESTIMATOR
477         if (est)
478                 gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est);
479 #endif
480         h = tcf_police_hash(p->index);
481         write_lock_bh(&police_lock);
482         p->next = tcf_police_ht[h];
483         tcf_police_ht[h] = p;
484         write_unlock_bh(&police_lock);
485         return p;
486
487 failure:
488         if (p->R_tab)
489                 qdisc_put_rtab(p->R_tab);
490         kfree(p);
491         return NULL;
492 }
493
494 int tcf_police(struct sk_buff *skb, struct tcf_police *p)
495 {
496         psched_time_t now;
497         long toks;
498         long ptoks = 0;
499
500         spin_lock(&p->lock);
501
502         p->bstats.bytes += skb->len;
503         p->bstats.packets++;
504
505 #ifdef CONFIG_NET_ESTIMATOR
506         if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) {
507                 p->qstats.overlimits++;
508                 spin_unlock(&p->lock);
509                 return p->action;
510         }
511 #endif
512
513         if (skb->len <= p->mtu) {
514                 if (p->R_tab == NULL) {
515                         spin_unlock(&p->lock);
516                         return p->result;
517                 }
518
519                 PSCHED_GET_TIME(now);
520
521                 toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst);
522
523                 if (p->P_tab) {
524                         ptoks = toks + p->ptoks;
525                         if (ptoks > (long)L2T_P(p, p->mtu))
526                                 ptoks = (long)L2T_P(p, p->mtu);
527                         ptoks -= L2T_P(p, skb->len);
528                 }
529                 toks += p->toks;
530                 if (toks > (long)p->burst)
531                         toks = p->burst;
532                 toks -= L2T(p, skb->len);
533
534                 if ((toks|ptoks) >= 0) {
535                         p->t_c = now;
536                         p->toks = toks;
537                         p->ptoks = ptoks;
538                         spin_unlock(&p->lock);
539                         return p->result;
540                 }
541         }
542
543         p->qstats.overlimits++;
544         spin_unlock(&p->lock);
545         return p->action;
546 }
547 EXPORT_SYMBOL(tcf_police);
548
549 int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p)
550 {
551         unsigned char    *b = skb->tail;
552         struct tc_police opt;
553
554         opt.index = p->index;
555         opt.action = p->action;
556         opt.mtu = p->mtu;
557         opt.burst = p->burst;
558         if (p->R_tab)
559                 opt.rate = p->R_tab->rate;
560         else
561                 memset(&opt.rate, 0, sizeof(opt.rate));
562         if (p->P_tab)
563                 opt.peakrate = p->P_tab->rate;
564         else
565                 memset(&opt.peakrate, 0, sizeof(opt.peakrate));
566         RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt);
567         if (p->result)
568                 RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result);
569 #ifdef CONFIG_NET_ESTIMATOR
570         if (p->ewma_rate)
571                 RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate);
572 #endif
573         return skb->len;
574
575 rtattr_failure:
576         skb_trim(skb, b - skb->data);
577         return -1;
578 }
579
580 int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p)
581 {
582         struct gnet_dump d;
583         
584         if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
585                         TCA_XSTATS, p->stats_lock, &d) < 0)
586                 goto errout;
587         
588         if (gnet_stats_copy_basic(&d, &p->bstats) < 0 ||
589 #ifdef CONFIG_NET_ESTIMATOR
590             gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 ||
591 #endif
592             gnet_stats_copy_queue(&d, &p->qstats) < 0)
593                 goto errout;
594
595         if (gnet_stats_finish_copy(&d) < 0)
596                 goto errout;
597
598         return 0;
599
600 errout:
601         return -1;
602 }
603
604 #endif /* CONFIG_NET_CLS_ACT */