1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <asm/uaccess.h>
15 #include <asm/system.h>
16 #include <linux/bitops.h>
17 #include <linux/types.h>
18 #include <linux/kernel.h>
19 #include <linux/sched.h>
20 #include <linux/string.h>
21 #include <linux/mm.h>
22 #include <linux/socket.h>
23 #include <linux/sockios.h>
24 #include <linux/in.h>
25 #include <linux/errno.h>
26 #include <linux/interrupt.h>
27 #include <linux/if_ether.h>
28 #include <linux/inet.h>
29 #include <linux/netdevice.h>
30 #include <linux/etherdevice.h>
31 #include <linux/notifier.h>
32 #include <net/ip.h>
33 #include <net/route.h>
34 #include <linux/skbuff.h>
35 #include <net/sock.h>
36 #include <net/pkt_sched.h>
37
38
39 /*      Class-Based Queueing (CBQ) algorithm.
40         =======================================
41
42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
43                  Management Models for Packet Networks",
44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
45
46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
47
48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
49                  Parameters", 1996
50
51                  [4] Sally Floyd and Michael Speer, "Experimental Results
52                  for Class-Based Queueing", 1998, not published.
53
54         -----------------------------------------------------------------------
55
56         The algorithm skeleton was taken from the NS simulator's cbq.cc.
57         If someone wants to check this code against the LBL version,
58         they should take into account that ONLY the skeleton was borrowed;
59         the implementation is different. In particular:
60
61         --- The WRR algorithm is different. Our version looks more
62         reasonable (I hope) and works when quanta are allowed to be
63         less than MTU, which is always the case when real-time classes
64         have small rates. Note that the statement in [3] is
65         incomplete: delay may actually be estimated even if the class
66         per-round allotment is less than MTU. Namely, if per-round
67         allotment is W*r_i, and r_1+...+r_k = r < 1
68
69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
70
71         In the worst case we have IntServ estimate with D = W*r+k*MTU
72         and C = MTU*r. The proof (if correct at all) is trivial.
73
74
75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
76         interpret some places, which look like wrong translations
77         from NS. Anyone is advised to find these differences
78         and explain to me why I am wrong 8).
79
80         --- Linux has no EOI event, so we cannot estimate the true class
81         idle time. The workaround is to consider the next dequeue event
82         as a sign that the previous packet has finished. This is wrong because of
83         internal device queueing, but on a permanently loaded link it holds.
84         Moreover, combined with the clock integrator, this scheme looks
85         very close to an ideal solution.  */
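/* Illustrative only, not part of the original file: the scheduler below is
   normally configured from userspace with the iproute2 "tc" utility, with
   commands along the lines of

       tc qdisc add dev eth0 root handle 1: cbq bandwidth 100Mbit avpkt 1000
       tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
               rate 10Mbit allot 1514 prio 5 avpkt 1000 bounded

   The option names are quoted from memory; see tc-cbq(8) for the
   authoritative syntax.  The kernel side of this configuration is parsed
   by cbq_init() and the class setup code below. */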
86
87 struct cbq_sched_data;
88
89
90 struct cbq_class
91 {
92         struct cbq_class        *next;          /* hash table link */
93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
94
95 /* Parameters */
96         u32                     classid;
97         unsigned char           priority;       /* class priority */
98         unsigned char           priority2;      /* priority to be used after overlimit */
99         unsigned char           ewma_log;       /* time constant for idle time calculation */
100         unsigned char           ovl_strategy;
101 #ifdef CONFIG_NET_CLS_POLICE
102         unsigned char           police;
103 #endif
104
105         u32                     defmap;
106
107         /* Link-sharing scheduler parameters */
108         long                    maxidle;        /* Class parameters: see below. */
109         long                    offtime;
110         long                    minidle;
111         u32                     avpkt;
112         struct qdisc_rate_table *R_tab;
113
114         /* Overlimit strategy parameters */
115         void                    (*overlimit)(struct cbq_class *cl);
116         long                    penalty;
117
118         /* General scheduler (WRR) parameters */
119         long                    allot;
120         long                    quantum;        /* Allotment per WRR round */
121         long                    weight;         /* Relative allotment: see below */
122
123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
124         struct cbq_class        *split;         /* Ptr to split node */
125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
128                                                    parent otherwise */
129         struct cbq_class        *sibling;       /* Sibling chain */
130         struct cbq_class        *children;      /* Pointer to children chain */
131
132         struct Qdisc            *q;             /* Elementary queueing discipline */
133
134
135 /* Variables */
136         unsigned char           cpriority;      /* Effective priority */
137         unsigned char           delayed;
138         unsigned char           level;          /* level of the class in hierarchy:
139                                                    0 for leaf classes, and maximal
140                                                    level of children + 1 for nodes.
141                                                  */
142
143         psched_time_t           last;           /* Last end of service */
144         psched_time_t           undertime;
145         long                    avgidle;
146         long                    deficit;        /* Saved deficit for WRR */
147         unsigned long           penalized;
148         struct gnet_stats_basic bstats;
149         struct gnet_stats_queue qstats;
150         struct gnet_stats_rate_est rate_est;
151         spinlock_t              *stats_lock;
152         struct tc_cbq_xstats    xstats;
153
154         struct tcf_proto        *filter_list;
155
156         int                     refcnt;
157         int                     filters;
158
159         struct cbq_class        *defaults[TC_PRIO_MAX+1];
160 };
161
162 struct cbq_sched_data
163 {
164         struct cbq_class        *classes[16];           /* Hash table of all classes */
165         int                     nclasses[TC_CBQ_MAXPRIO+1];
166         unsigned                quanta[TC_CBQ_MAXPRIO+1];
167
168         struct cbq_class        link;
169
170         unsigned                activemask;
171         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
172                                                                    with backlog */
173
174 #ifdef CONFIG_NET_CLS_POLICE
175         struct cbq_class        *rx_class;
176 #endif
177         struct cbq_class        *tx_class;
178         struct cbq_class        *tx_borrowed;
179         int                     tx_len;
180         psched_time_t           now;            /* Cached timestamp */
181         psched_time_t           now_rt;         /* Cached real time */
182         unsigned                pmask;
183
184         struct timer_list       delay_timer;
185         struct timer_list       wd_timer;       /* Watchdog timer,
186                                                    started when CBQ has
187                                                    backlog, but cannot
188                                                    transmit just now */
189         long                    wd_expires;
190         int                     toplevel;
191         u32                     hgenerator;
192 };
193
194
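/* L2T ("length to time") converts a packet length into its transmission
   time at the class's configured rate: the length is quantized by
   rate.cell_log and used as an index into the precomputed rate table
   R_tab->data[]. */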
195 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
196
197
198 static __inline__ unsigned cbq_hash(u32 h)
199 {
200         h ^= h>>8;
201         h ^= h>>4;
202         return h&0xF;
203 }
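/* cbq_hash() XORs together the four nibbles of the low 16 bits of the
   classid (its minor number), yielding a 4-bit bucket index into the
   16-entry classes[] hash table. */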
204
205 static __inline__ struct cbq_class *
206 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
207 {
208         struct cbq_class *cl;
209
210         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
211                 if (cl->classid == classid)
212                         return cl;
213         return NULL;
214 }
215
216 #ifdef CONFIG_NET_CLS_POLICE
217
218 static struct cbq_class *
219 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
220 {
221         struct cbq_class *cl, *new;
222
223         for (cl = this->tparent; cl; cl = cl->tparent)
224                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
225                         return new;
226
227         return NULL;
228 }
229
230 #endif
231
232 /* Classify packet. The procedure is pretty complicated, but
233    it allows us to combine link sharing and priority scheduling
234    transparently.
235
236    Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
237    so that it resolves to split nodes. Then packets are classified
238    by logical priority, or a more specific classifier may be attached
239    to the split node.
240  */
241
242 static struct cbq_class *
243 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
244 {
245         struct cbq_sched_data *q = qdisc_priv(sch);
246         struct cbq_class *head = &q->link;
247         struct cbq_class **defmap;
248         struct cbq_class *cl = NULL;
249         u32 prio = skb->priority;
250         struct tcf_result res;
251
252         /*
253          *  Step 1. If skb->priority points to one of our classes, use it.
254          */
255         if (TC_H_MAJ(prio^sch->handle) == 0 &&
256             (cl = cbq_class_lookup(q, prio)) != NULL)
257                 return cl;
258
259         *qerr = NET_XMIT_BYPASS;
260         for (;;) {
261                 int result = 0;
262                 defmap = head->defaults;
263
264                 /*
265                  * Step 2+n. Apply classifier.
266                  */
267                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
268                         goto fallback;
269
270                 if ((cl = (void*)res.class) == NULL) {
271                         if (TC_H_MAJ(res.classid))
272                                 cl = cbq_class_lookup(q, res.classid);
273                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
274                                 cl = defmap[TC_PRIO_BESTEFFORT];
275
276                         if (cl == NULL || cl->level >= head->level)
277                                 goto fallback;
278                 }
279
280 #ifdef CONFIG_NET_CLS_ACT
281                 switch (result) {
282                 case TC_ACT_QUEUED:
283                 case TC_ACT_STOLEN:
284                         *qerr = NET_XMIT_SUCCESS;      /* fall through */
285                 case TC_ACT_SHOT:
286                         return NULL;
287                 }
288 #elif defined(CONFIG_NET_CLS_POLICE)
289                 switch (result) {
290                 case TC_POLICE_RECLASSIFY:
291                         return cbq_reclassify(skb, cl);
292                 case TC_POLICE_SHOT:
293                         return NULL;
294                 default:
295                         break;
296                 }
297 #endif
298                 if (cl->level == 0)
299                         return cl;
300
301                 /*
302                  * Step 3+n. If classifier selected a link sharing class,
303                  *         apply agency specific classifier.
304                  *         Repeat this procedure until we hit a leaf node.
305                  */
306                 head = cl;
307         }
308
309 fallback:
310         cl = head;
311
312         /*
313          * Step 4. No success...
314          */
315         if (TC_H_MAJ(prio) == 0 &&
316             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
317             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
318                 return head;
319
320         return cl;
321 }
322
323 /*
324    A packet has just been enqueued on an empty class.
325    cbq_activate_class adds the class to the tail of the active class list
326    of its priority band.
327  */
328
329 static __inline__ void cbq_activate_class(struct cbq_class *cl)
330 {
331         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
332         int prio = cl->cpriority;
333         struct cbq_class *cl_tail;
334
335         cl_tail = q->active[prio];
336         q->active[prio] = cl;
337
338         if (cl_tail != NULL) {
339                 cl->next_alive = cl_tail->next_alive;
340                 cl_tail->next_alive = cl;
341         } else {
342                 cl->next_alive = cl;
343                 q->activemask |= (1<<prio);
344         }
345 }
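/* Invariant used here and in cbq_deactivate_class()/cbq_dequeue_prio():
   q->active[prio] points to the TAIL of a circular, singly linked list
   threaded through next_alive, so the current head of the round-robin is
   q->active[prio]->next_alive. */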
346
347 /*
348    Unlink the class from the active chain.
349    Note that the same procedure is done directly in cbq_dequeue*
350    during the round-robin procedure.
351  */
352
353 static void cbq_deactivate_class(struct cbq_class *this)
354 {
355         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
356         int prio = this->cpriority;
357         struct cbq_class *cl;
358         struct cbq_class *cl_prev = q->active[prio];
359
360         do {
361                 cl = cl_prev->next_alive;
362                 if (cl == this) {
363                         cl_prev->next_alive = cl->next_alive;
364                         cl->next_alive = NULL;
365
366                         if (cl == q->active[prio]) {
367                                 q->active[prio] = cl_prev;
368                                 if (cl == q->active[prio]) {
369                                         q->active[prio] = NULL;
370                                         q->activemask &= ~(1<<prio);
371                                         return;
372                                 }
373                         }
374
375                         cl = cl_prev->next_alive;
376                         return;
377                 }
378         } while ((cl_prev = cl) != q->active[prio]);
379 }
380
381 static void
382 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
383 {
384         int toplevel = q->toplevel;
385
386         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
387                 psched_time_t now;
388                 psched_tdiff_t incr;
389
390                 PSCHED_GET_TIME(now);
391                 incr = PSCHED_TDIFF(now, q->now_rt);
392                 PSCHED_TADD2(q->now, incr, now);
393
394                 do {
395                         if (PSCHED_TLESS(cl->undertime, now)) {
396                                 q->toplevel = cl->level;
397                                 return;
398                         }
399                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
400         }
401 }
402
403 static int
404 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
405 {
406         struct cbq_sched_data *q = qdisc_priv(sch);
407         int len = skb->len;
408         int ret;
409         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
410
411 #ifdef CONFIG_NET_CLS_POLICE
412         q->rx_class = cl;
413 #endif
414         if (cl == NULL) {
415                 if (ret == NET_XMIT_BYPASS)
416                         sch->qstats.drops++;
417                 kfree_skb(skb);
418                 return ret;
419         }
420
421 #ifdef CONFIG_NET_CLS_POLICE
422         cl->q->__parent = sch;
423 #endif
424         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
425                 sch->q.qlen++;
426                 sch->bstats.packets++;
427                 sch->bstats.bytes+=len;
428                 cbq_mark_toplevel(q, cl);
429                 if (!cl->next_alive)
430                         cbq_activate_class(cl);
431                 return ret;
432         }
433
434         sch->qstats.drops++;
435         cbq_mark_toplevel(q, cl);
436         cl->qstats.drops++;
437         return ret;
438 }
439
440 static int
441 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
442 {
443         struct cbq_sched_data *q = qdisc_priv(sch);
444         struct cbq_class *cl;
445         int ret;
446
447         if ((cl = q->tx_class) == NULL) {
448                 kfree_skb(skb);
449                 sch->qstats.drops++;
450                 return NET_XMIT_CN;
451         }
452         q->tx_class = NULL;
453
454         cbq_mark_toplevel(q, cl);
455
456 #ifdef CONFIG_NET_CLS_POLICE
457         q->rx_class = cl;
458         cl->q->__parent = sch;
459 #endif
460         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
461                 sch->q.qlen++;
462                 sch->qstats.requeues++;
463                 if (!cl->next_alive)
464                         cbq_activate_class(cl);
465                 return 0;
466         }
467         sch->qstats.drops++;
468         cl->qstats.drops++;
469         return ret;
470 }
471
472 /* Overlimit actions */
473
474 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
475
476 static void cbq_ovl_classic(struct cbq_class *cl)
477 {
478         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
479         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
480
481         if (!cl->delayed) {
482                 delay += cl->offtime;
483
484                 /* 
485                    Class goes to sleep, so that it will have no
486                    chance to work avgidle. Let's forgive it 8)
487
488                    BTW cbq-2.0 has a bug in this
489                    place; apparently they forgot to shift it by cl->ewma_log.
490                  */
491                 if (cl->avgidle < 0)
492                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
493                 if (cl->avgidle < cl->minidle)
494                         cl->avgidle = cl->minidle;
495                 if (delay <= 0)
496                         delay = 1;
497                 PSCHED_TADD2(q->now, delay, cl->undertime);
498
499                 cl->xstats.overactions++;
500                 cl->delayed = 1;
501         }
502         if (q->wd_expires == 0 || q->wd_expires > delay)
503                 q->wd_expires = delay;
504
505         /* Dirty work! We must schedule wakeups based on
506            the real available rate, rather than the leaf rate,
507            which may be tiny (even zero).
508          */
509         if (q->toplevel == TC_CBQ_MAXLEVEL) {
510                 struct cbq_class *b;
511                 psched_tdiff_t base_delay = q->wd_expires;
512
513                 for (b = cl->borrow; b; b = b->borrow) {
514                         delay = PSCHED_TDIFF(b->undertime, q->now);
515                         if (delay < base_delay) {
516                                 if (delay <= 0)
517                                         delay = 1;
518                                 base_delay = delay;
519                         }
520                 }
521
522                 q->wd_expires = base_delay;
523         }
524 }
525
526 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
527    they go overlimit
528  */
529
530 static void cbq_ovl_rclassic(struct cbq_class *cl)
531 {
532         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
533         struct cbq_class *this = cl;
534
535         do {
536                 if (cl->level > q->toplevel) {
537                         cl = NULL;
538                         break;
539                 }
540         } while ((cl = cl->borrow) != NULL);
541
542         if (cl == NULL)
543                 cl = this;
544         cbq_ovl_classic(cl);
545 }
546
547 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
548
549 static void cbq_ovl_delay(struct cbq_class *cl)
550 {
551         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
552         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
553
554         if (!cl->delayed) {
555                 unsigned long sched = jiffies;
556
557                 delay += cl->offtime;
558                 if (cl->avgidle < 0)
559                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
560                 if (cl->avgidle < cl->minidle)
561                         cl->avgidle = cl->minidle;
562                 PSCHED_TADD2(q->now, delay, cl->undertime);
563
564                 if (delay > 0) {
565                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
566                         cl->penalized = sched;
567                         cl->cpriority = TC_CBQ_MAXPRIO;
568                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
569                         if (del_timer(&q->delay_timer) &&
570                             (long)(q->delay_timer.expires - sched) > 0)
571                                 q->delay_timer.expires = sched;
572                         add_timer(&q->delay_timer);
573                         cl->delayed = 1;
574                         cl->xstats.overactions++;
575                         return;
576                 }
577                 delay = 1;
578         }
579         if (q->wd_expires == 0 || q->wd_expires > delay)
580                 q->wd_expires = delay;
581 }
582
583 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
584
585 static void cbq_ovl_lowprio(struct cbq_class *cl)
586 {
587         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
588
589         cl->penalized = jiffies + cl->penalty;
590
591         if (cl->cpriority != cl->priority2) {
592                 cl->cpriority = cl->priority2;
593                 q->pmask |= (1<<cl->cpriority);
594                 cl->xstats.overactions++;
595         }
596         cbq_ovl_classic(cl);
597 }
598
599 /* TC_CBQ_OVL_DROP: penalize class by dropping */
600
601 static void cbq_ovl_drop(struct cbq_class *cl)
602 {
603         if (cl->q->ops->drop)
604                 if (cl->q->ops->drop(cl->q))
605                         cl->qdisc->q.qlen--;
606         cl->xstats.overactions++;
607         cbq_ovl_classic(cl);
608 }
609
610 static void cbq_watchdog(unsigned long arg)
611 {
612         struct Qdisc *sch = (struct Qdisc*)arg;
613
614         sch->flags &= ~TCQ_F_THROTTLED;
615         netif_schedule(sch->dev);
616 }
617
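/* Classes penalized by cbq_ovl_delay()/cbq_ovl_lowprio() end up on the
   active chain of their penalty band (cl->cpriority) until cl->penalized,
   a jiffies timestamp, has passed.  cbq_undelay_prio() walks one such band,
   moves every class whose penalty has expired back to its normal priority
   band, and reports how long the delay timer should wait for the classes
   that are still penalized. */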
618 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
619 {
620         struct cbq_class *cl;
621         struct cbq_class *cl_prev = q->active[prio];
622         unsigned long now = jiffies;
623         unsigned long sched = now;
624
625         if (cl_prev == NULL)
626                 return now;
627
628         do {
629                 cl = cl_prev->next_alive;
630                 if ((long)(now - cl->penalized) > 0) {
631                         cl_prev->next_alive = cl->next_alive;
632                         cl->next_alive = NULL;
633                         cl->cpriority = cl->priority;
634                         cl->delayed = 0;
635                         cbq_activate_class(cl);
636
637                         if (cl == q->active[prio]) {
638                                 q->active[prio] = cl_prev;
639                                 if (cl == q->active[prio]) {
640                                         q->active[prio] = NULL;
641                                         return 0;
642                                 }
643                         }
644
645                         cl = cl_prev->next_alive;
646                 } else if ((long)(sched - cl->penalized) > 0)
647                         sched = cl->penalized;
648         } while ((cl_prev = cl) != q->active[prio]);
649
650         return (long)(sched - now);
651 }
652
653 static void cbq_undelay(unsigned long arg)
654 {
655         struct Qdisc *sch = (struct Qdisc*)arg;
656         struct cbq_sched_data *q = qdisc_priv(sch);
657         long delay = 0;
658         unsigned pmask;
659
660         pmask = q->pmask;
661         q->pmask = 0;
662
663         while (pmask) {
664                 int prio = ffz(~pmask);
665                 long tmp;
666
667                 pmask &= ~(1<<prio);
668
669                 tmp = cbq_undelay_prio(q, prio);
670                 if (tmp > 0) {
671                         q->pmask |= 1<<prio;
672                         if (tmp < delay || delay == 0)
673                                 delay = tmp;
674                 }
675         }
676
677         if (delay) {
678                 q->delay_timer.expires = jiffies + delay;
679                 add_timer(&q->delay_timer);
680         }
681
682         sch->flags &= ~TCQ_F_THROTTLED;
683         netif_schedule(sch->dev);
684 }
685
686
687 #ifdef CONFIG_NET_CLS_POLICE
688
689 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
690 {
691         int len = skb->len;
692         struct Qdisc *sch = child->__parent;
693         struct cbq_sched_data *q = qdisc_priv(sch);
694         struct cbq_class *cl = q->rx_class;
695
696         q->rx_class = NULL;
697
698         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
699
700                 cbq_mark_toplevel(q, cl);
701
702                 q->rx_class = cl;
703                 cl->q->__parent = sch;
704
705                 if (cl->q->enqueue(skb, cl->q) == 0) {
706                         sch->q.qlen++;
707                         sch->bstats.packets++;
708                         sch->bstats.bytes+=len;
709                         if (!cl->next_alive)
710                                 cbq_activate_class(cl);
711                         return 0;
712                 }
713                 sch->qstats.drops++;
714                 return 0;
715         }
716
717         sch->qstats.drops++;
718         return -1;
719 }
720 #endif
721
722 /* 
723    This is a mission-critical procedure.
724
725    We "regenerate" the toplevel cutoff if the transmitting class
726    has backlog and is not regulated. This is not part of the
727    original CBQ description, but it looks more reasonable.
728    It may well be wrong; this question needs further investigation.
729 */
730
731 static __inline__ void
732 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
733                     struct cbq_class *borrowed)
734 {
735         if (cl && q->toplevel >= borrowed->level) {
736                 if (cl->q->q.qlen > 1) {
737                         do {
738                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
739                                         q->toplevel = borrowed->level;
740                                         return;
741                                 }
742                         } while ((borrowed=borrowed->borrow) != NULL);
743                 }
744 #if 0   
745         /* It is not necessary now. Uncommenting it
746            will save CPU cycles, but decrease fairness.
747          */
748                 q->toplevel = TC_CBQ_MAXLEVEL;
749 #endif
750         }
751 }
752
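/* cbq_update() is called from cbq_dequeue() once the previously transmitted
   packet is considered finished.  It charges that packet to the transmitting
   class and to every link-sharing ancestor on the ->share chain, updating
   the scaled avgidle estimator and, for over-limit classes, the undertime
   at which they become eligible to send again. */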
753 static void
754 cbq_update(struct cbq_sched_data *q)
755 {
756         struct cbq_class *this = q->tx_class;
757         struct cbq_class *cl = this;
758         int len = q->tx_len;
759
760         q->tx_class = NULL;
761
762         for ( ; cl; cl = cl->share) {
763                 long avgidle = cl->avgidle;
764                 long idle;
765
766                 cl->bstats.packets++;
767                 cl->bstats.bytes += len;
768
769                 /*
770                    (now - last) is total time between packet right edges.
771                    (last_pktlen/rate) is "virtual" busy time, so that
772
773                          idle = (now - last) - last_pktlen/rate
774                  */
775
776                 idle = PSCHED_TDIFF(q->now, cl->last);
777                 if ((unsigned long)idle > 128*1024*1024) {
778                         avgidle = cl->maxidle;
779                 } else {
780                         idle -= L2T(cl, len);
781
782                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
783                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
784                    cl->avgidle == true_avgidle/W,
785                    hence:
786                  */
787                         avgidle += idle - (avgidle>>cl->ewma_log);
788                 }
789
790                 if (avgidle <= 0) {
791                         /* Overlimit or at-limit */
792
793                         if (avgidle < cl->minidle)
794                                 avgidle = cl->minidle;
795
796                         cl->avgidle = avgidle;
797
798                         /* Calculate the expected time when this class
799                            will be allowed to send again.
800                            It will occur when:
801                            (1-W)*true_avgidle + W*idle = 0, i.e.
802                            idle = (1/W - 1)*(-true_avgidle)
803                            or, in the scaled units used here,
804                            idle = (1 - W)*(-cl->avgidle);
805                          */
806                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
807
808                         /*
809                            That is not all.
810                            To maintain the rate allocated to the class,
811                            we add to undertime the virtual clock time
812                            needed to complete the transmitted packet.
813                            (len/phys_bandwidth has already elapsed by
814                            the time cbq_update is called.)
815                          */
816
817                         idle -= L2T(&q->link, len);
818                         idle += L2T(cl, len);
819
820                         PSCHED_AUDIT_TDIFF(idle);
821
822                         PSCHED_TADD2(q->now, idle, cl->undertime);
823                 } else {
824                         /* Underlimit */
825
826                         PSCHED_SET_PASTPERFECT(cl->undertime);
827                         if (avgidle > cl->maxidle)
828                                 cl->avgidle = cl->maxidle;
829                         else
830                                 cl->avgidle = avgidle;
831                 }
832                 cl->last = q->now;
833         }
834
835         cbq_update_toplevel(q, this, q->tx_borrowed);
836 }
837
838 static __inline__ struct cbq_class *
839 cbq_under_limit(struct cbq_class *cl)
840 {
841         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
842         struct cbq_class *this_cl = cl;
843
844         if (cl->tparent == NULL)
845                 return cl;
846
847         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
848             !PSCHED_TLESS(q->now, cl->undertime)) {
849                 cl->delayed = 0;
850                 return cl;
851         }
852
853         do {
854                 /* This is a very suspicious place. Currently the overlimit
855                    action is generated for non-bounded classes
856                    only if the link is completely congested.
857                    Though this agrees with the ancestor-only paradigm,
858                    it looks very stupid. In particular,
859                    it means that this chunk of code will either
860                    never be called or will result in a strong amplification
861                    of burstiness. Dangerous, silly, and yet
862                    no other solution exists.
863                  */
864                 if ((cl = cl->borrow) == NULL) {
865                         this_cl->qstats.overlimits++;
866                         this_cl->overlimit(this_cl);
867                         return NULL;
868                 }
869                 if (cl->level > q->toplevel)
870                         return NULL;
871         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
872                  PSCHED_TLESS(q->now, cl->undertime));
873
874         cl->delayed = 0;
875         return cl;
876 }
877
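/* One weighted round-robin pass over the active classes of one priority
   band.  A class may transmit while its deficit is positive; once the
   deficit is exhausted it is topped up with cl->quantum and the scheduler
   moves on to the next class.  Classes that turn out to be empty or have
   been re-prioritized are unlinked from the band along the way. */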
878 static __inline__ struct sk_buff *
879 cbq_dequeue_prio(struct Qdisc *sch, int prio)
880 {
881         struct cbq_sched_data *q = qdisc_priv(sch);
882         struct cbq_class *cl_tail, *cl_prev, *cl;
883         struct sk_buff *skb;
884         int deficit;
885
886         cl_tail = cl_prev = q->active[prio];
887         cl = cl_prev->next_alive;
888
889         do {
890                 deficit = 0;
891
892                 /* Start round */
893                 do {
894                         struct cbq_class *borrow = cl;
895
896                         if (cl->q->q.qlen &&
897                             (borrow = cbq_under_limit(cl)) == NULL)
898                                 goto skip_class;
899
900                         if (cl->deficit <= 0) {
901                                 /* Class exhausted its allotment per
902                                    this round. Switch to the next one.
903                                  */
904                                 deficit = 1;
905                                 cl->deficit += cl->quantum;
906                                 goto next_class;
907                         }
908
909                         skb = cl->q->dequeue(cl->q);
910
911                         /* Class did not give us any skb :-(
912                            It can occur even if cl->q->q.qlen != 0,
913                            e.g. if cl->q == "tbf"
914                          */
915                         if (skb == NULL)
916                                 goto skip_class;
917
918                         cl->deficit -= skb->len;
919                         q->tx_class = cl;
920                         q->tx_borrowed = borrow;
921                         if (borrow != cl) {
922 #ifndef CBQ_XSTATS_BORROWS_BYTES
923                                 borrow->xstats.borrows++;
924                                 cl->xstats.borrows++;
925 #else
926                                 borrow->xstats.borrows += skb->len;
927                                 cl->xstats.borrows += skb->len;
928 #endif
929                         }
930                         q->tx_len = skb->len;
931
932                         if (cl->deficit <= 0) {
933                                 q->active[prio] = cl;
934                                 cl = cl->next_alive;
935                                 cl->deficit += cl->quantum;
936                         }
937                         return skb;
938
939 skip_class:
940                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
941                                 /* Class is empty or penalized.
942                                    Unlink it from active chain.
943                                  */
944                                 cl_prev->next_alive = cl->next_alive;
945                                 cl->next_alive = NULL;
946
947                                 /* Did cl_tail point to it? */
948                                 if (cl == cl_tail) {
949                                         /* Repair it! */
950                                         cl_tail = cl_prev;
951
952                                         /* Was it the last class in this band? */
953                                         if (cl == cl_tail) {
954                                                 /* Kill the band! */
955                                                 q->active[prio] = NULL;
956                                                 q->activemask &= ~(1<<prio);
957                                                 if (cl->q->q.qlen)
958                                                         cbq_activate_class(cl);
959                                                 return NULL;
960                                         }
961
962                                         q->active[prio] = cl_tail;
963                                 }
964                                 if (cl->q->q.qlen)
965                                         cbq_activate_class(cl);
966
967                                 cl = cl_prev;
968                         }
969
970 next_class:
971                         cl_prev = cl;
972                         cl = cl->next_alive;
973                 } while (cl_prev != cl_tail);
974         } while (deficit);
975
976         q->active[prio] = cl_prev;
977
978         return NULL;
979 }
980
981 static __inline__ struct sk_buff *
982 cbq_dequeue_1(struct Qdisc *sch)
983 {
984         struct cbq_sched_data *q = qdisc_priv(sch);
985         struct sk_buff *skb;
986         unsigned activemask;
987
988         activemask = q->activemask&0xFF;
989         while (activemask) {
990                 int prio = ffz(~activemask);
991                 activemask &= ~(1<<prio);
992                 skb = cbq_dequeue_prio(sch, prio);
993                 if (skb)
994                         return skb;
995         }
996         return NULL;
997 }
998
999 static struct sk_buff *
1000 cbq_dequeue(struct Qdisc *sch)
1001 {
1002         struct sk_buff *skb;
1003         struct cbq_sched_data *q = qdisc_priv(sch);
1004         psched_time_t now;
1005         psched_tdiff_t incr;
1006
1007         PSCHED_GET_TIME(now);
1008         incr = PSCHED_TDIFF(now, q->now_rt);
1009
1010         if (q->tx_class) {
1011                 psched_tdiff_t incr2;
1012                 /* Time integrator. We calculate the EOS time
1013                    by adding the expected packet transmission time.
1014                    If real time is greater, we warp the artificial clock,
1015                    so that:
1016
1017                    cbq_time = max(real_time, work);
1018                  */
1019                 incr2 = L2T(&q->link, q->tx_len);
1020                 PSCHED_TADD(q->now, incr2);
1021                 cbq_update(q);
1022                 if ((incr -= incr2) < 0)
1023                         incr = 0;
1024         }
1025         PSCHED_TADD(q->now, incr);
1026         q->now_rt = now;
1027
1028         for (;;) {
1029                 q->wd_expires = 0;
1030
1031                 skb = cbq_dequeue_1(sch);
1032                 if (skb) {
1033                         sch->q.qlen--;
1034                         sch->flags &= ~TCQ_F_THROTTLED;
1035                         return skb;
1036                 }
1037
1038                 /* All the classes are overlimit.
1039
1040                    This is possible if:
1041
1042                    1. The scheduler is empty.
1043                    2. The toplevel cutoff inhibited borrowing.
1044                    3. The root class is overlimit.
1045
1046                    Reset the 2nd and 3rd conditions and retry.
1047
1048                    Note that NS and cbq-2.0 are buggy here: peeking
1049                    an arbitrary class is appropriate for ancestor-only
1050                    sharing, but not for the toplevel algorithm.
1051
1052                    Our version is better but slower, because it requires
1053                    two passes; this is unavoidable with toplevel sharing.
1054                 */
1055
1056                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1057                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1058                         break;
1059
1060                 q->toplevel = TC_CBQ_MAXLEVEL;
1061                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1062         }
1063
1064         /* No packets in the scheduler, or nobody wants to give them to us :-(
1065            Sigh... start the watchdog timer in the latter case. */
1066
1067         if (sch->q.qlen) {
1068                 sch->qstats.overlimits++;
1069                 if (q->wd_expires) {
1070                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1071                         if (delay <= 0)
1072                                 delay = 1;
1073                         mod_timer(&q->wd_timer, jiffies + delay);
1074                         sch->flags |= TCQ_F_THROTTLED;
1075                 }
1076         }
1077         return NULL;
1078 }
1079
1080 /* CBQ class maintenance routines */
1081
1082 static void cbq_adjust_levels(struct cbq_class *this)
1083 {
1084         if (this == NULL)
1085                 return;
1086
1087         do {
1088                 int level = 0;
1089                 struct cbq_class *cl;
1090
1091                 if ((cl = this->children) != NULL) {
1092                         do {
1093                                 if (cl->level > level)
1094                                         level = cl->level;
1095                         } while ((cl = cl->sibling) != this->children);
1096                 }
1097                 this->level = level+1;
1098         } while ((this = this->tparent) != NULL);
1099 }
1100
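/* Recompute the WRR quantum of every class in a priority band:
   quantum = allot * weight / (average weight in the band), where
   q->quanta[prio] holds the sum of the weights and q->nclasses[prio] the
   number of classes.  Non-positive or absurdly large results are reset to
   mtu/2 + 1. */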
1101 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1102 {
1103         struct cbq_class *cl;
1104         unsigned h;
1105
1106         if (q->quanta[prio] == 0)
1107                 return;
1108
1109         for (h=0; h<16; h++) {
1110                 for (cl = q->classes[h]; cl; cl = cl->next) {
1111                         /* BUGGGG... Beware! This expression suffers from
1112                            arithmetic overflows!
1113                          */
1114                         if (cl->priority == prio) {
1115                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1116                                         q->quanta[prio];
1117                         }
1118                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1119                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1120                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1121                         }
1122                 }
1123         }
1124 }
1125
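/* A split node keeps, for each TC_PRIO_* value, a default class that
   cbq_classify() falls back to when the filters give no verdict.
   cbq_sync_defmap() refreshes that table after cl->defmap has changed:
   for every priority left without a default it picks, among the classes
   attached to the same split node whose defmap covers that priority, the
   one with the lowest level. */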
1126 static void cbq_sync_defmap(struct cbq_class *cl)
1127 {
1128         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1129         struct cbq_class *split = cl->split;
1130         unsigned h;
1131         int i;
1132
1133         if (split == NULL)
1134                 return;
1135
1136         for (i=0; i<=TC_PRIO_MAX; i++) {
1137                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1138                         split->defaults[i] = NULL;
1139         }
1140
1141         for (i=0; i<=TC_PRIO_MAX; i++) {
1142                 int level = split->level;
1143
1144                 if (split->defaults[i])
1145                         continue;
1146
1147                 for (h=0; h<16; h++) {
1148                         struct cbq_class *c;
1149
1150                         for (c = q->classes[h]; c; c = c->next) {
1151                                 if (c->split == split && c->level < level &&
1152                                     c->defmap&(1<<i)) {
1153                                         split->defaults[i] = c;
1154                                         level = c->level;
1155                                 }
1156                         }
1157                 }
1158         }
1159 }
1160
1161 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1162 {
1163         struct cbq_class *split = NULL;
1164
1165         if (splitid == 0) {
1166                 if ((split = cl->split) == NULL)
1167                         return;
1168                 splitid = split->classid;
1169         }
1170
1171         if (split == NULL || split->classid != splitid) {
1172                 for (split = cl->tparent; split; split = split->tparent)
1173                         if (split->classid == splitid)
1174                                 break;
1175         }
1176
1177         if (split == NULL)
1178                 return;
1179
1180         if (cl->split != split) {
1181                 cl->defmap = 0;
1182                 cbq_sync_defmap(cl);
1183                 cl->split = split;
1184                 cl->defmap = def&mask;
1185         } else
1186                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1187
1188         cbq_sync_defmap(cl);
1189 }
1190
1191 static void cbq_unlink_class(struct cbq_class *this)
1192 {
1193         struct cbq_class *cl, **clp;
1194         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1195
1196         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1197                 if (cl == this) {
1198                         *clp = cl->next;
1199                         cl->next = NULL;
1200                         break;
1201                 }
1202         }
1203
1204         if (this->tparent) {
1205                 clp=&this->sibling;
1206                 cl = *clp;
1207                 do {
1208                         if (cl == this) {
1209                                 *clp = cl->sibling;
1210                                 break;
1211                         }
1212                         clp = &cl->sibling;
1213                 } while ((cl = *clp) != this->sibling);
1214
1215                 if (this->tparent->children == this) {
1216                         this->tparent->children = this->sibling;
1217                         if (this->sibling == this)
1218                                 this->tparent->children = NULL;
1219                 }
1220         } else {
1221                 BUG_TRAP(this->sibling == this);
1222         }
1223 }
1224
1225 static void cbq_link_class(struct cbq_class *this)
1226 {
1227         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1228         unsigned h = cbq_hash(this->classid);
1229         struct cbq_class *parent = this->tparent;
1230
1231         this->sibling = this;
1232         this->next = q->classes[h];
1233         q->classes[h] = this;
1234
1235         if (parent == NULL)
1236                 return;
1237
1238         if (parent->children == NULL) {
1239                 parent->children = this;
1240         } else {
1241                 this->sibling = parent->children->sibling;
1242                 parent->children->sibling = this;
1243         }
1244 }
1245
1246 static unsigned int cbq_drop(struct Qdisc* sch)
1247 {
1248         struct cbq_sched_data *q = qdisc_priv(sch);
1249         struct cbq_class *cl, *cl_head;
1250         int prio;
1251         unsigned int len;
1252
1253         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1254                 if ((cl_head = q->active[prio]) == NULL)
1255                         continue;
1256
1257                 cl = cl_head;
1258                 do {
1259                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1260                                 sch->q.qlen--;
1261                                 return len;
1262                         }
1263                 } while ((cl = cl->next_alive) != cl_head);
1264         }
1265         return 0;
1266 }
1267
1268 static void
1269 cbq_reset(struct Qdisc* sch)
1270 {
1271         struct cbq_sched_data *q = qdisc_priv(sch);
1272         struct cbq_class *cl;
1273         int prio;
1274         unsigned h;
1275
1276         q->activemask = 0;
1277         q->pmask = 0;
1278         q->tx_class = NULL;
1279         q->tx_borrowed = NULL;
1280         del_timer(&q->wd_timer);
1281         del_timer(&q->delay_timer);
1282         q->toplevel = TC_CBQ_MAXLEVEL;
1283         PSCHED_GET_TIME(q->now);
1284         q->now_rt = q->now;
1285
1286         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1287                 q->active[prio] = NULL;
1288
1289         for (h = 0; h < 16; h++) {
1290                 for (cl = q->classes[h]; cl; cl = cl->next) {
1291                         qdisc_reset(cl->q);
1292
1293                         cl->next_alive = NULL;
1294                         PSCHED_SET_PASTPERFECT(cl->undertime);
1295                         cl->avgidle = cl->maxidle;
1296                         cl->deficit = cl->quantum;
1297                         cl->cpriority = cl->priority;
1298                 }
1299         }
1300         sch->q.qlen = 0;
1301 }
1302
1303
1304 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1305 {
1306         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1307                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1308                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1309         }
1310         if (lss->change&TCF_CBQ_LSS_EWMA)
1311                 cl->ewma_log = lss->ewma_log;
1312         if (lss->change&TCF_CBQ_LSS_AVPKT)
1313                 cl->avpkt = lss->avpkt;
1314         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1315                 cl->minidle = -(long)lss->minidle;
1316         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1317                 cl->maxidle = lss->maxidle;
1318                 cl->avgidle = lss->maxidle;
1319         }
1320         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1321                 cl->offtime = lss->offtime;
1322         return 0;
1323 }
1324
1325 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1326 {
1327         q->nclasses[cl->priority]--;
1328         q->quanta[cl->priority] -= cl->weight;
1329         cbq_normalize_quanta(q, cl->priority);
1330 }
1331
1332 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1333 {
1334         q->nclasses[cl->priority]++;
1335         q->quanta[cl->priority] += cl->weight;
1336         cbq_normalize_quanta(q, cl->priority);
1337 }
1338
1339 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1340 {
1341         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1342
1343         if (wrr->allot)
1344                 cl->allot = wrr->allot;
1345         if (wrr->weight)
1346                 cl->weight = wrr->weight;
1347         if (wrr->priority) {
1348                 cl->priority = wrr->priority-1;
1349                 cl->cpriority = cl->priority;
1350                 if (cl->priority >= cl->priority2)
1351                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1352         }
1353
1354         cbq_addprio(q, cl);
1355         return 0;
1356 }
1357
1358 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1359 {
1360         switch (ovl->strategy) {
1361         case TC_CBQ_OVL_CLASSIC:
1362                 cl->overlimit = cbq_ovl_classic;
1363                 break;
1364         case TC_CBQ_OVL_DELAY:
1365                 cl->overlimit = cbq_ovl_delay;
1366                 break;
1367         case TC_CBQ_OVL_LOWPRIO:
1368                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1369                     ovl->priority2-1 <= cl->priority)
1370                         return -EINVAL;
1371                 cl->priority2 = ovl->priority2-1;
1372                 cl->overlimit = cbq_ovl_lowprio;
1373                 break;
1374         case TC_CBQ_OVL_DROP:
1375                 cl->overlimit = cbq_ovl_drop;
1376                 break;
1377         case TC_CBQ_OVL_RCLASSIC:
1378                 cl->overlimit = cbq_ovl_rclassic;
1379                 break;
1380         default:
1381                 return -EINVAL;
1382         }
1383         cl->penalty = (ovl->penalty*HZ)/1000;
1384         return 0;
1385 }
1386
1387 #ifdef CONFIG_NET_CLS_POLICE
1388 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1389 {
1390         cl->police = p->police;
1391
1392         if (cl->q->handle) {
1393                 if (p->police == TC_POLICE_RECLASSIFY)
1394                         cl->q->reshape_fail = cbq_reshape_fail;
1395                 else
1396                         cl->q->reshape_fail = NULL;
1397         }
1398         return 0;
1399 }
1400 #endif
1401
1402 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1403 {
1404         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1405         return 0;
1406 }
1407
1408 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1409 {
1410         struct cbq_sched_data *q = qdisc_priv(sch);
1411         struct rtattr *tb[TCA_CBQ_MAX];
1412         struct tc_ratespec *r;
1413
1414         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1415             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1416             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1417                 return -EINVAL;
1418
1419         if (tb[TCA_CBQ_LSSOPT-1] &&
1420             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1421                 return -EINVAL;
1422
1423         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1424
1425         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1426                 return -EINVAL;
1427
1428         q->link.refcnt = 1;
1429         q->link.sibling = &q->link;
1430         q->link.classid = sch->handle;
1431         q->link.qdisc = sch;
1432         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1433                 q->link.q = &noop_qdisc;
1434
1435         q->link.priority = TC_CBQ_MAXPRIO-1;
1436         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1437         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1438         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1439         q->link.overlimit = cbq_ovl_classic;
1440         q->link.allot = psched_mtu(sch->dev);
1441         q->link.quantum = q->link.allot;
1442         q->link.weight = q->link.R_tab->rate.rate;
1443
1444         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1445         q->link.avpkt = q->link.allot/2;
1446         q->link.minidle = -0x7FFFFFFF;
1447         q->link.stats_lock = &sch->dev->queue_lock;
1448
1449         init_timer(&q->wd_timer);
1450         q->wd_timer.data = (unsigned long)sch;
1451         q->wd_timer.function = cbq_watchdog;
1452         init_timer(&q->delay_timer);
1453         q->delay_timer.data = (unsigned long)sch;
1454         q->delay_timer.function = cbq_undelay;
1455         q->toplevel = TC_CBQ_MAXLEVEL;
1456         PSCHED_GET_TIME(q->now);
1457         q->now_rt = q->now;
1458
1459         cbq_link_class(&q->link);
1460
1461         if (tb[TCA_CBQ_LSSOPT-1])
1462                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1463
1464         cbq_addprio(q, &q->link);
1465         return 0;
1466 }
1467
1468 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1469 {
1470         unsigned char    *b = skb->tail;
1471
1472         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1473         return skb->len;
1474
1475 rtattr_failure:
1476         skb_trim(skb, b - skb->data);
1477         return -1;
1478 }
1479
1480 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1481 {
1482         unsigned char    *b = skb->tail;
1483         struct tc_cbq_lssopt opt;
1484
1485         opt.flags = 0;
1486         if (cl->borrow == NULL)
1487                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1488         if (cl->share == NULL)
1489                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1490         opt.ewma_log = cl->ewma_log;
1491         opt.level = cl->level;
1492         opt.avpkt = cl->avpkt;
1493         opt.maxidle = cl->maxidle;
1494         opt.minidle = (u32)(-cl->minidle);
1495         opt.offtime = cl->offtime;
1496         opt.change = ~0;
1497         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1498         return skb->len;
1499
1500 rtattr_failure:
1501         skb_trim(skb, b - skb->data);
1502         return -1;
1503 }
1504
1505 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1506 {
1507         unsigned char    *b = skb->tail;
1508         struct tc_cbq_wrropt opt;
1509
1510         opt.flags = 0;
1511         opt.allot = cl->allot;
1512         opt.priority = cl->priority+1;
1513         opt.cpriority = cl->cpriority+1;
1514         opt.weight = cl->weight;
1515         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1516         return skb->len;
1517
1518 rtattr_failure:
1519         skb_trim(skb, b - skb->data);
1520         return -1;
1521 }
1522
1523 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1524 {
1525         unsigned char    *b = skb->tail;
1526         struct tc_cbq_ovl opt;
1527
1528         opt.strategy = cl->ovl_strategy;
1529         opt.priority2 = cl->priority2+1;
1530         opt.pad = 0;
1531         opt.penalty = (cl->penalty*1000)/HZ;
1532         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1533         return skb->len;
1534
1535 rtattr_failure:
1536         skb_trim(skb, b - skb->data);
1537         return -1;
1538 }
1539
1540 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1541 {
1542         unsigned char    *b = skb->tail;
1543         struct tc_cbq_fopt opt;
1544
1545         if (cl->split || cl->defmap) {
1546                 opt.split = cl->split ? cl->split->classid : 0;
1547                 opt.defmap = cl->defmap;
1548                 opt.defchange = ~0;
1549                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1550         }
1551         return skb->len;
1552
1553 rtattr_failure:
1554         skb_trim(skb, b - skb->data);
1555         return -1;
1556 }
1557
1558 #ifdef CONFIG_NET_CLS_POLICE
1559 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1560 {
1561         unsigned char    *b = skb->tail;
1562         struct tc_cbq_police opt;
1563
1564         if (cl->police) {
1565                 opt.police = cl->police;
1566                 opt.__res1 = 0;
1567                 opt.__res2 = 0;
1568                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1569         }
1570         return skb->len;
1571
1572 rtattr_failure:
1573         skb_trim(skb, b - skb->data);
1574         return -1;
1575 }
1576 #endif
1577
1578 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1579 {
1580         if (cbq_dump_lss(skb, cl) < 0 ||
1581             cbq_dump_rate(skb, cl) < 0 ||
1582             cbq_dump_wrr(skb, cl) < 0 ||
1583             cbq_dump_ovl(skb, cl) < 0 ||
1584 #ifdef CONFIG_NET_CLS_POLICE
1585             cbq_dump_police(skb, cl) < 0 ||
1586 #endif
1587             cbq_dump_fopt(skb, cl) < 0)
1588                 return -1;
1589         return 0;
1590 }
1591
1592 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1593 {
1594         struct cbq_sched_data *q = qdisc_priv(sch);
1595         unsigned char    *b = skb->tail;
1596         struct rtattr *rta;
1597
1598         rta = (struct rtattr*)b;
1599         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1600         if (cbq_dump_attr(skb, &q->link) < 0)
1601                 goto rtattr_failure;
1602         rta->rta_len = skb->tail - b;
1603         return skb->len;
1604
1605 rtattr_failure:
1606         skb_trim(skb, b - skb->data);
1607         return -1;
1608 }
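/*
 * Note on the dump layout: RTA_PUT(skb, TCA_OPTIONS, 0, NULL) first emits an
 * empty TCA_OPTIONS header, cbq_dump_attr() then appends the individual
 * TCA_CBQ_* attributes behind it, and rta->rta_len is finally patched to
 * cover everything written since 'b', which turns TCA_OPTIONS into a nested
 * attribute.  cbq_dump_class() below builds its per-class dump the same way.
 */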
1609
1610 static int
1611 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1612 {
1613         struct cbq_sched_data *q = qdisc_priv(sch);
1614
1615         q->link.xstats.avgidle = q->link.avgidle;
1616         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1617 }
1618
1619 static int
1620 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1621                struct sk_buff *skb, struct tcmsg *tcm)
1622 {
1623         struct cbq_class *cl = (struct cbq_class*)arg;
1624         unsigned char    *b = skb->tail;
1625         struct rtattr *rta;
1626
1627         if (cl->tparent)
1628                 tcm->tcm_parent = cl->tparent->classid;
1629         else
1630                 tcm->tcm_parent = TC_H_ROOT;
1631         tcm->tcm_handle = cl->classid;
1632         tcm->tcm_info = cl->q->handle;
1633
1634         rta = (struct rtattr*)b;
1635         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1636         if (cbq_dump_attr(skb, cl) < 0)
1637                 goto rtattr_failure;
1638         rta->rta_len = skb->tail - b;
1639         return skb->len;
1640
1641 rtattr_failure:
1642         skb_trim(skb, b - skb->data);
1643         return -1;
1644 }
1645
1646 static int
1647 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1648         struct gnet_dump *d)
1649 {
1650         struct cbq_sched_data *q = qdisc_priv(sch);
1651         struct cbq_class *cl = (struct cbq_class*)arg;
1652
1653         cl->qstats.qlen = cl->q->q.qlen;
1654         cl->xstats.avgidle = cl->avgidle;
1655         cl->xstats.undertime = 0;
1656
1657         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1658                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1659
1660         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1661 #ifdef CONFIG_NET_ESTIMATOR
1662             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1663 #endif
1664             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1665                 return -1;
1666
1667         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1668 }
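/*
 * The structure copied out via gnet_stats_copy_app() holds the CBQ-specific
 * numbers (avgidle, undertime, ...) that a statistics dump such as
 * "tc -s class show dev eth0" can decode in addition to the generic
 * byte/packet and queue counters copied just before it.
 */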
1669
1670 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1671                      struct Qdisc **old)
1672 {
1673         struct cbq_class *cl = (struct cbq_class*)arg;
1674
1675         if (cl) {
1676                 if (new == NULL) {
1677                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1678                                 return -ENOBUFS;
1679                 } else {
1680 #ifdef CONFIG_NET_CLS_POLICE
1681                         if (cl->police == TC_POLICE_RECLASSIFY)
1682                                 new->reshape_fail = cbq_reshape_fail;
1683 #endif
1684                 }
1685                 sch_tree_lock(sch);
1686                 *old = cl->q;
1687                 cl->q = new;
1688                 sch->q.qlen -= (*old)->q.qlen;
1689                 qdisc_reset(*old);
1690                 sch_tree_unlock(sch);
1691
1692                 return 0;
1693         }
1694         return -ENOENT;
1695 }
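/*
 * Sketch of how this graft path is typically reached: attaching a child
 * qdisc to an existing CBQ class, e.g.
 *
 *	tc qdisc add dev eth0 parent 1:1 handle 10: sfq
 *
 * swaps cl->q under sch_tree_lock() and hands the previous child back in
 * *old; if no replacement is supplied, a fresh default pfifo is created
 * instead.
 */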
1696
1697 static struct Qdisc *
1698 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1699 {
1700         struct cbq_class *cl = (struct cbq_class*)arg;
1701
1702         return cl ? cl->q : NULL;
1703 }
1704
1705 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1706 {
1707         struct cbq_sched_data *q = qdisc_priv(sch);
1708         struct cbq_class *cl = cbq_class_lookup(q, classid);
1709
1710         if (cl) {
1711                 cl->refcnt++;
1712                 return (unsigned long)cl;
1713         }
1714         return 0;
1715 }
1716
1717 static void cbq_destroy_filters(struct cbq_class *cl)
1718 {
1719         struct tcf_proto *tp;
1720
1721         while ((tp = cl->filter_list) != NULL) {
1722                 cl->filter_list = tp->next;
1723                 tcf_destroy(tp);
1724         }
1725 }
1726
1727 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1728 {
1729         struct cbq_sched_data *q = qdisc_priv(sch);
1730
1731         BUG_TRAP(!cl->filters);
1732
1733         cbq_destroy_filters(cl);
1734         qdisc_destroy(cl->q);
1735         qdisc_put_rtab(cl->R_tab);
1736 #ifdef CONFIG_NET_ESTIMATOR
1737         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1738 #endif
1739         if (cl != &q->link)
1740                 kfree(cl);
1741 }
1742
1743 static void
1744 cbq_destroy(struct Qdisc* sch)
1745 {
1746         struct cbq_sched_data *q = qdisc_priv(sch);
1747         struct cbq_class *cl;
1748         unsigned h;
1749
1750 #ifdef CONFIG_NET_CLS_POLICE
1751         q->rx_class = NULL;
1752 #endif
1753         /*
1754          * Filters must be destroyed first because we don't destroy the
1755          * classes from root to leaves, which means that filters can still
1756          * be bound to classes that have already been destroyed. --TGR '04
1757          */
1758         for (h = 0; h < 16; h++)
1759                 for (cl = q->classes[h]; cl; cl = cl->next)
1760                         cbq_destroy_filters(cl);
1761
1762         for (h = 0; h < 16; h++) {
1763                 struct cbq_class *next;
1764
1765                 for (cl = q->classes[h]; cl; cl = next) {
1766                         next = cl->next;
1767                         cbq_destroy_class(sch, cl);
1768                 }
1769         }
1770 }
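/*
 * Teardown therefore runs in two passes over the class hash: the first pass
 * unbinds every filter (see the comment above), the second frees the classes
 * through cbq_destroy_class(), which deliberately skips kfree() for &q->link
 * because the root class is embedded in the qdisc's private data rather than
 * allocated separately.
 */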
1771
1772 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1773 {
1774         struct cbq_class *cl = (struct cbq_class*)arg;
1775
1776         if (--cl->refcnt == 0) {
1777 #ifdef CONFIG_NET_CLS_POLICE
1778                 struct cbq_sched_data *q = qdisc_priv(sch);
1779
1780                 spin_lock_bh(&sch->dev->queue_lock);
1781                 if (q->rx_class == cl)
1782                         q->rx_class = NULL;
1783                 spin_unlock_bh(&sch->dev->queue_lock);
1784 #endif
1785
1786                 cbq_destroy_class(sch, cl);
1787         }
1788 }
1789
1790 static int
1791 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1792                  unsigned long *arg)
1793 {
1794         int err;
1795         struct cbq_sched_data *q = qdisc_priv(sch);
1796         struct cbq_class *cl = (struct cbq_class*)*arg;
1797         struct rtattr *opt = tca[TCA_OPTIONS-1];
1798         struct rtattr *tb[TCA_CBQ_MAX];
1799         struct cbq_class *parent;
1800         struct qdisc_rate_table *rtab = NULL;
1801
1802         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1803                 return -EINVAL;
1804
1805         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1806             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1807                 return -EINVAL;
1808
1809         if (tb[TCA_CBQ_FOPT-1] &&
1810             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1811                 return -EINVAL;
1812
1813         if (tb[TCA_CBQ_RATE-1] &&
1814             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1815                 return -EINVAL;
1816
1817         if (tb[TCA_CBQ_LSSOPT-1] &&
1818             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1819                 return -EINVAL;
1820
1821         if (tb[TCA_CBQ_WRROPT-1] &&
1822             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1823                 return -EINVAL;
1824
1825 #ifdef CONFIG_NET_CLS_POLICE
1826         if (tb[TCA_CBQ_POLICE-1] &&
1827             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1828                 return -EINVAL;
1829 #endif
1830
1831         if (cl) {
1832                 /* Check parent */
1833                 if (parentid) {
1834                         if (cl->tparent && cl->tparent->classid != parentid)
1835                                 return -EINVAL;
1836                         if (!cl->tparent && parentid != TC_H_ROOT)
1837                                 return -EINVAL;
1838                 }
1839
1840                 if (tb[TCA_CBQ_RATE-1]) {
1841                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1842                         if (rtab == NULL)
1843                                 return -EINVAL;
1844                 }
1845
1846                 /* Change class parameters */
1847                 sch_tree_lock(sch);
1848
1849                 if (cl->next_alive != NULL)
1850                         cbq_deactivate_class(cl);
1851
1852                 if (rtab) {
1853                         rtab = xchg(&cl->R_tab, rtab);
1854                         qdisc_put_rtab(rtab);
1855                 }
1856
1857                 if (tb[TCA_CBQ_LSSOPT-1])
1858                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1859
1860                 if (tb[TCA_CBQ_WRROPT-1]) {
1861                         cbq_rmprio(q, cl);
1862                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1863                 }
1864
1865                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1866                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1867
1868 #ifdef CONFIG_NET_CLS_POLICE
1869                 if (tb[TCA_CBQ_POLICE-1])
1870                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1871 #endif
1872
1873                 if (tb[TCA_CBQ_FOPT-1])
1874                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1875
1876                 if (cl->q->q.qlen)
1877                         cbq_activate_class(cl);
1878
1879                 sch_tree_unlock(sch);
1880
1881 #ifdef CONFIG_NET_ESTIMATOR
1882                 if (tca[TCA_RATE-1])
1883                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1884                                 cl->stats_lock, tca[TCA_RATE-1]);
1885 #endif
1886                 return 0;
1887         }
1888
1889         if (parentid == TC_H_ROOT)
1890                 return -EINVAL;
1891
1892         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1893             tb[TCA_CBQ_LSSOPT-1] == NULL)
1894                 return -EINVAL;
1895
1896         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1897         if (rtab == NULL)
1898                 return -EINVAL;
1899
1900         if (classid) {
1901                 err = -EINVAL;
1902                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1903                         goto failure;
1904         } else {
1905                 int i;
1906                 classid = TC_H_MAKE(sch->handle,0x8000);
1907
1908                 for (i=0; i<0x8000; i++) {
1909                         if (++q->hgenerator >= 0x8000)
1910                                 q->hgenerator = 1;
1911                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1912                                 break;
1913                 }
1914                 err = -ENOSR;
1915                 if (i >= 0x8000)
1916                         goto failure;
1917                 classid = classid|q->hgenerator;
1918         }
1919
1920         parent = &q->link;
1921         if (parentid) {
1922                 parent = cbq_class_lookup(q, parentid);
1923                 err = -EINVAL;
1924                 if (parent == NULL)
1925                         goto failure;
1926         }
1927
1928         err = -ENOBUFS;
1929         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1930         if (cl == NULL)
1931                 goto failure;
1932         cl->R_tab = rtab;
1933         rtab = NULL;
1934         cl->refcnt = 1;
1935         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1936                 cl->q = &noop_qdisc;
1937         cl->classid = classid;
1938         cl->tparent = parent;
1939         cl->qdisc = sch;
1940         cl->allot = parent->allot;
1941         cl->quantum = cl->allot;
1942         cl->weight = cl->R_tab->rate.rate;
1943         cl->stats_lock = &sch->dev->queue_lock;
1944
1945         sch_tree_lock(sch);
1946         cbq_link_class(cl);
1947         cl->borrow = cl->tparent;
1948         if (cl->tparent != &q->link)
1949                 cl->share = cl->tparent;
1950         cbq_adjust_levels(parent);
1951         cl->minidle = -0x7FFFFFFF;
1952         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1953         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1954         if (cl->ewma_log==0)
1955                 cl->ewma_log = q->link.ewma_log;
1956         if (cl->maxidle==0)
1957                 cl->maxidle = q->link.maxidle;
1958         if (cl->avpkt==0)
1959                 cl->avpkt = q->link.avpkt;
1960         cl->overlimit = cbq_ovl_classic;
1961         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1962                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1963 #ifdef CONFIG_NET_CLS_POLICE
1964         if (tb[TCA_CBQ_POLICE-1])
1965                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1966 #endif
1967         if (tb[TCA_CBQ_FOPT-1])
1968                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1969         sch_tree_unlock(sch);
1970
1971 #ifdef CONFIG_NET_ESTIMATOR
1972         if (tca[TCA_RATE-1])
1973                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1974                         cl->stats_lock, tca[TCA_RATE-1]);
1975 #endif
1976
1977         *arg = (unsigned long)cl;
1978         return 0;
1979
1980 failure:
1981         qdisc_put_rtab(rtab);
1982         return err;
1983 }
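/*
 * Usage sketch (typical tc-cbq class syntax; exact option spelling depends
 * on the iproute2 version): a new class created with something like
 *
 *	tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 100Mbit \
 *		rate 6Mbit weight 600Kbit prio 5 allot 1514 avpkt 1000 bounded
 *
 * supplies the three attributes this function requires for class creation --
 * TCA_CBQ_RATE, TCA_CBQ_WRROPT and TCA_CBQ_LSSOPT -- while the overlimit
 * strategy, police and filter options remain optional, as the checks above
 * show.
 */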
1984
1985 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1986 {
1987         struct cbq_sched_data *q = qdisc_priv(sch);
1988         struct cbq_class *cl = (struct cbq_class*)arg;
1989
1990         if (cl->filters || cl->children || cl == &q->link)
1991                 return -EBUSY;
1992
1993         sch_tree_lock(sch);
1994
1995         if (cl->next_alive)
1996                 cbq_deactivate_class(cl);
1997
1998         if (q->tx_borrowed == cl)
1999                 q->tx_borrowed = q->tx_class;
2000         if (q->tx_class == cl) {
2001                 q->tx_class = NULL;
2002                 q->tx_borrowed = NULL;
2003         }
2004 #ifdef CONFIG_NET_CLS_POLICE
2005         if (q->rx_class == cl)
2006                 q->rx_class = NULL;
2007 #endif
2008
2009         cbq_unlink_class(cl);
2010         cbq_adjust_levels(cl->tparent);
2011         cl->defmap = 0;
2012         cbq_sync_defmap(cl);
2013
2014         cbq_rmprio(q, cl);
2015         sch_tree_unlock(sch);
2016
2017         if (--cl->refcnt == 0)
2018                 cbq_destroy_class(sch, cl);
2019
2020         return 0;
2021 }
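/*
 * Deleting a class only unlinks it from the scheduler structures; the memory
 * itself is reference counted.  cbq_get() takes a reference for every
 * outstanding user, so if someone still holds one at this point the final
 * cbq_destroy_class() is deferred to the matching cbq_put().
 */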
2022
2023 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2024 {
2025         struct cbq_sched_data *q = qdisc_priv(sch);
2026         struct cbq_class *cl = (struct cbq_class *)arg;
2027
2028         if (cl == NULL)
2029                 cl = &q->link;
2030
2031         return &cl->filter_list;
2032 }
2033
2034 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2035                                      u32 classid)
2036 {
2037         struct cbq_sched_data *q = qdisc_priv(sch);
2038         struct cbq_class *p = (struct cbq_class*)parent;
2039         struct cbq_class *cl = cbq_class_lookup(q, classid);
2040
2041         if (cl) {
2042                 if (p && p->level <= cl->level)
2043                         return 0;
2044                 cl->filters++;
2045                 return (unsigned long)cl;
2046         }
2047         return 0;
2048 }
2049
2050 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2051 {
2052         struct cbq_class *cl = (struct cbq_class*)arg;
2053
2054         cl->filters--;
2055 }
2056
2057 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2058 {
2059         struct cbq_sched_data *q = qdisc_priv(sch);
2060         unsigned h;
2061
2062         if (arg->stop)
2063                 return;
2064
2065         for (h = 0; h < 16; h++) {
2066                 struct cbq_class *cl;
2067
2068                 for (cl = q->classes[h]; cl; cl = cl->next) {
2069                         if (arg->count < arg->skip) {
2070                                 arg->count++;
2071                                 continue;
2072                         }
2073                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2074                                 arg->stop = 1;
2075                                 return;
2076                         }
2077                         arg->count++;
2078                 }
2079         }
2080 }
2081
2082 static struct Qdisc_class_ops cbq_class_ops = {
2083         .graft          =       cbq_graft,
2084         .leaf           =       cbq_leaf,
2085         .get            =       cbq_get,
2086         .put            =       cbq_put,
2087         .change         =       cbq_change_class,
2088         .delete         =       cbq_delete,
2089         .walk           =       cbq_walk,
2090         .tcf_chain      =       cbq_find_tcf,
2091         .bind_tcf       =       cbq_bind_filter,
2092         .unbind_tcf     =       cbq_unbind_filter,
2093         .dump           =       cbq_dump_class,
2094         .dump_stats     =       cbq_dump_class_stats,
2095 };
2096
2097 static struct Qdisc_ops cbq_qdisc_ops = {
2098         .next           =       NULL,
2099         .cl_ops         =       &cbq_class_ops,
2100         .id             =       "cbq",
2101         .priv_size      =       sizeof(struct cbq_sched_data),
2102         .enqueue        =       cbq_enqueue,
2103         .dequeue        =       cbq_dequeue,
2104         .requeue        =       cbq_requeue,
2105         .drop           =       cbq_drop,
2106         .init           =       cbq_init,
2107         .reset          =       cbq_reset,
2108         .destroy        =       cbq_destroy,
2109         .change         =       NULL,
2110         .dump           =       cbq_dump,
2111         .dump_stats     =       cbq_dump_stats,
2112         .owner          =       THIS_MODULE,
2113 };
2114
2115 static int __init cbq_module_init(void)
2116 {
2117         return register_qdisc(&cbq_qdisc_ops);
2118 }
2119 static void __exit cbq_module_exit(void)
2120 {
2121         unregister_qdisc(&cbq_qdisc_ops);
2122 }
2123 module_init(cbq_module_init)
2124 module_exit(cbq_module_exit)
2125 MODULE_LICENSE("GPL");