1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/config.h>
14 #include <linux/module.h>
15 #include <asm/uaccess.h>
16 #include <asm/system.h>
17 #include <linux/bitops.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/sched.h>
21 #include <linux/string.h>
22 #include <linux/mm.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
25 #include <linux/in.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/if_ether.h>
29 #include <linux/inet.h>
30 #include <linux/netdevice.h>
31 #include <linux/etherdevice.h>
32 #include <linux/notifier.h>
33 #include <net/ip.h>
34 #include <net/route.h>
35 #include <linux/skbuff.h>
36 #include <net/sock.h>
37 #include <net/pkt_sched.h>
38
39
40 /*      Class-Based Queueing (CBQ) algorithm.
41         =======================================
42
43         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
44                  Management Models for Packet Networks",
45                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
46
47                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
48
49                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
50                  Parameters", 1996
51
52                  [4] Sally Floyd and Michael Speer, "Experimental Results
53                  for Class-Based Queueing", 1998, not published.
54
55         -----------------------------------------------------------------------
56
57         The algorithm skeleton was taken from the NS simulator's cbq.cc.
58         Anyone who wants to check this code against the LBL version
59         should take into account that ONLY the skeleton was borrowed;
60         the implementation is different. In particular:
61
62         --- The WRR algorithm is different. Our version looks more
63         reasonable (I hope) and works when quanta are allowed to be
64         less than MTU, which is always the case when real time classes
65         have small rates. Note that the statement in [3] is
66         incomplete: the delay may actually be estimated even if the class
67         per-round allotment is less than MTU. Namely, if the per-round
68         allotment is W*r_i, and r_1+...+r_k = r < 1,
69
70         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
71
72         In the worst case we have an IntServ estimate with D = W*r+k*MTU
73         and C = MTU*r. The proof (if correct at all) is trivial.
74
75
76         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
77         interpret some places, which look like wrong translations
78         from NS. Anyone is advised to find these differences
79         and explain to me why I am wrong 8).
80
81         --- Linux has no EOI event, so we cannot estimate true class
82         idle time. The workaround is to treat the next dequeue event
83         as a sign that the previous packet has finished. This is wrong because of
84         internal device queueing, but on a permanently loaded link it is true.
85         Moreover, combined with the clock integrator, this scheme looks
86         very close to an ideal solution.  */
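
/* A rough glossary of the per-class quantities used below (see [1]-[3]
   for the exact definitions):

     avgidle   - EWMA of the class idle time, kept scaled up by
                 2^ewma_log (see cbq_update()); a non-positive value
                 means the class is at or over its allocated rate.
     undertime - point on the virtual clock at which the class becomes
                 underlimit again and may send without borrowing.
     borrow    - ancestor to borrow bandwidth from; NULL for bounded
                 (bandwidth-limited) classes.
     toplevel  - cutoff level above which lending is not allowed; an
                 approximation of the top-level guideline of [1].
 */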
87
88 struct cbq_sched_data;
89
90
91 struct cbq_class
92 {
93         struct cbq_class        *next;          /* hash table link */
94         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
95
96 /* Parameters */
97         u32                     classid;
98         unsigned char           priority;       /* class priority */
99         unsigned char           priority2;      /* priority to be used after overlimit */
100         unsigned char           ewma_log;       /* time constant for idle time calculation */
101         unsigned char           ovl_strategy;
102 #ifdef CONFIG_NET_CLS_POLICE
103         unsigned char           police;
104 #endif
105
106         u32                     defmap;
107
108         /* Link-sharing scheduler parameters */
109         long                    maxidle;        /* Class parameters: see below. */
110         long                    offtime;
111         long                    minidle;
112         u32                     avpkt;
113         struct qdisc_rate_table *R_tab;
114
115         /* Overlimit strategy parameters */
116         void                    (*overlimit)(struct cbq_class *cl);
117         long                    penalty;
118
119         /* General scheduler (WRR) parameters */
120         long                    allot;
121         long                    quantum;        /* Allotment per WRR round */
122         long                    weight;         /* Relative allotment: see below */
123
124         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
125         struct cbq_class        *split;         /* Ptr to split node */
126         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
127         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
128         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
129                                                    parent otherwise */
130         struct cbq_class        *sibling;       /* Sibling chain */
131         struct cbq_class        *children;      /* Pointer to children chain */
132
133         struct Qdisc            *q;             /* Elementary queueing discipline */
134
135
136 /* Variables */
137         unsigned char           cpriority;      /* Effective priority */
138         unsigned char           delayed;
139         unsigned char           level;          /* level of the class in hierarchy:
140                                                    0 for leaf classes, and maximal
141                                                    level of children + 1 for nodes.
142                                                  */
143
144         psched_time_t           last;           /* Last end of service */
145         psched_time_t           undertime;
146         long                    avgidle;
147         long                    deficit;        /* Saved deficit for WRR */
148         unsigned long           penalized;
149         struct gnet_stats_basic bstats;
150         struct gnet_stats_queue qstats;
151         struct gnet_stats_rate_est rate_est;
152         spinlock_t              *stats_lock;
153         struct tc_cbq_xstats    xstats;
154
155         struct tcf_proto        *filter_list;
156
157         int                     refcnt;
158         int                     filters;
159
160         struct cbq_class        *defaults[TC_PRIO_MAX+1];
161 };
162
163 struct cbq_sched_data
164 {
165         struct cbq_class        *classes[16];           /* Hash table of all classes */
166         int                     nclasses[TC_CBQ_MAXPRIO+1];
167         unsigned                quanta[TC_CBQ_MAXPRIO+1];
168
169         struct cbq_class        link;
170
171         unsigned                activemask;
172         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
173                                                                    with backlog */
174
175 #ifdef CONFIG_NET_CLS_POLICE
176         struct cbq_class        *rx_class;
177 #endif
178         struct cbq_class        *tx_class;
179         struct cbq_class        *tx_borrowed;
180         int                     tx_len;
181         psched_time_t           now;            /* Cached timestamp */
182         psched_time_t           now_rt;         /* Cached real time */
183         unsigned                pmask;
184
185         struct timer_list       delay_timer;
186         struct timer_list       wd_timer;       /* Watchdog timer,
187                                                    started when CBQ has
188                                                    backlog, but cannot
189                                                    transmit just now */
190         long                    wd_expires;
191         int                     toplevel;
192         u32                     hgenerator;
193 };
194
195
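/* L2T ("length to time"): transmission time of a len-byte packet at the
   class rate, looked up in the precomputed rate table (indexed by
   len >> cell_log) and expressed in psched time units. */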
196 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
197
198
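/* Fold the low bits of a classid into a 4-bit bucket index for the
   16-slot class hash table. */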
199 static __inline__ unsigned cbq_hash(u32 h)
200 {
201         h ^= h>>8;
202         h ^= h>>4;
203         return h&0xF;
204 }
205
206 static __inline__ struct cbq_class *
207 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
208 {
209         struct cbq_class *cl;
210
211         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
212                 if (cl->classid == classid)
213                         return cl;
214         return NULL;
215 }
216
217 #ifdef CONFIG_NET_CLS_POLICE
218
219 static struct cbq_class *
220 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
221 {
222         struct cbq_class *cl, *new;
223
224         for (cl = this->tparent; cl; cl = cl->tparent)
225                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
226                         return new;
227
228         return NULL;
229 }
230
231 #endif
232
233 /* Classify packet. The procedure is pretty complicated, but
234    it allows us to combine link sharing and priority scheduling
235    transparently.
236
237    Namely, you can put link sharing rules (e.g. route based) at the root of CBQ,
238    so that it resolves to split nodes. Then packets are classified
239    by logical priority, or a more specific classifier may be attached
240    to the split node.
241  */
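
/* Example (hypothetical handles): if this qdisc is 1: (sch->handle ==
   0x00010000) and an application sets skb->priority to 0x00010002,
   step 1 below maps the packet straight to class 1:2 without consulting
   any filters. */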
242
243 static struct cbq_class *
244 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
245 {
246         struct cbq_sched_data *q = qdisc_priv(sch);
247         struct cbq_class *head = &q->link;
248         struct cbq_class **defmap;
249         struct cbq_class *cl = NULL;
250         u32 prio = skb->priority;
251         struct tcf_result res;
252
253         /*
254          *  Step 1. If skb->priority points to one of our classes, use it.
255          */
256         if (TC_H_MAJ(prio^sch->handle) == 0 &&
257             (cl = cbq_class_lookup(q, prio)) != NULL)
258                 return cl;
259
260         *qerr = NET_XMIT_BYPASS;
261         for (;;) {
262                 int result = 0;
263                 defmap = head->defaults;
264
265                 /*
266                  * Step 2+n. Apply classifier.
267                  */
268                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
269                         goto fallback;
270
271                 if ((cl = (void*)res.class) == NULL) {
272                         if (TC_H_MAJ(res.classid))
273                                 cl = cbq_class_lookup(q, res.classid);
274                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
275                                 cl = defmap[TC_PRIO_BESTEFFORT];
276
277                         if (cl == NULL || cl->level >= head->level)
278                                 goto fallback;
279                 }
280
281 #ifdef CONFIG_NET_CLS_ACT
282                 switch (result) {
283                 case TC_ACT_QUEUED:
284                 case TC_ACT_STOLEN: 
285                         *qerr = NET_XMIT_SUCCESS;
286                 case TC_ACT_SHOT:
287                         return NULL;
288                 }
289 #elif defined(CONFIG_NET_CLS_POLICE)
290                 switch (result) {
291                 case TC_POLICE_RECLASSIFY:
292                         return cbq_reclassify(skb, cl);
293                 case TC_POLICE_SHOT:
294                         return NULL;
295                 default:
296                         break;
297                 }
298 #endif
299                 if (cl->level == 0)
300                         return cl;
301
302                 /*
303                  * Step 3+n. If classifier selected a link sharing class,
304                  *         apply the agency-specific classifier.
305                  *         Repeat this procedure until we hit a leaf node.
306                  */
307                 head = cl;
308         }
309
310 fallback:
311         cl = head;
312
313         /*
314          * Step 4. No success...
315          */
316         if (TC_H_MAJ(prio) == 0 &&
317             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
318             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
319                 return head;
320
321         return cl;
322 }
323
324 /*
325    A packet has just been enqueued on an empty class.
326    cbq_activate_class adds the class to the tail of the active class list
327    of its priority band.
328  */
329
330 static __inline__ void cbq_activate_class(struct cbq_class *cl)
331 {
332         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
333         int prio = cl->cpriority;
334         struct cbq_class *cl_tail;
335
336         cl_tail = q->active[prio];
337         q->active[prio] = cl;
338
339         if (cl_tail != NULL) {
340                 cl->next_alive = cl_tail->next_alive;
341                 cl_tail->next_alive = cl;
342         } else {
343                 cl->next_alive = cl;
344                 q->activemask |= (1<<prio);
345         }
346 }
347
348 /*
349    Unlink class from active chain.
350    Note that this same procedure is done directly in cbq_dequeue*
351    during the round-robin procedure.
352  */
353
354 static void cbq_deactivate_class(struct cbq_class *this)
355 {
356         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
357         int prio = this->cpriority;
358         struct cbq_class *cl;
359         struct cbq_class *cl_prev = q->active[prio];
360
361         do {
362                 cl = cl_prev->next_alive;
363                 if (cl == this) {
364                         cl_prev->next_alive = cl->next_alive;
365                         cl->next_alive = NULL;
366
367                         if (cl == q->active[prio]) {
368                                 q->active[prio] = cl_prev;
369                                 if (cl == q->active[prio]) {
370                                         q->active[prio] = NULL;
371                                         q->activemask &= ~(1<<prio);
372                                         return;
373                                 }
374                         }
375
376                         cl = cl_prev->next_alive;
377                         return;
378                 }
379         } while ((cl_prev = cl) != q->active[prio]);
380 }
381
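/* On enqueue, lower the toplevel borrowing cutoff to the level of the
   first ancestor (starting from cl itself) that is currently underlimit,
   following the top-level link-sharing guideline of [1]. Classes above
   q->toplevel are not allowed to lend in cbq_under_limit(). */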
382 static void
383 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
384 {
385         int toplevel = q->toplevel;
386
387         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
388                 psched_time_t now;
389                 psched_tdiff_t incr;
390
391                 PSCHED_GET_TIME(now);
392                 incr = PSCHED_TDIFF(now, q->now_rt);
393                 PSCHED_TADD2(q->now, incr, now);
394
395                 do {
396                         if (PSCHED_TLESS(cl->undertime, now)) {
397                                 q->toplevel = cl->level;
398                                 return;
399                         }
400                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
401         }
402 }
403
404 static int
405 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
406 {
407         struct cbq_sched_data *q = qdisc_priv(sch);
408         int len = skb->len;
409         int ret;
410         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
411
412 #ifdef CONFIG_NET_CLS_POLICE
413         q->rx_class = cl;
414 #endif
415         if (cl == NULL) {
416                 if (ret == NET_XMIT_BYPASS)
417                         sch->qstats.drops++;
418                 kfree_skb(skb);
419                 return ret;
420         }
421
422 #ifdef CONFIG_NET_CLS_POLICE
423         cl->q->__parent = sch;
424 #endif
425         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
426                 sch->q.qlen++;
427                 sch->bstats.packets++;
428                 sch->bstats.bytes+=len;
429                 cbq_mark_toplevel(q, cl);
430                 if (!cl->next_alive)
431                         cbq_activate_class(cl);
432                 return ret;
433         }
434
435         sch->qstats.drops++;
436         cbq_mark_toplevel(q, cl);
437         cl->qstats.drops++;
438         return ret;
439 }
440
441 static int
442 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
443 {
444         struct cbq_sched_data *q = qdisc_priv(sch);
445         struct cbq_class *cl;
446         int ret;
447
448         if ((cl = q->tx_class) == NULL) {
449                 kfree_skb(skb);
450                 sch->qstats.drops++;
451                 return NET_XMIT_CN;
452         }
453         q->tx_class = NULL;
454
455         cbq_mark_toplevel(q, cl);
456
457 #ifdef CONFIG_NET_CLS_POLICE
458         q->rx_class = cl;
459         cl->q->__parent = sch;
460 #endif
461         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
462                 sch->q.qlen++;
463                 sch->qstats.requeues++;
464                 if (!cl->next_alive)
465                         cbq_activate_class(cl);
466                 return 0;
467         }
468         sch->qstats.drops++;
469         cl->qstats.drops++;
470         return ret;
471 }
472
473 /* Overlimit actions */
474
475 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
476
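/* With the classic strategy an overlimit class has its undertime pushed
   into the future by offtime plus the time needed for avgidle to drain
   back to zero; q->wd_expires keeps the earliest such wakeup so that
   cbq_dequeue() can arm the watchdog timer. */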
477 static void cbq_ovl_classic(struct cbq_class *cl)
478 {
479         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
480         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
481
482         if (!cl->delayed) {
483                 delay += cl->offtime;
484
485                 /* 
486                    The class goes to sleep, so it will have no
487                    chance to work off its avgidle. Let's forgive it 8)
488
489                    BTW cbq-2.0 has a bug in this
490                    place: apparently they forgot to shift it by cl->ewma_log.
491                  */
492                 if (cl->avgidle < 0)
493                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
494                 if (cl->avgidle < cl->minidle)
495                         cl->avgidle = cl->minidle;
496                 if (delay <= 0)
497                         delay = 1;
498                 PSCHED_TADD2(q->now, delay, cl->undertime);
499
500                 cl->xstats.overactions++;
501                 cl->delayed = 1;
502         }
503         if (q->wd_expires == 0 || q->wd_expires > delay)
504                 q->wd_expires = delay;
505
506         /* Dirty work! We must schedule wakeups based on the
507            real available rate, rather than the leaf rate,
508            which may be tiny (even zero).
509          */
510         if (q->toplevel == TC_CBQ_MAXLEVEL) {
511                 struct cbq_class *b;
512                 psched_tdiff_t base_delay = q->wd_expires;
513
514                 for (b = cl->borrow; b; b = b->borrow) {
515                         delay = PSCHED_TDIFF(b->undertime, q->now);
516                         if (delay < base_delay) {
517                                 if (delay <= 0)
518                                         delay = 1;
519                                 base_delay = delay;
520                         }
521                 }
522
523                 q->wd_expires = base_delay;
524         }
525 }
526
527 /* TC_CBQ_OVL_RCLASSIC: penalize, by offtime, classes in the hierarchy when
528    they go overlimit
529  */
530
531 static void cbq_ovl_rclassic(struct cbq_class *cl)
532 {
533         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
534         struct cbq_class *this = cl;
535
536         do {
537                 if (cl->level > q->toplevel) {
538                         cl = NULL;
539                         break;
540                 }
541         } while ((cl = cl->borrow) != NULL);
542
543         if (cl == NULL)
544                 cl = this;
545         cbq_ovl_classic(cl);
546 }
547
548 /* TC_CBQ_OVL_DELAY: delay until the class goes underlimit */
549
550 static void cbq_ovl_delay(struct cbq_class *cl)
551 {
552         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
553         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
554
555         if (!cl->delayed) {
556                 unsigned long sched = jiffies;
557
558                 delay += cl->offtime;
559                 if (cl->avgidle < 0)
560                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
561                 if (cl->avgidle < cl->minidle)
562                         cl->avgidle = cl->minidle;
563                 PSCHED_TADD2(q->now, delay, cl->undertime);
564
565                 if (delay > 0) {
566                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
567                         cl->penalized = sched;
568                         cl->cpriority = TC_CBQ_MAXPRIO;
569                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
570                         if (del_timer(&q->delay_timer) &&
571                             (long)(q->delay_timer.expires - sched) > 0)
572                                 q->delay_timer.expires = sched;
573                         add_timer(&q->delay_timer);
574                         cl->delayed = 1;
575                         cl->xstats.overactions++;
576                         return;
577                 }
578                 delay = 1;
579         }
580         if (q->wd_expires == 0 || q->wd_expires > delay)
581                 q->wd_expires = delay;
582 }
583
584 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
585
586 static void cbq_ovl_lowprio(struct cbq_class *cl)
587 {
588         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
589
590         cl->penalized = jiffies + cl->penalty;
591
592         if (cl->cpriority != cl->priority2) {
593                 cl->cpriority = cl->priority2;
594                 q->pmask |= (1<<cl->cpriority);
595                 cl->xstats.overactions++;
596         }
597         cbq_ovl_classic(cl);
598 }
599
600 /* TC_CBQ_OVL_DROP: penalize class by dropping */
601
602 static void cbq_ovl_drop(struct cbq_class *cl)
603 {
604         if (cl->q->ops->drop)
605                 if (cl->q->ops->drop(cl->q))
606                         cl->qdisc->q.qlen--;
607         cl->xstats.overactions++;
608         cbq_ovl_classic(cl);
609 }
610
611 static void cbq_watchdog(unsigned long arg)
612 {
613         struct Qdisc *sch = (struct Qdisc*)arg;
614
615         sch->flags &= ~TCQ_F_THROTTLED;
616         netif_schedule(sch->dev);
617 }
618
619 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
620 {
621         struct cbq_class *cl;
622         struct cbq_class *cl_prev = q->active[prio];
623         unsigned long now = jiffies;
624         unsigned long sched = now;
625
626         if (cl_prev == NULL)
627                 return now;
628
629         do {
630                 cl = cl_prev->next_alive;
631                 if ((long)(now - cl->penalized) > 0) {
632                         cl_prev->next_alive = cl->next_alive;
633                         cl->next_alive = NULL;
634                         cl->cpriority = cl->priority;
635                         cl->delayed = 0;
636                         cbq_activate_class(cl);
637
638                         if (cl == q->active[prio]) {
639                                 q->active[prio] = cl_prev;
640                                 if (cl == q->active[prio]) {
641                                         q->active[prio] = NULL;
642                                         return 0;
643                                 }
644                         }
645
646                         cl = cl_prev->next_alive;
647                 } else if ((long)(sched - cl->penalized) > 0)
648                         sched = cl->penalized;
649         } while ((cl_prev = cl) != q->active[prio]);
650
651         return (long)(sched - now);
652 }
653
654 static void cbq_undelay(unsigned long arg)
655 {
656         struct Qdisc *sch = (struct Qdisc*)arg;
657         struct cbq_sched_data *q = qdisc_priv(sch);
658         long delay = 0;
659         unsigned pmask;
660
661         pmask = q->pmask;
662         q->pmask = 0;
663
664         while (pmask) {
665                 int prio = ffz(~pmask);
666                 long tmp;
667
668                 pmask &= ~(1<<prio);
669
670                 tmp = cbq_undelay_prio(q, prio);
671                 if (tmp > 0) {
672                         q->pmask |= 1<<prio;
673                         if (tmp < delay || delay == 0)
674                                 delay = tmp;
675                 }
676         }
677
678         if (delay) {
679                 q->delay_timer.expires = jiffies + delay;
680                 add_timer(&q->delay_timer);
681         }
682
683         sch->flags &= ~TCQ_F_THROTTLED;
684         netif_schedule(sch->dev);
685 }
686
687
688 #ifdef CONFIG_NET_CLS_POLICE
689
690 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
691 {
692         int len = skb->len;
693         struct Qdisc *sch = child->__parent;
694         struct cbq_sched_data *q = qdisc_priv(sch);
695         struct cbq_class *cl = q->rx_class;
696
697         q->rx_class = NULL;
698
699         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
700
701                 cbq_mark_toplevel(q, cl);
702
703                 q->rx_class = cl;
704                 cl->q->__parent = sch;
705
706                 if (cl->q->enqueue(skb, cl->q) == 0) {
707                         sch->q.qlen++;
708                         sch->bstats.packets++;
709                         sch->bstats.bytes+=len;
710                         if (!cl->next_alive)
711                                 cbq_activate_class(cl);
712                         return 0;
713                 }
714                 sch->qstats.drops++;
715                 return 0;
716         }
717
718         sch->qstats.drops++;
719         return -1;
720 }
721 #endif
722
723 /* 
724    This is a mission-critical procedure.
725
726    We "regenerate" the toplevel cutoff if the transmitting class
727    has backlog and is not regulated. This is not part of the
728    original CBQ description, but looks more reasonable.
729    It may be wrong; this question needs further investigation.
730 */
731
732 static __inline__ void
733 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
734                     struct cbq_class *borrowed)
735 {
736         if (cl && q->toplevel >= borrowed->level) {
737                 if (cl->q->q.qlen > 1) {
738                         do {
739                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
740                                         q->toplevel = borrowed->level;
741                                         return;
742                                 }
743                         } while ((borrowed=borrowed->borrow) != NULL);
744                 }
745 #if 0   
746         /* It is not necessary now. Uncommenting it
747            will save CPU cycles, but decrease fairness.
748          */
749                 q->toplevel = TC_CBQ_MAXLEVEL;
750 #endif
751         }
752 }
753
754 static void
755 cbq_update(struct cbq_sched_data *q)
756 {
757         struct cbq_class *this = q->tx_class;
758         struct cbq_class *cl = this;
759         int len = q->tx_len;
760
761         q->tx_class = NULL;
762
763         for ( ; cl; cl = cl->share) {
764                 long avgidle = cl->avgidle;
765                 long idle;
766
767                 cl->bstats.packets++;
768                 cl->bstats.bytes += len;
769
770                 /*
771                    (now - last) is the total time between packet right edges.
772                    (last_pktlen/rate) is the "virtual" busy time, so that
773
774                          idle = (now - last) - last_pktlen/rate
775                  */
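                /* For example (made-up numbers): if 3000 time units
                   passed since the previous packet's right edge and this
                   packet needs 1000 units at the class rate, idle = 2000;
                   a negative result means the class has been sending
                   faster than its allocated rate. */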
776
777                 idle = PSCHED_TDIFF(q->now, cl->last);
778                 if ((unsigned long)idle > 128*1024*1024) {
779                         avgidle = cl->maxidle;
780                 } else {
781                         idle -= L2T(cl, len);
782
783                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
784                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
785                    cl->avgidle == true_avgidle/W,
786                    hence:
787                  */
788                         avgidle += idle - (avgidle>>cl->ewma_log);
789                 }
790
791                 if (avgidle <= 0) {
792                         /* Overlimit or at-limit */
793
794                         if (avgidle < cl->minidle)
795                                 avgidle = cl->minidle;
796
797                         cl->avgidle = avgidle;
798
799                         /* Calculate the expected time when this class
800                            will be allowed to send.
801                            It will occur when:
802                            (1-W)*true_avgidle + W*idle = 0, i.e.
803                            idle = (1/W - 1)*(-true_avgidle)
804                            or, in terms of the scaled value,
805                            idle = (1 - W)*(-cl->avgidle);
806                          */
807                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
808
809                         /*
810                            That is not all.
811                            To maintain the rate allocated to the class,
812                            we add to undertime the virtual clock time
813                            necessary to complete the transmitted packet.
814                            (len/phys_bandwidth has already elapsed by
815                            the time cbq_update is called.)
816                          */
817
818                         idle -= L2T(&q->link, len);
819                         idle += L2T(cl, len);
820
821                         PSCHED_AUDIT_TDIFF(idle);
822
823                         PSCHED_TADD2(q->now, idle, cl->undertime);
824                 } else {
825                         /* Underlimit */
826
827                         PSCHED_SET_PASTPERFECT(cl->undertime);
828                         if (avgidle > cl->maxidle)
829                                 cl->avgidle = cl->maxidle;
830                         else
831                                 cl->avgidle = avgidle;
832                 }
833                 cl->last = q->now;
834         }
835
836         cbq_update_toplevel(q, this, q->tx_borrowed);
837 }
838
839 static __inline__ struct cbq_class *
840 cbq_under_limit(struct cbq_class *cl)
841 {
842         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
843         struct cbq_class *this_cl = cl;
844
845         if (cl->tparent == NULL)
846                 return cl;
847
848         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
849             !PSCHED_TLESS(q->now, cl->undertime)) {
850                 cl->delayed = 0;
851                 return cl;
852         }
853
854         do {
855                 /* This is a very suspicious place. Currently the overlimit
856                    action is generated for non-bounded classes
857                    only if the link is completely congested.
858                    Though this agrees with the ancestor-only paradigm,
859                    it looks very stupid. In particular,
860                    it means that this chunk of code will either
861                    never be called or result in strong amplification
862                    of burstiness. Dangerous and silly, but
863                    no other solution exists.
864                  */
865                 if ((cl = cl->borrow) == NULL) {
866                         this_cl->qstats.overlimits++;
867                         this_cl->overlimit(this_cl);
868                         return NULL;
869                 }
870                 if (cl->level > q->toplevel)
871                         return NULL;
872         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
873                  PSCHED_TLESS(q->now, cl->undertime));
874
875         cl->delayed = 0;
876         return cl;
877 }
878
879 static __inline__ struct sk_buff *
880 cbq_dequeue_prio(struct Qdisc *sch, int prio)
881 {
882         struct cbq_sched_data *q = qdisc_priv(sch);
883         struct cbq_class *cl_tail, *cl_prev, *cl;
884         struct sk_buff *skb;
885         int deficit;
886
887         cl_tail = cl_prev = q->active[prio];
888         cl = cl_prev->next_alive;
889
890         do {
891                 deficit = 0;
892
893                 /* Start round */
894                 do {
895                         struct cbq_class *borrow = cl;
896
897                         if (cl->q->q.qlen &&
898                             (borrow = cbq_under_limit(cl)) == NULL)
899                                 goto skip_class;
900
901                         if (cl->deficit <= 0) {
902                                 /* Class exhausted its allotment per
903                                    this round. Switch to the next one.
904                                  */
905                                 deficit = 1;
906                                 cl->deficit += cl->quantum;
907                                 goto next_class;
908                         }
909
910                         skb = cl->q->dequeue(cl->q);
911
912                         /* The class did not give us any skb :-(
913                            This can occur even if cl->q->q.qlen != 0,
914                            e.g. if cl->q == "tbf"
915                          */
916                         if (skb == NULL)
917                                 goto skip_class;
918
919                         cl->deficit -= skb->len;
920                         q->tx_class = cl;
921                         q->tx_borrowed = borrow;
922                         if (borrow != cl) {
923 #ifndef CBQ_XSTATS_BORROWS_BYTES
924                                 borrow->xstats.borrows++;
925                                 cl->xstats.borrows++;
926 #else
927                                 borrow->xstats.borrows += skb->len;
928                                 cl->xstats.borrows += skb->len;
929 #endif
930                         }
931                         q->tx_len = skb->len;
932
933                         if (cl->deficit <= 0) {
934                                 q->active[prio] = cl;
935                                 cl = cl->next_alive;
936                                 cl->deficit += cl->quantum;
937                         }
938                         return skb;
939
940 skip_class:
941                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
942                                 /* Class is empty or penalized.
943                                    Unlink it from active chain.
944                                  */
945                                 cl_prev->next_alive = cl->next_alive;
946                                 cl->next_alive = NULL;
947
948                                 /* Did cl_tail point to it? */
949                                 if (cl == cl_tail) {
950                                         /* Repair it! */
951                                         cl_tail = cl_prev;
952
953                                         /* Was it the last class in this band? */
954                                         if (cl == cl_tail) {
955                                                 /* Kill the band! */
956                                                 q->active[prio] = NULL;
957                                                 q->activemask &= ~(1<<prio);
958                                                 if (cl->q->q.qlen)
959                                                         cbq_activate_class(cl);
960                                                 return NULL;
961                                         }
962
963                                         q->active[prio] = cl_tail;
964                                 }
965                                 if (cl->q->q.qlen)
966                                         cbq_activate_class(cl);
967
968                                 cl = cl_prev;
969                         }
970
971 next_class:
972                         cl_prev = cl;
973                         cl = cl->next_alive;
974                 } while (cl_prev != cl_tail);
975         } while (deficit);
976
977         q->active[prio] = cl_prev;
978
979         return NULL;
980 }
981
982 static __inline__ struct sk_buff *
983 cbq_dequeue_1(struct Qdisc *sch)
984 {
985         struct cbq_sched_data *q = qdisc_priv(sch);
986         struct sk_buff *skb;
987         unsigned activemask;
988
989         activemask = q->activemask&0xFF;
990         while (activemask) {
991                 int prio = ffz(~activemask);
992                 activemask &= ~(1<<prio);
993                 skb = cbq_dequeue_prio(sch, prio);
994                 if (skb)
995                         return skb;
996         }
997         return NULL;
998 }
999
1000 static struct sk_buff *
1001 cbq_dequeue(struct Qdisc *sch)
1002 {
1003         struct sk_buff *skb;
1004         struct cbq_sched_data *q = qdisc_priv(sch);
1005         psched_time_t now;
1006         psched_tdiff_t incr;
1007
1008         PSCHED_GET_TIME(now);
1009         incr = PSCHED_TDIFF(now, q->now_rt);
1010
1011         if (q->tx_class) {
1012                 psched_tdiff_t incr2;
1013                 /* Time integrator. We calculate EOS time
1014                    by adding expected packet transmission time.
1015                    If real time is greater, we warp artificial clock,
1016                    so that:
1017
1018                    cbq_time = max(real_time, work);
1019                  */
1020                 incr2 = L2T(&q->link, q->tx_len);
1021                 PSCHED_TADD(q->now, incr2);
1022                 cbq_update(q);
1023                 if ((incr -= incr2) < 0)
1024                         incr = 0;
1025         }
1026         PSCHED_TADD(q->now, incr);
1027         q->now_rt = now;
1028
1029         for (;;) {
1030                 q->wd_expires = 0;
1031
1032                 skb = cbq_dequeue_1(sch);
1033                 if (skb) {
1034                         sch->q.qlen--;
1035                         sch->flags &= ~TCQ_F_THROTTLED;
1036                         return skb;
1037                 }
1038
1039                 /* All the classes are overlimit.
1040
1041                    This is possible if:
1042
1043                    1. The scheduler is empty.
1044                    2. The toplevel cutoff inhibited borrowing.
1045                    3. The root class is overlimit.
1046
1047                    Reset the 2nd and 3rd conditions and retry.
1048
1049                    Note that NS and cbq-2.0 are buggy here: peeking
1050                    an arbitrary class is appropriate for ancestor-only
1051                    sharing, but not for the toplevel algorithm.
1052
1053                    Our version is better but slower, because it requires
1054                    two passes; that is unavoidable with top-level sharing.
1055                 */
1056
1057                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1058                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1059                         break;
1060
1061                 q->toplevel = TC_CBQ_MAXLEVEL;
1062                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1063         }
1064
1065         /* No packets in the scheduler, or nobody wants to give them to us :-(
1066            Sigh... start the watchdog timer in the latter case. */
1067
1068         if (sch->q.qlen) {
1069                 sch->qstats.overlimits++;
1070                 if (q->wd_expires) {
1071                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1072                         if (delay <= 0)
1073                                 delay = 1;
1074                         mod_timer(&q->wd_timer, jiffies + delay);
1075                         sch->flags |= TCQ_F_THROTTLED;
1076                 }
1077         }
1078         return NULL;
1079 }
1080
1081 /* CBQ class maintenance routines */
1082
1083 static void cbq_adjust_levels(struct cbq_class *this)
1084 {
1085         if (this == NULL)
1086                 return;
1087
1088         do {
1089                 int level = 0;
1090                 struct cbq_class *cl;
1091
1092                 if ((cl = this->children) != NULL) {
1093                         do {
1094                                 if (cl->level > level)
1095                                         level = cl->level;
1096                         } while ((cl = cl->sibling) != this->children);
1097                 }
1098                 this->level = level+1;
1099         } while ((this = this->tparent) != NULL);
1100 }
1101
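/* Recompute the WRR quantum of every class in a priority band:
   quantum = allot * weight * nclasses[prio] / quanta[prio], i.e. allot
   scaled by the class weight relative to the band's mean weight
   (quanta[prio] is the sum of weights, maintained by cbq_addprio()
   and cbq_rmprio()). */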
1102 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1103 {
1104         struct cbq_class *cl;
1105         unsigned h;
1106
1107         if (q->quanta[prio] == 0)
1108                 return;
1109
1110         for (h=0; h<16; h++) {
1111                 for (cl = q->classes[h]; cl; cl = cl->next) {
1112                         /* BUGGGG... Beware! This expression suffers from
1113                            arithmetic overflow!
1114                          */
1115                         if (cl->priority == prio) {
1116                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1117                                         q->quanta[prio];
1118                         }
1119                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1120                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1121                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1122                         }
1123                 }
1124         }
1125 }
1126
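/* Rebuild the split node's defaults[] table: for each TC_PRIO_* value,
   point it at the lowest-level class under this split whose defmap
   bitmask claims that priority, so that cbq_classify() can fall back
   by priority when no filter matches. */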
1127 static void cbq_sync_defmap(struct cbq_class *cl)
1128 {
1129         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1130         struct cbq_class *split = cl->split;
1131         unsigned h;
1132         int i;
1133
1134         if (split == NULL)
1135                 return;
1136
1137         for (i=0; i<=TC_PRIO_MAX; i++) {
1138                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1139                         split->defaults[i] = NULL;
1140         }
1141
1142         for (i=0; i<=TC_PRIO_MAX; i++) {
1143                 int level = split->level;
1144
1145                 if (split->defaults[i])
1146                         continue;
1147
1148                 for (h=0; h<16; h++) {
1149                         struct cbq_class *c;
1150
1151                         for (c = q->classes[h]; c; c = c->next) {
1152                                 if (c->split == split && c->level < level &&
1153                                     c->defmap&(1<<i)) {
1154                                         split->defaults[i] = c;
1155                                         level = c->level;
1156                                 }
1157                         }
1158                 }
1159         }
1160 }
1161
1162 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1163 {
1164         struct cbq_class *split = NULL;
1165
1166         if (splitid == 0) {
1167                 if ((split = cl->split) == NULL)
1168                         return;
1169                 splitid = split->classid;
1170         }
1171
1172         if (split == NULL || split->classid != splitid) {
1173                 for (split = cl->tparent; split; split = split->tparent)
1174                         if (split->classid == splitid)
1175                                 break;
1176         }
1177
1178         if (split == NULL)
1179                 return;
1180
1181         if (cl->split != split) {
1182                 cl->defmap = 0;
1183                 cbq_sync_defmap(cl);
1184                 cl->split = split;
1185                 cl->defmap = def&mask;
1186         } else
1187                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1188
1189         cbq_sync_defmap(cl);
1190 }
1191
1192 static void cbq_unlink_class(struct cbq_class *this)
1193 {
1194         struct cbq_class *cl, **clp;
1195         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1196
1197         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1198                 if (cl == this) {
1199                         *clp = cl->next;
1200                         cl->next = NULL;
1201                         break;
1202                 }
1203         }
1204
1205         if (this->tparent) {
1206                 clp=&this->sibling;
1207                 cl = *clp;
1208                 do {
1209                         if (cl == this) {
1210                                 *clp = cl->sibling;
1211                                 break;
1212                         }
1213                         clp = &cl->sibling;
1214                 } while ((cl = *clp) != this->sibling);
1215
1216                 if (this->tparent->children == this) {
1217                         this->tparent->children = this->sibling;
1218                         if (this->sibling == this)
1219                                 this->tparent->children = NULL;
1220                 }
1221         } else {
1222                 BUG_TRAP(this->sibling == this);
1223         }
1224 }
1225
1226 static void cbq_link_class(struct cbq_class *this)
1227 {
1228         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1229         unsigned h = cbq_hash(this->classid);
1230         struct cbq_class *parent = this->tparent;
1231
1232         this->sibling = this;
1233         this->next = q->classes[h];
1234         q->classes[h] = this;
1235
1236         if (parent == NULL)
1237                 return;
1238
1239         if (parent->children == NULL) {
1240                 parent->children = this;
1241         } else {
1242                 this->sibling = parent->children->sibling;
1243                 parent->children->sibling = this;
1244         }
1245 }
1246
1247 static unsigned int cbq_drop(struct Qdisc* sch)
1248 {
1249         struct cbq_sched_data *q = qdisc_priv(sch);
1250         struct cbq_class *cl, *cl_head;
1251         int prio;
1252         unsigned int len;
1253
1254         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1255                 if ((cl_head = q->active[prio]) == NULL)
1256                         continue;
1257
1258                 cl = cl_head;
1259                 do {
1260                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1261                                 sch->q.qlen--;
1262                                 return len;
1263                         }
1264                 } while ((cl = cl->next_alive) != cl_head);
1265         }
1266         return 0;
1267 }
1268
1269 static void
1270 cbq_reset(struct Qdisc* sch)
1271 {
1272         struct cbq_sched_data *q = qdisc_priv(sch);
1273         struct cbq_class *cl;
1274         int prio;
1275         unsigned h;
1276
1277         q->activemask = 0;
1278         q->pmask = 0;
1279         q->tx_class = NULL;
1280         q->tx_borrowed = NULL;
1281         del_timer(&q->wd_timer);
1282         del_timer(&q->delay_timer);
1283         q->toplevel = TC_CBQ_MAXLEVEL;
1284         PSCHED_GET_TIME(q->now);
1285         q->now_rt = q->now;
1286
1287         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1288                 q->active[prio] = NULL;
1289
1290         for (h = 0; h < 16; h++) {
1291                 for (cl = q->classes[h]; cl; cl = cl->next) {
1292                         qdisc_reset(cl->q);
1293
1294                         cl->next_alive = NULL;
1295                         PSCHED_SET_PASTPERFECT(cl->undertime);
1296                         cl->avgidle = cl->maxidle;
1297                         cl->deficit = cl->quantum;
1298                         cl->cpriority = cl->priority;
1299                 }
1300         }
1301         sch->q.qlen = 0;
1302 }
1303
1304
1305 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1306 {
1307         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1308                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1309                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1310         }
1311         if (lss->change&TCF_CBQ_LSS_EWMA)
1312                 cl->ewma_log = lss->ewma_log;
1313         if (lss->change&TCF_CBQ_LSS_AVPKT)
1314                 cl->avpkt = lss->avpkt;
1315         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1316                 cl->minidle = -(long)lss->minidle;
1317         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1318                 cl->maxidle = lss->maxidle;
1319                 cl->avgidle = lss->maxidle;
1320         }
1321         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1322                 cl->offtime = lss->offtime;
1323         return 0;
1324 }
1325
1326 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1327 {
1328         q->nclasses[cl->priority]--;
1329         q->quanta[cl->priority] -= cl->weight;
1330         cbq_normalize_quanta(q, cl->priority);
1331 }
1332
1333 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1334 {
1335         q->nclasses[cl->priority]++;
1336         q->quanta[cl->priority] += cl->weight;
1337         cbq_normalize_quanta(q, cl->priority);
1338 }
1339
1340 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1341 {
1342         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1343
1344         if (wrr->allot)
1345                 cl->allot = wrr->allot;
1346         if (wrr->weight)
1347                 cl->weight = wrr->weight;
1348         if (wrr->priority) {
1349                 cl->priority = wrr->priority-1;
1350                 cl->cpriority = cl->priority;
1351                 if (cl->priority >= cl->priority2)
1352                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1353         }
1354
1355         cbq_addprio(q, cl);
1356         return 0;
1357 }
1358
1359 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1360 {
1361         switch (ovl->strategy) {
1362         case TC_CBQ_OVL_CLASSIC:
1363                 cl->overlimit = cbq_ovl_classic;
1364                 break;
1365         case TC_CBQ_OVL_DELAY:
1366                 cl->overlimit = cbq_ovl_delay;
1367                 break;
1368         case TC_CBQ_OVL_LOWPRIO:
1369                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1370                     ovl->priority2-1 <= cl->priority)
1371                         return -EINVAL;
1372                 cl->priority2 = ovl->priority2-1;
1373                 cl->overlimit = cbq_ovl_lowprio;
1374                 break;
1375         case TC_CBQ_OVL_DROP:
1376                 cl->overlimit = cbq_ovl_drop;
1377                 break;
1378         case TC_CBQ_OVL_RCLASSIC:
1379                 cl->overlimit = cbq_ovl_rclassic;
1380                 break;
1381         default:
1382                 return -EINVAL;
1383         }
1384         cl->penalty = (ovl->penalty*HZ)/1000;
1385         return 0;
1386 }
1387
1388 #ifdef CONFIG_NET_CLS_POLICE
1389 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1390 {
1391         cl->police = p->police;
1392
1393         if (cl->q->handle) {
1394                 if (p->police == TC_POLICE_RECLASSIFY)
1395                         cl->q->reshape_fail = cbq_reshape_fail;
1396                 else
1397                         cl->q->reshape_fail = NULL;
1398         }
1399         return 0;
1400 }
1401 #endif
1402
1403 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1404 {
1405         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1406         return 0;
1407 }
1408
1409 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1410 {
1411         struct cbq_sched_data *q = qdisc_priv(sch);
1412         struct rtattr *tb[TCA_CBQ_MAX];
1413         struct tc_ratespec *r;
1414
1415         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1416             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1417             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1418                 return -EINVAL;
1419
1420         if (tb[TCA_CBQ_LSSOPT-1] &&
1421             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1422                 return -EINVAL;
1423
1424         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1425
1426         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1427                 return -EINVAL;
1428
1429         q->link.refcnt = 1;
1430         q->link.sibling = &q->link;
1431         q->link.classid = sch->handle;
1432         q->link.qdisc = sch;
1433         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1434                 q->link.q = &noop_qdisc;
1435
1436         q->link.priority = TC_CBQ_MAXPRIO-1;
1437         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1438         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1439         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1440         q->link.overlimit = cbq_ovl_classic;
1441         q->link.allot = psched_mtu(sch->dev);
1442         q->link.quantum = q->link.allot;
1443         q->link.weight = q->link.R_tab->rate.rate;
1444
1445         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1446         q->link.avpkt = q->link.allot/2;
1447         q->link.minidle = -0x7FFFFFFF;
1448         q->link.stats_lock = &sch->dev->queue_lock;
1449
1450         init_timer(&q->wd_timer);
1451         q->wd_timer.data = (unsigned long)sch;
1452         q->wd_timer.function = cbq_watchdog;
1453         init_timer(&q->delay_timer);
1454         q->delay_timer.data = (unsigned long)sch;
1455         q->delay_timer.function = cbq_undelay;
1456         q->toplevel = TC_CBQ_MAXLEVEL;
1457         PSCHED_GET_TIME(q->now);
1458         q->now_rt = q->now;
1459
1460         cbq_link_class(&q->link);
1461
1462         if (tb[TCA_CBQ_LSSOPT-1])
1463                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464
1465         cbq_addprio(q, &q->link);
1466         return 0;
1467 }
1468
1469 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470 {
1471         unsigned char    *b = skb->tail;
1472
1473         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1474         return skb->len;
1475
1476 rtattr_failure:
1477         skb_trim(skb, b - skb->data);
1478         return -1;
1479 }
1480
1481 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482 {
1483         unsigned char    *b = skb->tail;
1484         struct tc_cbq_lssopt opt;
1485
1486         opt.flags = 0;
1487         if (cl->borrow == NULL)
1488                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1489         if (cl->share == NULL)
1490                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1491         opt.ewma_log = cl->ewma_log;
1492         opt.level = cl->level;
1493         opt.avpkt = cl->avpkt;
1494         opt.maxidle = cl->maxidle;
1495         opt.minidle = (u32)(-cl->minidle);
1496         opt.offtime = cl->offtime;
1497         opt.change = ~0;
1498         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1499         return skb->len;
1500
1501 rtattr_failure:
1502         skb_trim(skb, b - skb->data);
1503         return -1;
1504 }
1505
1506 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507 {
1508         unsigned char    *b = skb->tail;
1509         struct tc_cbq_wrropt opt;
1510
1511         opt.flags = 0;
1512         opt.allot = cl->allot;
1513         opt.priority = cl->priority+1;
1514         opt.cpriority = cl->cpriority+1;
1515         opt.weight = cl->weight;
1516         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1517         return skb->len;
1518
1519 rtattr_failure:
1520         skb_trim(skb, b - skb->data);
1521         return -1;
1522 }
1523
1524 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525 {
1526         unsigned char    *b = skb->tail;
1527         struct tc_cbq_ovl opt;
1528
1529         opt.strategy = cl->ovl_strategy;
1530         opt.priority2 = cl->priority2+1;
1531         opt.pad = 0;
1532         opt.penalty = (cl->penalty*1000)/HZ;
1533         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1534         return skb->len;
1535
1536 rtattr_failure:
1537         skb_trim(skb, b - skb->data);
1538         return -1;
1539 }
1540
1541 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1542 {
1543         unsigned char    *b = skb->tail;
1544         struct tc_cbq_fopt opt;
1545
1546         if (cl->split || cl->defmap) {
1547                 opt.split = cl->split ? cl->split->classid : 0;
1548                 opt.defmap = cl->defmap;
1549                 opt.defchange = ~0;
1550                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1551         }
1552         return skb->len;
1553
1554 rtattr_failure:
1555         skb_trim(skb, b - skb->data);
1556         return -1;
1557 }
1558
1559 #ifdef CONFIG_NET_CLS_POLICE
1560 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1561 {
1562         unsigned char    *b = skb->tail;
1563         struct tc_cbq_police opt;
1564
1565         if (cl->police) {
1566                 opt.police = cl->police;
1567                 opt.__res1 = 0;
1568                 opt.__res2 = 0;
1569                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1570         }
1571         return skb->len;
1572
1573 rtattr_failure:
1574         skb_trim(skb, b - skb->data);
1575         return -1;
1576 }
1577 #endif
1578
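/* Emit the full attribute set for one class; shared by the qdisc dump
 * (cbq_dump) and the per-class dump (cbq_dump_class) below. */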
1579 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1580 {
1581         if (cbq_dump_lss(skb, cl) < 0 ||
1582             cbq_dump_rate(skb, cl) < 0 ||
1583             cbq_dump_wrr(skb, cl) < 0 ||
1584             cbq_dump_ovl(skb, cl) < 0 ||
1585 #ifdef CONFIG_NET_CLS_POLICE
1586             cbq_dump_police(skb, cl) < 0 ||
1587 #endif
1588             cbq_dump_fopt(skb, cl) < 0)
1589                 return -1;
1590         return 0;
1591 }
1592
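/* Dump the scheduler's own parameters, i.e. those of the root class
 * q->link, nested inside TCA_OPTIONS. */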
1593 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594 {
1595         struct cbq_sched_data *q = qdisc_priv(sch);
1596         unsigned char    *b = skb->tail;
1597         struct rtattr *rta;
1598
1599         rta = (struct rtattr*)b;
1600         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1601         if (cbq_dump_attr(skb, &q->link) < 0)
1602                 goto rtattr_failure;
1603         rta->rta_len = skb->tail - b;
1604         return skb->len;
1605
1606 rtattr_failure:
1607         skb_trim(skb, b - skb->data);
1608         return -1;
1609 }
1610
1611 static int
1612 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1613 {
1614         struct cbq_sched_data *q = qdisc_priv(sch);
1615
1616         q->link.xstats.avgidle = q->link.avgidle;
1617         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1618 }
1619
1620 static int
1621 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1622                struct sk_buff *skb, struct tcmsg *tcm)
1623 {
1624         struct cbq_class *cl = (struct cbq_class*)arg;
1625         unsigned char    *b = skb->tail;
1626         struct rtattr *rta;
1627
1628         if (cl->tparent)
1629                 tcm->tcm_parent = cl->tparent->classid;
1630         else
1631                 tcm->tcm_parent = TC_H_ROOT;
1632         tcm->tcm_handle = cl->classid;
1633         tcm->tcm_info = cl->q->handle;
1634
1635         rta = (struct rtattr*)b;
1636         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1637         if (cbq_dump_attr(skb, cl) < 0)
1638                 goto rtattr_failure;
1639         rta->rta_len = skb->tail - b;
1640         return skb->len;
1641
1642 rtattr_failure:
1643         skb_trim(skb, b - skb->data);
1644         return -1;
1645 }
1646
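/* Per-class statistics dump: refresh qlen, avgidle and undertime before
 * copying the basic, rate-estimator and queue stats plus the CBQ xstats. */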
1647 static int
1648 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1649         struct gnet_dump *d)
1650 {
1651         struct cbq_sched_data *q = qdisc_priv(sch);
1652         struct cbq_class *cl = (struct cbq_class*)arg;
1653
1654         cl->qstats.qlen = cl->q->q.qlen;
1655         cl->xstats.avgidle = cl->avgidle;
1656         cl->xstats.undertime = 0;
1657
1658         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1659                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1660
1661         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1662 #ifdef CONFIG_NET_ESTIMATOR
1663             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1664 #endif
1665             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1666                 return -1;
1667
1668         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1669 }
1670
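/* Attach a new leaf qdisc to a class.  A NULL 'new' means "revert to a
 * default pfifo"; the displaced qdisc is reset under the tree lock and
 * handed back through 'old'. */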
1671 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1672                      struct Qdisc **old)
1673 {
1674         struct cbq_class *cl = (struct cbq_class*)arg;
1675
1676         if (cl) {
1677                 if (new == NULL) {
1678                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)) == NULL)
1679                                 return -ENOBUFS;
1680                 } else {
1681 #ifdef CONFIG_NET_CLS_POLICE
1682                         if (cl->police == TC_POLICE_RECLASSIFY)
1683                                 new->reshape_fail = cbq_reshape_fail;
1684 #endif
1685                 }
1686                 sch_tree_lock(sch);
1687                 *old = cl->q;
1688                 cl->q = new;
1689                 sch->q.qlen -= (*old)->q.qlen;
1690                 qdisc_reset(*old);
1691                 sch_tree_unlock(sch);
1692
1693                 return 0;
1694         }
1695         return -ENOENT;
1696 }
1697
1698 static struct Qdisc *
1699 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1700 {
1701         struct cbq_class *cl = (struct cbq_class*)arg;
1702
1703         return cl ? cl->q : NULL;
1704 }
1705
1706 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1707 {
1708         struct cbq_sched_data *q = qdisc_priv(sch);
1709         struct cbq_class *cl = cbq_class_lookup(q, classid);
1710
1711         if (cl) {
1712                 cl->refcnt++;
1713                 return (unsigned long)cl;
1714         }
1715         return 0;
1716 }
1717
1718 static void cbq_destroy_filters(struct cbq_class *cl)
1719 {
1720         struct tcf_proto *tp;
1721
1722         while ((tp = cl->filter_list) != NULL) {
1723                 cl->filter_list = tp->next;
1724                 tcf_destroy(tp);
1725         }
1726 }
1727
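/* Release everything a class owns: filters, leaf qdisc, rate table and
 * (if configured) the rate estimator.  The root class q->link is embedded
 * in cbq_sched_data, so only dynamically allocated classes are kfree()d. */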
1728 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1729 {
1730         struct cbq_sched_data *q = qdisc_priv(sch);
1731
1732         BUG_TRAP(!cl->filters);
1733
1734         cbq_destroy_filters(cl);
1735         qdisc_destroy(cl->q);
1736         qdisc_put_rtab(cl->R_tab);
1737 #ifdef CONFIG_NET_ESTIMATOR
1738         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1739 #endif
1740         if (cl != &q->link)
1741                 kfree(cl);
1742 }
1743
1744 static void
1745 cbq_destroy(struct Qdisc* sch)
1746 {
1747         struct cbq_sched_data *q = qdisc_priv(sch);
1748         struct cbq_class *cl;
1749         unsigned h;
1750
1751 #ifdef CONFIG_NET_CLS_POLICE
1752         q->rx_class = NULL;
1753 #endif
1754         /*
1755          * Filters must be destroyed first because we don't destroy the
1756          * classes from root to leaves, which means that filters can still
1757          * be bound to classes which have been destroyed already. --TGR '04
1758          */
1759         for (h = 0; h < 16; h++)
1760                 for (cl = q->classes[h]; cl; cl = cl->next)
1761                         cbq_destroy_filters(cl);
1762
1763         for (h = 0; h < 16; h++) {
1764                 struct cbq_class *next;
1765
1766                 for (cl = q->classes[h]; cl; cl = next) {
1767                         next = cl->next;
1768                         cbq_destroy_class(sch, cl);
1769                 }
1770         }
1771 }
1772
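/* Drop a class reference.  On the last put the class is destroyed; with
 * policing enabled, a dangling q->rx_class pointer is cleared first under
 * the device queue lock. */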
1773 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1774 {
1775         struct cbq_class *cl = (struct cbq_class*)arg;
1776
1777         if (--cl->refcnt == 0) {
1778 #ifdef CONFIG_NET_CLS_POLICE
1779                 struct cbq_sched_data *q = qdisc_priv(sch);
1780
1781                 spin_lock_bh(&sch->dev->queue_lock);
1782                 if (q->rx_class == cl)
1783                         q->rx_class = NULL;
1784                 spin_unlock_bh(&sch->dev->queue_lock);
1785 #endif
1786
1787                 cbq_destroy_class(sch, cl);
1788         }
1789 }
1790
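/*
 * Create a new class or change an existing one (tc class add/change).
 * All attribute payloads are length-checked first; an existing class is
 * then updated under sch_tree_lock(), while a new class gets a classid
 * (auto-generated from hgenerator when none was supplied), a default
 * pfifo leaf and its WRR/LSS parameters, inheriting unset values from
 * the root class.  Illustrative userspace invocation (not taken from
 * this file), roughly:
 *
 *	tc class add dev eth0 parent 1: classid 1:10 cbq \
 *		bandwidth 10Mbit rate 1Mbit allot 1514 prio 5 avpkt 1000
 */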
1791 static int
1792 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1793                  unsigned long *arg)
1794 {
1795         int err;
1796         struct cbq_sched_data *q = qdisc_priv(sch);
1797         struct cbq_class *cl = (struct cbq_class*)*arg;
1798         struct rtattr *opt = tca[TCA_OPTIONS-1];
1799         struct rtattr *tb[TCA_CBQ_MAX];
1800         struct cbq_class *parent;
1801         struct qdisc_rate_table *rtab = NULL;
1802
1803         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1804                 return -EINVAL;
1805
1806         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1807             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1808                 return -EINVAL;
1809
1810         if (tb[TCA_CBQ_FOPT-1] &&
1811             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1812                 return -EINVAL;
1813
1814         if (tb[TCA_CBQ_RATE-1] &&
1815             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1816                 return -EINVAL;
1817
1818         if (tb[TCA_CBQ_LSSOPT-1] &&
1819             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1820                 return -EINVAL;
1821
1822         if (tb[TCA_CBQ_WRROPT-1] &&
1823             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1824                 return -EINVAL;
1825
1826 #ifdef CONFIG_NET_CLS_POLICE
1827         if (tb[TCA_CBQ_POLICE-1] &&
1828             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1829                 return -EINVAL;
1830 #endif
1831
1832         if (cl) {
1833                 /* Check parent */
1834                 if (parentid) {
1835                         if (cl->tparent && cl->tparent->classid != parentid)
1836                                 return -EINVAL;
1837                         if (!cl->tparent && parentid != TC_H_ROOT)
1838                                 return -EINVAL;
1839                 }
1840
1841                 if (tb[TCA_CBQ_RATE-1]) {
1842                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1843                         if (rtab == NULL)
1844                                 return -EINVAL;
1845                 }
1846
1847                 /* Change class parameters */
1848                 sch_tree_lock(sch);
1849
1850                 if (cl->next_alive != NULL)
1851                         cbq_deactivate_class(cl);
1852
1853                 if (rtab) {
1854                         rtab = xchg(&cl->R_tab, rtab);
1855                         qdisc_put_rtab(rtab);
1856                 }
1857
1858                 if (tb[TCA_CBQ_LSSOPT-1])
1859                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1860
1861                 if (tb[TCA_CBQ_WRROPT-1]) {
1862                         cbq_rmprio(q, cl);
1863                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1864                 }
1865
1866                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1867                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1868
1869 #ifdef CONFIG_NET_CLS_POLICE
1870                 if (tb[TCA_CBQ_POLICE-1])
1871                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1872 #endif
1873
1874                 if (tb[TCA_CBQ_FOPT-1])
1875                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1876
1877                 if (cl->q->q.qlen)
1878                         cbq_activate_class(cl);
1879
1880                 sch_tree_unlock(sch);
1881
1882 #ifdef CONFIG_NET_ESTIMATOR
1883                 if (tca[TCA_RATE-1])
1884                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1885                                 cl->stats_lock, tca[TCA_RATE-1]);
1886 #endif
1887                 return 0;
1888         }
1889
1890         if (parentid == TC_H_ROOT)
1891                 return -EINVAL;
1892
1893         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1894             tb[TCA_CBQ_LSSOPT-1] == NULL)
1895                 return -EINVAL;
1896
1897         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1898         if (rtab == NULL)
1899                 return -EINVAL;
1900
1901         if (classid) {
1902                 err = -EINVAL;
1903                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1904                         goto failure;
1905         } else {
1906                 int i;
1907                 classid = TC_H_MAKE(sch->handle,0x8000);
1908
1909                 for (i=0; i<0x8000; i++) {
1910                         if (++q->hgenerator >= 0x8000)
1911                                 q->hgenerator = 1;
1912                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1913                                 break;
1914                 }
1915                 err = -ENOSR;
1916                 if (i >= 0x8000)
1917                         goto failure;
1918                 classid = classid|q->hgenerator;
1919         }
1920
1921         parent = &q->link;
1922         if (parentid) {
1923                 parent = cbq_class_lookup(q, parentid);
1924                 err = -EINVAL;
1925                 if (parent == NULL)
1926                         goto failure;
1927         }
1928
1929         err = -ENOBUFS;
1930         cl = kmalloc(sizeof(*cl), GFP_KERNEL);
1931         if (cl == NULL)
1932                 goto failure;
1933         memset(cl, 0, sizeof(*cl));
1934         cl->R_tab = rtab;
1935         rtab = NULL;
1936         cl->refcnt = 1;
1937         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops)))
1938                 cl->q = &noop_qdisc;
1939         cl->classid = classid;
1940         cl->tparent = parent;
1941         cl->qdisc = sch;
1942         cl->allot = parent->allot;
1943         cl->quantum = cl->allot;
1944         cl->weight = cl->R_tab->rate.rate;
1945         cl->stats_lock = &sch->dev->queue_lock;
1946
1947         sch_tree_lock(sch);
1948         cbq_link_class(cl);
1949         cl->borrow = cl->tparent;
1950         if (cl->tparent != &q->link)
1951                 cl->share = cl->tparent;
1952         cbq_adjust_levels(parent);
1953         cl->minidle = -0x7FFFFFFF;
1954         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1955         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1956         if (cl->ewma_log==0)
1957                 cl->ewma_log = q->link.ewma_log;
1958         if (cl->maxidle==0)
1959                 cl->maxidle = q->link.maxidle;
1960         if (cl->avpkt==0)
1961                 cl->avpkt = q->link.avpkt;
1962         cl->overlimit = cbq_ovl_classic;
1963         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1964                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1965 #ifdef CONFIG_NET_CLS_POLICE
1966         if (tb[TCA_CBQ_POLICE-1])
1967                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1968 #endif
1969         if (tb[TCA_CBQ_FOPT-1])
1970                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1971         sch_tree_unlock(sch);
1972
1973 #ifdef CONFIG_NET_ESTIMATOR
1974         if (tca[TCA_RATE-1])
1975                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1976                         cl->stats_lock, tca[TCA_RATE-1]);
1977 #endif
1978
1979         *arg = (unsigned long)cl;
1980         return 0;
1981
1982 failure:
1983         qdisc_put_rtab(rtab);
1984         return err;
1985 }
1986
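/* Delete a class.  Refused while filters are still bound, children exist
 * or the target is the root link; otherwise the class is unlinked, its
 * defmap and priority bookkeeping cleaned up, and it is destroyed once
 * the last reference is dropped. */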
1987 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1988 {
1989         struct cbq_sched_data *q = qdisc_priv(sch);
1990         struct cbq_class *cl = (struct cbq_class*)arg;
1991
1992         if (cl->filters || cl->children || cl == &q->link)
1993                 return -EBUSY;
1994
1995         sch_tree_lock(sch);
1996
1997         if (cl->next_alive)
1998                 cbq_deactivate_class(cl);
1999
2000         if (q->tx_borrowed == cl)
2001                 q->tx_borrowed = q->tx_class;
2002         if (q->tx_class == cl) {
2003                 q->tx_class = NULL;
2004                 q->tx_borrowed = NULL;
2005         }
2006 #ifdef CONFIG_NET_CLS_POLICE
2007         if (q->rx_class == cl)
2008                 q->rx_class = NULL;
2009 #endif
2010
2011         cbq_unlink_class(cl);
2012         cbq_adjust_levels(cl->tparent);
2013         cl->defmap = 0;
2014         cbq_sync_defmap(cl);
2015
2016         cbq_rmprio(q, cl);
2017         sch_tree_unlock(sch);
2018
2019         if (--cl->refcnt == 0)
2020                 cbq_destroy_class(sch, cl);
2021
2022         return 0;
2023 }
2024
2025 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2026 {
2027         struct cbq_sched_data *q = qdisc_priv(sch);
2028         struct cbq_class *cl = (struct cbq_class *)arg;
2029
2030         if (cl == NULL)
2031                 cl = &q->link;
2032
2033         return &cl->filter_list;
2034 }
2035
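/* Bind a filter to a class: the target must sit below the filter's parent
 * in the link-sharing tree (checked via the level fields), and cl->filters
 * is bumped so the class cannot be deleted while filters reference it. */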
2036 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2037                                      u32 classid)
2038 {
2039         struct cbq_sched_data *q = qdisc_priv(sch);
2040         struct cbq_class *p = (struct cbq_class*)parent;
2041         struct cbq_class *cl = cbq_class_lookup(q, classid);
2042
2043         if (cl) {
2044                 if (p && p->level <= cl->level)
2045                         return 0;
2046                 cl->filters++;
2047                 return (unsigned long)cl;
2048         }
2049         return 0;
2050 }
2051
2052 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2053 {
2054         struct cbq_class *cl = (struct cbq_class*)arg;
2055
2056         cl->filters--;
2057 }
2058
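/* Iterate over all classes in the 16-bucket class hash, honouring the
 * walker's skip/count cursor and stopping as soon as the callback returns
 * a negative value. */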
2059 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2060 {
2061         struct cbq_sched_data *q = qdisc_priv(sch);
2062         unsigned h;
2063
2064         if (arg->stop)
2065                 return;
2066
2067         for (h = 0; h < 16; h++) {
2068                 struct cbq_class *cl;
2069
2070                 for (cl = q->classes[h]; cl; cl = cl->next) {
2071                         if (arg->count < arg->skip) {
2072                                 arg->count++;
2073                                 continue;
2074                         }
2075                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2076                                 arg->stop = 1;
2077                                 return;
2078                         }
2079                         arg->count++;
2080                 }
2081         }
2082 }
2083
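/* Operation tables hooking CBQ into the packet scheduler core: the class
 * operations implement the tc class/filter interface, the qdisc operations
 * the enqueue/dequeue path and the dump hooks defined above. */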
2084 static struct Qdisc_class_ops cbq_class_ops = {
2085         .graft          =       cbq_graft,
2086         .leaf           =       cbq_leaf,
2087         .get            =       cbq_get,
2088         .put            =       cbq_put,
2089         .change         =       cbq_change_class,
2090         .delete         =       cbq_delete,
2091         .walk           =       cbq_walk,
2092         .tcf_chain      =       cbq_find_tcf,
2093         .bind_tcf       =       cbq_bind_filter,
2094         .unbind_tcf     =       cbq_unbind_filter,
2095         .dump           =       cbq_dump_class,
2096         .dump_stats     =       cbq_dump_class_stats,
2097 };
2098
2099 static struct Qdisc_ops cbq_qdisc_ops = {
2100         .next           =       NULL,
2101         .cl_ops         =       &cbq_class_ops,
2102         .id             =       "cbq",
2103         .priv_size      =       sizeof(struct cbq_sched_data),
2104         .enqueue        =       cbq_enqueue,
2105         .dequeue        =       cbq_dequeue,
2106         .requeue        =       cbq_requeue,
2107         .drop           =       cbq_drop,
2108         .init           =       cbq_init,
2109         .reset          =       cbq_reset,
2110         .destroy        =       cbq_destroy,
2111         .change         =       NULL,
2112         .dump           =       cbq_dump,
2113         .dump_stats     =       cbq_dump_stats,
2114         .owner          =       THIS_MODULE,
2115 };
2116
2117 static int __init cbq_module_init(void)
2118 {
2119         return register_qdisc(&cbq_qdisc_ops);
2120 }
2121 static void __exit cbq_module_exit(void)
2122 {
2123         unregister_qdisc(&cbq_qdisc_ops);
2124 }
2125 module_init(cbq_module_init)
2126 module_exit(cbq_module_exit)
2127 MODULE_LICENSE("GPL");