1 /*
2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  */
12
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
21
22
23 /*      Class-Based Queueing (CBQ) algorithm.
24         =======================================
25
26         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27                  Management Models for Packet Networks",
28                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
29
30                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
31
32                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
33                  Parameters", 1996
34
35                  [4] Sally Floyd and Michael Speer, "Experimental Results
36                  for Class-Based Queueing", 1998, not published.
37
38         -----------------------------------------------------------------------
39
40         The algorithm skeleton was taken from the NS simulator cbq.cc.
41         If someone wants to check this code against the LBL version,
42         they should take into account that ONLY the skeleton was borrowed;
43         the implementation is different. In particular:
44
45         --- The WRR algorithm is different. Our version looks more
46         reasonable (I hope) and works when quanta are allowed to be
47         less than the MTU, which is always the case when real-time classes
48         have small rates. Note that the statement of [3] is
49         incomplete: the delay may actually be estimated even if the class's
50         per-round allotment is less than the MTU. Namely, if the per-round
51         allotment is W*r_i, and r_1+...+r_k = r < 1, then

53         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B

55         In the worst case we have the IntServ estimate with D = W*r+k*MTU
56         and C = MTU*r. The proof (if correct at all) is trivial.
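
        For a rough illustration (the numbers are made up for this example,
        they do not come from [3]): take k = 2 classes with r_1 = r_2 = 0.25
        (so r = 0.5), MTU = 1500 bytes, and W chosen so that W*r_i = 500
        bytes, i.e. W = 2000. Then [MTU/(W*r_i)] = 3 and W*r = 1000, so the
        bound is (3*1000 + 1000 + 2*1500)/B = 7000/B, i.e. 56 ms on a
        1 Mbit/s (125000 byte/s) link.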
57
58
59         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60         interpret some places, which look like wrong translations
61         from NS. Anyone who finds these differences is welcome to
62         explain to me why I am wrong 8).
63
64         --- Linux has no EOI event, so we cannot estimate the true class
65         idle time. The workaround is to consider the next dequeue event
66         as a sign that the previous packet has finished. This is wrong
67         because of internal device queueing, but on a permanently loaded
68         link it is true. Moreover, combined with the clock integrator,
69         this scheme looks very close to an ideal solution.  */
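
/* For orientation only: a minimal, hypothetical tc configuration that
 * exercises this qdisc (the device name, rates and class layout are made up
 * for the example; real setups normally attach filters and more classes):
 *
 *   tc qdisc add dev eth0 root handle 1: cbq bandwidth 10Mbit avpkt 1000
 *   tc class add dev eth0 parent 1: classid 1:1 cbq bandwidth 10Mbit \
 *      rate 1Mbit weight 100Kbit allot 1514 prio 5 avpkt 1000 bounded
 */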
70
71 struct cbq_sched_data;
72
73
74 struct cbq_class
75 {
76         struct cbq_class        *next;          /* hash table link */
77         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
78
79 /* Parameters */
80         u32                     classid;
81         unsigned char           priority;       /* class priority */
82         unsigned char           priority2;      /* priority to be used after overlimit */
83         unsigned char           ewma_log;       /* time constant for idle time calculation */
84         unsigned char           ovl_strategy;
85 #ifdef CONFIG_NET_CLS_ACT
86         unsigned char           police;
87 #endif
88
89         u32                     defmap;
90
91         /* Link-sharing scheduler parameters */
92         long                    maxidle;        /* Class parameters: see below. */
93         long                    offtime;
94         long                    minidle;
95         u32                     avpkt;
96         struct qdisc_rate_table *R_tab;
97
98         /* Overlimit strategy parameters */
99         void                    (*overlimit)(struct cbq_class *cl);
100         psched_tdiff_t          penalty;
101
102         /* General scheduler (WRR) parameters */
103         long                    allot;
104         long                    quantum;        /* Allotment per WRR round */
105         long                    weight;         /* Relative allotment: see below */
106
107         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
108         struct cbq_class        *split;         /* Ptr to split node */
109         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
110         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
111         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
112                                                    parent otherwise */
113         struct cbq_class        *sibling;       /* Sibling chain */
114         struct cbq_class        *children;      /* Pointer to children chain */
115
116         struct Qdisc            *q;             /* Elementary queueing discipline */
117
118
119 /* Variables */
120         unsigned char           cpriority;      /* Effective priority */
121         unsigned char           delayed;
122         unsigned char           level;          /* level of the class in hierarchy:
123                                                    0 for leaf classes, and maximal
124                                                    level of children + 1 for nodes.
125                                                  */
126
127         psched_time_t           last;           /* Last end of service */
128         psched_time_t           undertime;
129         long                    avgidle;
130         long                    deficit;        /* Saved deficit for WRR */
131         psched_time_t           penalized;
132         struct gnet_stats_basic bstats;
133         struct gnet_stats_queue qstats;
134         struct gnet_stats_rate_est rate_est;
135         struct tc_cbq_xstats    xstats;
136
137         struct tcf_proto        *filter_list;
138
139         int                     refcnt;
140         int                     filters;
141
142         struct cbq_class        *defaults[TC_PRIO_MAX+1];
143 };
144
145 struct cbq_sched_data
146 {
147         struct cbq_class        *classes[16];           /* Hash table of all classes */
148         int                     nclasses[TC_CBQ_MAXPRIO+1];
149         unsigned                quanta[TC_CBQ_MAXPRIO+1];
150
151         struct cbq_class        link;
152
153         unsigned                activemask;
154         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
155                                                                    with backlog */
156
157 #ifdef CONFIG_NET_CLS_ACT
158         struct cbq_class        *rx_class;
159 #endif
160         struct cbq_class        *tx_class;
161         struct cbq_class        *tx_borrowed;
162         int                     tx_len;
163         psched_time_t           now;            /* Cached timestamp */
164         psched_time_t           now_rt;         /* Cached real time */
165         unsigned                pmask;
166
167         struct hrtimer          delay_timer;
168         struct qdisc_watchdog   watchdog;       /* Watchdog timer,
169                                                    started when CBQ has
170                                                    backlog, but cannot
171                                                    transmit just now */
172         psched_tdiff_t          wd_expires;
173         int                     toplevel;
174         u32                     hgenerator;
175 };
176
177
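/* L2T ("length to time"): look up, in the class's precomputed rate table,
 * the time (in scheduler ticks) needed to transmit a packet of `len' bytes
 * at the class rate; lengths are bucketed in cells of 2^cell_log bytes.
 */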
178 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
179
180
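/* Fold a classid into one of the 16 hash buckets. For instance, a
 * hypothetical classid 1:2 (0x00010002) gives
 * 0x00010002 ^ 0x00000100 = 0x00010102, then ^ 0x00001010 = 0x00011112,
 * i.e. bucket 0x2.
 */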
181 static __inline__ unsigned cbq_hash(u32 h)
182 {
183         h ^= h>>8;
184         h ^= h>>4;
185         return h&0xF;
186 }
187
188 static __inline__ struct cbq_class *
189 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
190 {
191         struct cbq_class *cl;
192
193         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
194                 if (cl->classid == classid)
195                         return cl;
196         return NULL;
197 }
198
199 #ifdef CONFIG_NET_CLS_ACT
200
201 static struct cbq_class *
202 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
203 {
204         struct cbq_class *cl, *new;
205
206         for (cl = this->tparent; cl; cl = cl->tparent)
207                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
208                         return new;
209
210         return NULL;
211 }
212
213 #endif
214
215 /* Classify a packet. The procedure is pretty complicated, but
216    it allows us to combine link sharing and priority scheduling
217    transparently.
218
219    Namely, you can put link sharing rules (e.g. route based) at the root
220    of CBQ, so that it resolves to split nodes. Then packets are classified
221    by logical priority, or a more specific classifier may be attached
222    to the split node.
223  */
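
/* For instance (hypothetical handles): with sch->handle 1:0 and
 * skb->priority set to 0x00010002 (i.e. 1:2), step 1 of cbq_classify()
 * below matches on the major number and class 1:2 is used directly,
 * without consulting any classifier.
 */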
224
225 static struct cbq_class *
226 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
227 {
228         struct cbq_sched_data *q = qdisc_priv(sch);
229         struct cbq_class *head = &q->link;
230         struct cbq_class **defmap;
231         struct cbq_class *cl = NULL;
232         u32 prio = skb->priority;
233         struct tcf_result res;
234
235         /*
236          *  Step 1. If skb->priority points to one of our classes, use it.
237          */
238         if (TC_H_MAJ(prio^sch->handle) == 0 &&
239             (cl = cbq_class_lookup(q, prio)) != NULL)
240                 return cl;
241
242         *qerr = NET_XMIT_BYPASS;
243         for (;;) {
244                 int result = 0;
245                 defmap = head->defaults;
246
247                 /*
248                  * Step 2+n. Apply classifier.
249                  */
250                 if (!head->filter_list ||
251                     (result = tc_classify_compat(skb, head->filter_list, &res)) < 0)
252                         goto fallback;
253
254                 if ((cl = (void*)res.class) == NULL) {
255                         if (TC_H_MAJ(res.classid))
256                                 cl = cbq_class_lookup(q, res.classid);
257                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
258                                 cl = defmap[TC_PRIO_BESTEFFORT];
259
260                         if (cl == NULL || cl->level >= head->level)
261                                 goto fallback;
262                 }
263
264 #ifdef CONFIG_NET_CLS_ACT
265                 switch (result) {
266                 case TC_ACT_QUEUED:
267                 case TC_ACT_STOLEN:
268                         *qerr = NET_XMIT_SUCCESS;
269                 case TC_ACT_SHOT:
270                         return NULL;
271                 case TC_ACT_RECLASSIFY:
272                         return cbq_reclassify(skb, cl);
273                 }
274 #endif
275                 if (cl->level == 0)
276                         return cl;
277
278                 /*
279                  * Step 3+n. If the classifier selected a link sharing class,
280                  *         apply the agency-specific classifier.
281                  *         Repeat this procedure until we hit a leaf node.
282                  */
283                 head = cl;
284         }
285
286 fallback:
287         cl = head;
288
289         /*
290          * Step 4. No success...
291          */
292         if (TC_H_MAJ(prio) == 0 &&
293             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
294             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
295                 return head;
296
297         return cl;
298 }
299
300 /*
301    A packet has just been enqueued on an empty class.
302    cbq_activate_class adds the class to the tail of the active class list
303    of its priority band.
304  */
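
/* Note on the structure maintained here: q->active[prio] points at the
 * tail of a circular, singly linked list threaded through next_alive, so
 * the head of the list is q->active[prio]->next_alive; that is where the
 * round robin in cbq_dequeue_prio() starts each round.
 */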
305
306 static __inline__ void cbq_activate_class(struct cbq_class *cl)
307 {
308         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
309         int prio = cl->cpriority;
310         struct cbq_class *cl_tail;
311
312         cl_tail = q->active[prio];
313         q->active[prio] = cl;
314
315         if (cl_tail != NULL) {
316                 cl->next_alive = cl_tail->next_alive;
317                 cl_tail->next_alive = cl;
318         } else {
319                 cl->next_alive = cl;
320                 q->activemask |= (1<<prio);
321         }
322 }
323
324 /*
325    Unlink a class from the active chain.
326    Note that the same unlinking is also done directly in cbq_dequeue*
327    during the round-robin procedure.
328  */
329
330 static void cbq_deactivate_class(struct cbq_class *this)
331 {
332         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
333         int prio = this->cpriority;
334         struct cbq_class *cl;
335         struct cbq_class *cl_prev = q->active[prio];
336
337         do {
338                 cl = cl_prev->next_alive;
339                 if (cl == this) {
340                         cl_prev->next_alive = cl->next_alive;
341                         cl->next_alive = NULL;
342
343                         if (cl == q->active[prio]) {
344                                 q->active[prio] = cl_prev;
345                                 if (cl == q->active[prio]) {
346                                         q->active[prio] = NULL;
347                                         q->activemask &= ~(1<<prio);
348                                         return;
349                                 }
350                         }
351                         return;
352                 }
353         } while ((cl_prev = cl) != q->active[prio]);
354 }
355
356 static void
357 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
358 {
359         int toplevel = q->toplevel;
360
361         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
362                 psched_time_t now;
363                 psched_tdiff_t incr;
364
365                 now = psched_get_time();
366                 incr = now - q->now_rt;
367                 now = q->now + incr;
368
369                 do {
370                         if (cl->undertime < now) {
371                                 q->toplevel = cl->level;
372                                 return;
373                         }
374                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
375         }
376 }
377
378 static int
379 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
380 {
381         struct cbq_sched_data *q = qdisc_priv(sch);
382         int len = skb->len;
383         int uninitialized_var(ret);
384         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
385
386 #ifdef CONFIG_NET_CLS_ACT
387         q->rx_class = cl;
388 #endif
389         if (cl == NULL) {
390                 if (ret == NET_XMIT_BYPASS)
391                         sch->qstats.drops++;
392                 kfree_skb(skb);
393                 return ret;
394         }
395
396 #ifdef CONFIG_NET_CLS_ACT
397         cl->q->__parent = sch;
398 #endif
399         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
400                 sch->q.qlen++;
401                 sch->bstats.packets++;
402                 sch->bstats.bytes+=len;
403                 cbq_mark_toplevel(q, cl);
404                 if (!cl->next_alive)
405                         cbq_activate_class(cl);
406                 return ret;
407         }
408
409         sch->qstats.drops++;
410         cbq_mark_toplevel(q, cl);
411         cl->qstats.drops++;
412         return ret;
413 }
414
415 static int
416 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
417 {
418         struct cbq_sched_data *q = qdisc_priv(sch);
419         struct cbq_class *cl;
420         int ret;
421
422         if ((cl = q->tx_class) == NULL) {
423                 kfree_skb(skb);
424                 sch->qstats.drops++;
425                 return NET_XMIT_CN;
426         }
427         q->tx_class = NULL;
428
429         cbq_mark_toplevel(q, cl);
430
431 #ifdef CONFIG_NET_CLS_ACT
432         q->rx_class = cl;
433         cl->q->__parent = sch;
434 #endif
435         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
436                 sch->q.qlen++;
437                 sch->qstats.requeues++;
438                 if (!cl->next_alive)
439                         cbq_activate_class(cl);
440                 return 0;
441         }
442         sch->qstats.drops++;
443         cl->qstats.drops++;
444         return ret;
445 }
446
447 /* Overlimit actions */
448
449 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
450
451 static void cbq_ovl_classic(struct cbq_class *cl)
452 {
453         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
454         psched_tdiff_t delay = cl->undertime - q->now;
455
456         if (!cl->delayed) {
457                 delay += cl->offtime;
458
459                 /*
460                    The class goes to sleep, so it will have no
461                    chance to work off its avgidle. Let's forgive it 8)
462
463                    BTW cbq-2.0 has a bug in this
464                    place; apparently they forgot to shift it by cl->ewma_log.
465                  */
466                 if (cl->avgidle < 0)
467                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
468                 if (cl->avgidle < cl->minidle)
469                         cl->avgidle = cl->minidle;
470                 if (delay <= 0)
471                         delay = 1;
472                 cl->undertime = q->now + delay;
473
474                 cl->xstats.overactions++;
475                 cl->delayed = 1;
476         }
477         if (q->wd_expires == 0 || q->wd_expires > delay)
478                 q->wd_expires = delay;
479
480         /* Dirty work! We must schedule wakeups based on the
481            real available rate, rather than the leaf rate,
482            which may be tiny (even zero).
483          */
484         if (q->toplevel == TC_CBQ_MAXLEVEL) {
485                 struct cbq_class *b;
486                 psched_tdiff_t base_delay = q->wd_expires;
487
488                 for (b = cl->borrow; b; b = b->borrow) {
489                         delay = b->undertime - q->now;
490                         if (delay < base_delay) {
491                                 if (delay <= 0)
492                                         delay = 1;
493                                 base_delay = delay;
494                         }
495                 }
496
497                 q->wd_expires = base_delay;
498         }
499 }
500
501 /* TC_CBQ_OVL_RCLASSIC: penalize classes in the hierarchy by offtime when
502    they go overlimit
503  */
504
505 static void cbq_ovl_rclassic(struct cbq_class *cl)
506 {
507         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
508         struct cbq_class *this = cl;
509
510         do {
511                 if (cl->level > q->toplevel) {
512                         cl = NULL;
513                         break;
514                 }
515         } while ((cl = cl->borrow) != NULL);
516
517         if (cl == NULL)
518                 cl = this;
519         cbq_ovl_classic(cl);
520 }
521
522 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit again */
523
524 static void cbq_ovl_delay(struct cbq_class *cl)
525 {
526         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
527         psched_tdiff_t delay = cl->undertime - q->now;
528
529         if (!cl->delayed) {
530                 psched_time_t sched = q->now;
531                 ktime_t expires;
532
533                 delay += cl->offtime;
534                 if (cl->avgidle < 0)
535                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
536                 if (cl->avgidle < cl->minidle)
537                         cl->avgidle = cl->minidle;
538                 cl->undertime = q->now + delay;
539
540                 if (delay > 0) {
541                         sched += delay + cl->penalty;
542                         cl->penalized = sched;
543                         cl->cpriority = TC_CBQ_MAXPRIO;
544                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
545
546                         expires = ktime_set(0, 0);
547                         expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
548                         if (hrtimer_try_to_cancel(&q->delay_timer) &&
549                             ktime_to_ns(ktime_sub(q->delay_timer.expires,
550                                                   expires)) > 0)
551                                 q->delay_timer.expires = expires;
552                         hrtimer_restart(&q->delay_timer);
553                         cl->delayed = 1;
554                         cl->xstats.overactions++;
555                         return;
556                 }
557                 delay = 1;
558         }
559         if (q->wd_expires == 0 || q->wd_expires > delay)
560                 q->wd_expires = delay;
561 }
562
563 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
564
565 static void cbq_ovl_lowprio(struct cbq_class *cl)
566 {
567         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
568
569         cl->penalized = q->now + cl->penalty;
570
571         if (cl->cpriority != cl->priority2) {
572                 cl->cpriority = cl->priority2;
573                 q->pmask |= (1<<cl->cpriority);
574                 cl->xstats.overactions++;
575         }
576         cbq_ovl_classic(cl);
577 }
578
579 /* TC_CBQ_OVL_DROP: penalize class by dropping */
580
581 static void cbq_ovl_drop(struct cbq_class *cl)
582 {
583         if (cl->q->ops->drop)
584                 if (cl->q->ops->drop(cl->q))
585                         cl->qdisc->q.qlen--;
586         cl->xstats.overactions++;
587         cbq_ovl_classic(cl);
588 }
589
590 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
591                                        psched_time_t now)
592 {
593         struct cbq_class *cl;
594         struct cbq_class *cl_prev = q->active[prio];
595         psched_time_t sched = now;
596
597         if (cl_prev == NULL)
598                 return 0;
599
600         do {
601                 cl = cl_prev->next_alive;
602                 if (now - cl->penalized > 0) {
603                         cl_prev->next_alive = cl->next_alive;
604                         cl->next_alive = NULL;
605                         cl->cpriority = cl->priority;
606                         cl->delayed = 0;
607                         cbq_activate_class(cl);
608
609                         if (cl == q->active[prio]) {
610                                 q->active[prio] = cl_prev;
611                                 if (cl == q->active[prio]) {
612                                         q->active[prio] = NULL;
613                                         return 0;
614                                 }
615                         }
616
617                         cl = cl_prev->next_alive;
618                 } else if (sched - cl->penalized > 0)
619                         sched = cl->penalized;
620         } while ((cl_prev = cl) != q->active[prio]);
621
622         return sched - now;
623 }
624
625 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
626 {
627         struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
628                                                 delay_timer);
629         struct Qdisc *sch = q->watchdog.qdisc;
630         psched_time_t now;
631         psched_tdiff_t delay = 0;
632         unsigned pmask;
633
634         now = psched_get_time();
635
636         pmask = q->pmask;
637         q->pmask = 0;
638
639         while (pmask) {
640                 int prio = ffz(~pmask);
641                 psched_tdiff_t tmp;
642
643                 pmask &= ~(1<<prio);
644
645                 tmp = cbq_undelay_prio(q, prio, now);
646                 if (tmp > 0) {
647                         q->pmask |= 1<<prio;
648                         if (tmp < delay || delay == 0)
649                                 delay = tmp;
650                 }
651         }
652
653         if (delay) {
654                 ktime_t time;
655
656                 time = ktime_set(0, 0);
657                 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
658                 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
659         }
660
661         sch->flags &= ~TCQ_F_THROTTLED;
662         netif_schedule(sch->dev);
663         return HRTIMER_NORESTART;
664 }
665
666 #ifdef CONFIG_NET_CLS_ACT
667 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
668 {
669         int len = skb->len;
670         struct Qdisc *sch = child->__parent;
671         struct cbq_sched_data *q = qdisc_priv(sch);
672         struct cbq_class *cl = q->rx_class;
673
674         q->rx_class = NULL;
675
676         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
677
678                 cbq_mark_toplevel(q, cl);
679
680                 q->rx_class = cl;
681                 cl->q->__parent = sch;
682
683                 if (cl->q->enqueue(skb, cl->q) == 0) {
684                         sch->q.qlen++;
685                         sch->bstats.packets++;
686                         sch->bstats.bytes+=len;
687                         if (!cl->next_alive)
688                                 cbq_activate_class(cl);
689                         return 0;
690                 }
691                 sch->qstats.drops++;
692                 return 0;
693         }
694
695         sch->qstats.drops++;
696         return -1;
697 }
698 #endif
699
700 /*
701    This is a mission-critical procedure.
702
703    We "regenerate" the toplevel cutoff if the transmitting class
704    has backlog and is not regulated. This is not part of the
705    original CBQ description, but it looks more reasonable.
706    It may well be wrong; the question needs further investigation.
707 */
708
709 static __inline__ void
710 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
711                     struct cbq_class *borrowed)
712 {
713         if (cl && q->toplevel >= borrowed->level) {
714                 if (cl->q->q.qlen > 1) {
715                         do {
716                                 if (borrowed->undertime == PSCHED_PASTPERFECT) {
717                                         q->toplevel = borrowed->level;
718                                         return;
719                                 }
720                         } while ((borrowed=borrowed->borrow) != NULL);
721                 }
722 #if 0
723         /* It is not necessary now. Uncommenting it
724            will save CPU cycles, but decrease fairness.
725          */
726                 q->toplevel = TC_CBQ_MAXLEVEL;
727 #endif
728         }
729 }
730
731 static void
732 cbq_update(struct cbq_sched_data *q)
733 {
734         struct cbq_class *this = q->tx_class;
735         struct cbq_class *cl = this;
736         int len = q->tx_len;
737
738         q->tx_class = NULL;
739
740         for ( ; cl; cl = cl->share) {
741                 long avgidle = cl->avgidle;
742                 long idle;
743
744                 cl->bstats.packets++;
745                 cl->bstats.bytes += len;
746
747                 /*
748                    (now - last) is the total time between packet right edges.
749                    (last_pktlen/rate) is the "virtual" busy time, so that
750
751                          idle = (now - last) - last_pktlen/rate
752                  */
753
754                 idle = q->now - cl->last;
755                 if ((unsigned long)idle > 128*1024*1024) {
756                         avgidle = cl->maxidle;
757                 } else {
758                         idle -= L2T(cl, len);
759
760                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
761                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
762                    cl->avgidle == true_avgidle/W,
763                    hence:
764                  */
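                /* A quick check of the scaling (illustrative, with
                   ewma_log = 5, i.e. W = 1/32): from
                   true_avgidle' = (31/32)*true_avgidle + (1/32)*idle,
                   dividing by W gives scaled' = scaled + idle - scaled/32,
                   which is exactly the update below. */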
765                         avgidle += idle - (avgidle>>cl->ewma_log);
766                 }
767
768                 if (avgidle <= 0) {
769                         /* Overlimit or at-limit */
770
771                         if (avgidle < cl->minidle)
772                                 avgidle = cl->minidle;
773
774                         cl->avgidle = avgidle;
775
776                         /* Calculate the expected time when this class
777                            will be allowed to send.
778                            It will occur when:
779                            (1-W)*true_avgidle + W*delay = 0, i.e.
780                            idle = (1/W - 1)*(-true_avgidle)
781                            or
782                            idle = (1 - W)*(-cl->avgidle);
783                          */
784                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
785
786                         /*
787                            That is not all.
788                            To maintain the rate allocated to the class,
789                            we add to undertime the virtual clock time
790                            necessary to complete the transmitted packet.
791                            (len/phys_bandwidth has already elapsed by
792                            the time cbq_update is called.)
793                          */
794
795                         idle -= L2T(&q->link, len);
796                         idle += L2T(cl, len);
797
798                         cl->undertime = q->now + idle;
799                 } else {
800                         /* Underlimit */
801
802                         cl->undertime = PSCHED_PASTPERFECT;
803                         if (avgidle > cl->maxidle)
804                                 cl->avgidle = cl->maxidle;
805                         else
806                                 cl->avgidle = avgidle;
807                 }
808                 cl->last = q->now;
809         }
810
811         cbq_update_toplevel(q, this, q->tx_borrowed);
812 }
813
814 static __inline__ struct cbq_class *
815 cbq_under_limit(struct cbq_class *cl)
816 {
817         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
818         struct cbq_class *this_cl = cl;
819
820         if (cl->tparent == NULL)
821                 return cl;
822
823         if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
824                 cl->delayed = 0;
825                 return cl;
826         }
827
828         do {
829                 /* This is a very suspicious place. Currently the overlimit
830                    action is generated for non-bounded classes
831                    only if the link is completely congested.
832                    Though this agrees with the ancestor-only paradigm,
833                    it looks very stupid. In particular,
834                    it means that this chunk of code will either
835                    never be called or result in strong amplification
836                    of burstiness. Dangerous, silly, but
837                    no other solution exists.
838                  */
839                 if ((cl = cl->borrow) == NULL) {
840                         this_cl->qstats.overlimits++;
841                         this_cl->overlimit(this_cl);
842                         return NULL;
843                 }
844                 if (cl->level > q->toplevel)
845                         return NULL;
846         } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
847
848         cl->delayed = 0;
849         return cl;
850 }
851
852 static __inline__ struct sk_buff *
853 cbq_dequeue_prio(struct Qdisc *sch, int prio)
854 {
855         struct cbq_sched_data *q = qdisc_priv(sch);
856         struct cbq_class *cl_tail, *cl_prev, *cl;
857         struct sk_buff *skb;
858         int deficit;
859
860         cl_tail = cl_prev = q->active[prio];
861         cl = cl_prev->next_alive;
862
863         do {
864                 deficit = 0;
865
866                 /* Start round */
867                 do {
868                         struct cbq_class *borrow = cl;
869
870                         if (cl->q->q.qlen &&
871                             (borrow = cbq_under_limit(cl)) == NULL)
872                                 goto skip_class;
873
874                         if (cl->deficit <= 0) {
875                                 /* Class exhausted its allotment per
876                                    this round. Switch to the next one.
877                                  */
878                                 deficit = 1;
879                                 cl->deficit += cl->quantum;
880                                 goto next_class;
881                         }
882
883                         skb = cl->q->dequeue(cl->q);
884
885                         /* The class did not give us any skb :-(
886                            This can occur even if cl->q->q.qlen != 0,
887                            e.g. if cl->q == "tbf"
888                          */
889                         if (skb == NULL)
890                                 goto skip_class;
891
892                         cl->deficit -= skb->len;
893                         q->tx_class = cl;
894                         q->tx_borrowed = borrow;
895                         if (borrow != cl) {
896 #ifndef CBQ_XSTATS_BORROWS_BYTES
897                                 borrow->xstats.borrows++;
898                                 cl->xstats.borrows++;
899 #else
900                                 borrow->xstats.borrows += skb->len;
901                                 cl->xstats.borrows += skb->len;
902 #endif
903                         }
904                         q->tx_len = skb->len;
905
906                         if (cl->deficit <= 0) {
907                                 q->active[prio] = cl;
908                                 cl = cl->next_alive;
909                                 cl->deficit += cl->quantum;
910                         }
911                         return skb;
912
913 skip_class:
914                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
915                                 /* Class is empty or penalized.
916                                    Unlink it from active chain.
917                                  */
918                                 cl_prev->next_alive = cl->next_alive;
919                                 cl->next_alive = NULL;
920
921                                 /* Did cl_tail point to it? */
922                                 if (cl == cl_tail) {
923                                         /* Repair it! */
924                                         cl_tail = cl_prev;
925
926                                         /* Was it the last class in this band? */
927                                         if (cl == cl_tail) {
928                                                 /* Kill the band! */
929                                                 q->active[prio] = NULL;
930                                                 q->activemask &= ~(1<<prio);
931                                                 if (cl->q->q.qlen)
932                                                         cbq_activate_class(cl);
933                                                 return NULL;
934                                         }
935
936                                         q->active[prio] = cl_tail;
937                                 }
938                                 if (cl->q->q.qlen)
939                                         cbq_activate_class(cl);
940
941                                 cl = cl_prev;
942                         }
943
944 next_class:
945                         cl_prev = cl;
946                         cl = cl->next_alive;
947                 } while (cl_prev != cl_tail);
948         } while (deficit);
949
950         q->active[prio] = cl_prev;
951
952         return NULL;
953 }
954
955 static __inline__ struct sk_buff *
956 cbq_dequeue_1(struct Qdisc *sch)
957 {
958         struct cbq_sched_data *q = qdisc_priv(sch);
959         struct sk_buff *skb;
960         unsigned activemask;
961
962         activemask = q->activemask&0xFF;
963         while (activemask) {
964                 int prio = ffz(~activemask);
965                 activemask &= ~(1<<prio);
966                 skb = cbq_dequeue_prio(sch, prio);
967                 if (skb)
968                         return skb;
969         }
970         return NULL;
971 }
972
973 static struct sk_buff *
974 cbq_dequeue(struct Qdisc *sch)
975 {
976         struct sk_buff *skb;
977         struct cbq_sched_data *q = qdisc_priv(sch);
978         psched_time_t now;
979         psched_tdiff_t incr;
980
981         now = psched_get_time();
982         incr = now - q->now_rt;
983
984         if (q->tx_class) {
985                 psched_tdiff_t incr2;
986                 /* Time integrator. We calculate the EOS time
987                    by adding the expected packet transmission time.
988                    If real time is greater, we warp the artificial clock,
989                    so that:
990
991                    cbq_time = max(real_time, work);
992                  */
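                /* Concretely, as coded below: q->now advances by
                   max(incr, incr2), i.e. by the serialization time of the
                   last packet (incr2) or the real elapsed time (incr),
                   whichever is larger. */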
993                 incr2 = L2T(&q->link, q->tx_len);
994                 q->now += incr2;
995                 cbq_update(q);
996                 if ((incr -= incr2) < 0)
997                         incr = 0;
998         }
999         q->now += incr;
1000         q->now_rt = now;
1001
1002         for (;;) {
1003                 q->wd_expires = 0;
1004
1005                 skb = cbq_dequeue_1(sch);
1006                 if (skb) {
1007                         sch->q.qlen--;
1008                         sch->flags &= ~TCQ_F_THROTTLED;
1009                         return skb;
1010                 }
1011
1012                 /* All the classes are overlimit.
1013
1014                    This is possible if:
1015
1016                    1. The scheduler is empty.
1017                    2. The toplevel cutoff inhibited borrowing.
1018                    3. The root class is overlimit.
1019
1020                    Reset the 2nd and 3rd conditions and retry.
1021
1022                    Note that NS and cbq-2.0 are buggy here: picking
1023                    an arbitrary class is appropriate for ancestor-only
1024                    sharing, but not for the top-level algorithm.
1025
1026                    Our version is better but slower, because it requires
1027                    two passes; that is unavoidable with top-level sharing.
1028                 */
1029
1030                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1031                     q->link.undertime == PSCHED_PASTPERFECT)
1032                         break;
1033
1034                 q->toplevel = TC_CBQ_MAXLEVEL;
1035                 q->link.undertime = PSCHED_PASTPERFECT;
1036         }
1037
1038         /* No packets in the scheduler, or nobody wants to give them to us :-(
1039            Sigh... start the watchdog timer in the latter case. */
1040
1041         if (sch->q.qlen) {
1042                 sch->qstats.overlimits++;
1043                 if (q->wd_expires)
1044                         qdisc_watchdog_schedule(&q->watchdog,
1045                                                 now + q->wd_expires);
1046         }
1047         return NULL;
1048 }
1049
1050 /* CBQ class maintenance routines */
1051
1052 static void cbq_adjust_levels(struct cbq_class *this)
1053 {
1054         if (this == NULL)
1055                 return;
1056
1057         do {
1058                 int level = 0;
1059                 struct cbq_class *cl;
1060
1061                 if ((cl = this->children) != NULL) {
1062                         do {
1063                                 if (cl->level > level)
1064                                         level = cl->level;
1065                         } while ((cl = cl->sibling) != this->children);
1066                 }
1067                 this->level = level+1;
1068         } while ((this = this->tparent) != NULL);
1069 }
1070
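/* Within one priority band each class effectively receives
 * quantum = allot * weight / (average weight in the band).
 * For illustration (hypothetical numbers): two classes in a band, both
 * with allot 1514, with weights 1 and 3, give quanta[prio] = 4 and
 * nclasses[prio] = 2, hence quanta of 1514*1*2/4 = 757 and
 * 1514*3*2/4 = 2271 bytes respectively.
 */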
1071 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1072 {
1073         struct cbq_class *cl;
1074         unsigned h;
1075
1076         if (q->quanta[prio] == 0)
1077                 return;
1078
1079         for (h=0; h<16; h++) {
1080                 for (cl = q->classes[h]; cl; cl = cl->next) {
1081                         /* BUGGGG... Beware! This expression suffers from
1082                            arithmetic overflow!
1083                          */
1084                         if (cl->priority == prio) {
1085                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1086                                         q->quanta[prio];
1087                         }
1088                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1089                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1090                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1091                         }
1092                 }
1093         }
1094 }
1095
1096 static void cbq_sync_defmap(struct cbq_class *cl)
1097 {
1098         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1099         struct cbq_class *split = cl->split;
1100         unsigned h;
1101         int i;
1102
1103         if (split == NULL)
1104                 return;
1105
1106         for (i=0; i<=TC_PRIO_MAX; i++) {
1107                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1108                         split->defaults[i] = NULL;
1109         }
1110
1111         for (i=0; i<=TC_PRIO_MAX; i++) {
1112                 int level = split->level;
1113
1114                 if (split->defaults[i])
1115                         continue;
1116
1117                 for (h=0; h<16; h++) {
1118                         struct cbq_class *c;
1119
1120                         for (c = q->classes[h]; c; c = c->next) {
1121                                 if (c->split == split && c->level < level &&
1122                                     c->defmap&(1<<i)) {
1123                                         split->defaults[i] = c;
1124                                         level = c->level;
1125                                 }
1126                         }
1127                 }
1128         }
1129 }
1130
1131 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1132 {
1133         struct cbq_class *split = NULL;
1134
1135         if (splitid == 0) {
1136                 if ((split = cl->split) == NULL)
1137                         return;
1138                 splitid = split->classid;
1139         }
1140
1141         if (split == NULL || split->classid != splitid) {
1142                 for (split = cl->tparent; split; split = split->tparent)
1143                         if (split->classid == splitid)
1144                                 break;
1145         }
1146
1147         if (split == NULL)
1148                 return;
1149
1150         if (cl->split != split) {
1151                 cl->defmap = 0;
1152                 cbq_sync_defmap(cl);
1153                 cl->split = split;
1154                 cl->defmap = def&mask;
1155         } else
1156                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1157
1158         cbq_sync_defmap(cl);
1159 }
1160
1161 static void cbq_unlink_class(struct cbq_class *this)
1162 {
1163         struct cbq_class *cl, **clp;
1164         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1165
1166         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1167                 if (cl == this) {
1168                         *clp = cl->next;
1169                         cl->next = NULL;
1170                         break;
1171                 }
1172         }
1173
1174         if (this->tparent) {
1175                 clp=&this->sibling;
1176                 cl = *clp;
1177                 do {
1178                         if (cl == this) {
1179                                 *clp = cl->sibling;
1180                                 break;
1181                         }
1182                         clp = &cl->sibling;
1183                 } while ((cl = *clp) != this->sibling);
1184
1185                 if (this->tparent->children == this) {
1186                         this->tparent->children = this->sibling;
1187                         if (this->sibling == this)
1188                                 this->tparent->children = NULL;
1189                 }
1190         } else {
1191                 BUG_TRAP(this->sibling == this);
1192         }
1193 }
1194
1195 static void cbq_link_class(struct cbq_class *this)
1196 {
1197         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1198         unsigned h = cbq_hash(this->classid);
1199         struct cbq_class *parent = this->tparent;
1200
1201         this->sibling = this;
1202         this->next = q->classes[h];
1203         q->classes[h] = this;
1204
1205         if (parent == NULL)
1206                 return;
1207
1208         if (parent->children == NULL) {
1209                 parent->children = this;
1210         } else {
1211                 this->sibling = parent->children->sibling;
1212                 parent->children->sibling = this;
1213         }
1214 }
1215
1216 static unsigned int cbq_drop(struct Qdisc* sch)
1217 {
1218         struct cbq_sched_data *q = qdisc_priv(sch);
1219         struct cbq_class *cl, *cl_head;
1220         int prio;
1221         unsigned int len;
1222
1223         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1224                 if ((cl_head = q->active[prio]) == NULL)
1225                         continue;
1226
1227                 cl = cl_head;
1228                 do {
1229                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1230                                 sch->q.qlen--;
1231                                 if (!cl->q->q.qlen)
1232                                         cbq_deactivate_class(cl);
1233                                 return len;
1234                         }
1235                 } while ((cl = cl->next_alive) != cl_head);
1236         }
1237         return 0;
1238 }
1239
1240 static void
1241 cbq_reset(struct Qdisc* sch)
1242 {
1243         struct cbq_sched_data *q = qdisc_priv(sch);
1244         struct cbq_class *cl;
1245         int prio;
1246         unsigned h;
1247
1248         q->activemask = 0;
1249         q->pmask = 0;
1250         q->tx_class = NULL;
1251         q->tx_borrowed = NULL;
1252         qdisc_watchdog_cancel(&q->watchdog);
1253         hrtimer_cancel(&q->delay_timer);
1254         q->toplevel = TC_CBQ_MAXLEVEL;
1255         q->now = psched_get_time();
1256         q->now_rt = q->now;
1257
1258         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1259                 q->active[prio] = NULL;
1260
1261         for (h = 0; h < 16; h++) {
1262                 for (cl = q->classes[h]; cl; cl = cl->next) {
1263                         qdisc_reset(cl->q);
1264
1265                         cl->next_alive = NULL;
1266                         cl->undertime = PSCHED_PASTPERFECT;
1267                         cl->avgidle = cl->maxidle;
1268                         cl->deficit = cl->quantum;
1269                         cl->cpriority = cl->priority;
1270                 }
1271         }
1272         sch->q.qlen = 0;
1273 }
1274
1275
1276 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1277 {
1278         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1279                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1280                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1281         }
1282         if (lss->change&TCF_CBQ_LSS_EWMA)
1283                 cl->ewma_log = lss->ewma_log;
1284         if (lss->change&TCF_CBQ_LSS_AVPKT)
1285                 cl->avpkt = lss->avpkt;
1286         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1287                 cl->minidle = -(long)lss->minidle;
1288         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1289                 cl->maxidle = lss->maxidle;
1290                 cl->avgidle = lss->maxidle;
1291         }
1292         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1293                 cl->offtime = lss->offtime;
1294         return 0;
1295 }
1296
1297 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1298 {
1299         q->nclasses[cl->priority]--;
1300         q->quanta[cl->priority] -= cl->weight;
1301         cbq_normalize_quanta(q, cl->priority);
1302 }
1303
1304 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1305 {
1306         q->nclasses[cl->priority]++;
1307         q->quanta[cl->priority] += cl->weight;
1308         cbq_normalize_quanta(q, cl->priority);
1309 }
1310
1311 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1312 {
1313         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1314
1315         if (wrr->allot)
1316                 cl->allot = wrr->allot;
1317         if (wrr->weight)
1318                 cl->weight = wrr->weight;
1319         if (wrr->priority) {
1320                 cl->priority = wrr->priority-1;
1321                 cl->cpriority = cl->priority;
1322                 if (cl->priority >= cl->priority2)
1323                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1324         }
1325
1326         cbq_addprio(q, cl);
1327         return 0;
1328 }
1329
1330 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1331 {
1332         switch (ovl->strategy) {
1333         case TC_CBQ_OVL_CLASSIC:
1334                 cl->overlimit = cbq_ovl_classic;
1335                 break;
1336         case TC_CBQ_OVL_DELAY:
1337                 cl->overlimit = cbq_ovl_delay;
1338                 break;
1339         case TC_CBQ_OVL_LOWPRIO:
1340                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1341                     ovl->priority2-1 <= cl->priority)
1342                         return -EINVAL;
1343                 cl->priority2 = ovl->priority2-1;
1344                 cl->overlimit = cbq_ovl_lowprio;
1345                 break;
1346         case TC_CBQ_OVL_DROP:
1347                 cl->overlimit = cbq_ovl_drop;
1348                 break;
1349         case TC_CBQ_OVL_RCLASSIC:
1350                 cl->overlimit = cbq_ovl_rclassic;
1351                 break;
1352         default:
1353                 return -EINVAL;
1354         }
1355         cl->penalty = ovl->penalty;
1356         return 0;
1357 }
1358
1359 #ifdef CONFIG_NET_CLS_ACT
1360 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1361 {
1362         cl->police = p->police;
1363
1364         if (cl->q->handle) {
1365                 if (p->police == TC_POLICE_RECLASSIFY)
1366                         cl->q->reshape_fail = cbq_reshape_fail;
1367                 else
1368                         cl->q->reshape_fail = NULL;
1369         }
1370         return 0;
1371 }
1372 #endif
1373
1374 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1375 {
1376         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1377         return 0;
1378 }
1379
1380 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1381 {
1382         struct cbq_sched_data *q = qdisc_priv(sch);
1383         struct rtattr *tb[TCA_CBQ_MAX];
1384         struct tc_ratespec *r;
1385
1386         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1387             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1388             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1389                 return -EINVAL;
1390
1391         if (tb[TCA_CBQ_LSSOPT-1] &&
1392             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1393                 return -EINVAL;
1394
1395         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1396
1397         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1398                 return -EINVAL;
1399
1400         q->link.refcnt = 1;
1401         q->link.sibling = &q->link;
1402         q->link.classid = sch->handle;
1403         q->link.qdisc = sch;
1404         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1405                                             sch->handle)))
1406                 q->link.q = &noop_qdisc;
1407
1408         q->link.priority = TC_CBQ_MAXPRIO-1;
1409         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1410         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1411         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1412         q->link.overlimit = cbq_ovl_classic;
1413         q->link.allot = psched_mtu(sch->dev);
1414         q->link.quantum = q->link.allot;
1415         q->link.weight = q->link.R_tab->rate.rate;
1416
1417         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1418         q->link.avpkt = q->link.allot/2;
1419         q->link.minidle = -0x7FFFFFFF;
1420
1421         qdisc_watchdog_init(&q->watchdog, sch);
1422         hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1423         q->delay_timer.function = cbq_undelay;
1424         q->toplevel = TC_CBQ_MAXLEVEL;
1425         q->now = psched_get_time();
1426         q->now_rt = q->now;
1427
1428         cbq_link_class(&q->link);
1429
1430         if (tb[TCA_CBQ_LSSOPT-1])
1431                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1432
1433         cbq_addprio(q, &q->link);
1434         return 0;
1435 }
1436
1437 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1438 {
1439         unsigned char *b = skb_tail_pointer(skb);
1440
1441         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1442         return skb->len;
1443
1444 rtattr_failure:
1445         nlmsg_trim(skb, b);
1446         return -1;
1447 }
1448
1449 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1450 {
1451         unsigned char *b = skb_tail_pointer(skb);
1452         struct tc_cbq_lssopt opt;
1453
1454         opt.flags = 0;
1455         if (cl->borrow == NULL)
1456                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1457         if (cl->share == NULL)
1458                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1459         opt.ewma_log = cl->ewma_log;
1460         opt.level = cl->level;
1461         opt.avpkt = cl->avpkt;
1462         opt.maxidle = cl->maxidle;
1463         opt.minidle = (u32)(-cl->minidle);
1464         opt.offtime = cl->offtime;
1465         opt.change = ~0;
1466         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1467         return skb->len;
1468
1469 rtattr_failure:
1470         nlmsg_trim(skb, b);
1471         return -1;
1472 }
1473
1474 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1475 {
1476         unsigned char *b = skb_tail_pointer(skb);
1477         struct tc_cbq_wrropt opt;
1478
1479         opt.flags = 0;
1480         opt.allot = cl->allot;
1481         opt.priority = cl->priority+1;
1482         opt.cpriority = cl->cpriority+1;
1483         opt.weight = cl->weight;
1484         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1485         return skb->len;
1486
1487 rtattr_failure:
1488         nlmsg_trim(skb, b);
1489         return -1;
1490 }
1491
1492 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1493 {
1494         unsigned char *b = skb_tail_pointer(skb);
1495         struct tc_cbq_ovl opt;
1496
1497         opt.strategy = cl->ovl_strategy;
1498         opt.priority2 = cl->priority2+1;
1499         opt.pad = 0;
1500         opt.penalty = cl->penalty;
1501         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1502         return skb->len;
1503
1504 rtattr_failure:
1505         nlmsg_trim(skb, b);
1506         return -1;
1507 }
1508
1509 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1510 {
1511         unsigned char *b = skb_tail_pointer(skb);
1512         struct tc_cbq_fopt opt;
1513
1514         if (cl->split || cl->defmap) {
1515                 opt.split = cl->split ? cl->split->classid : 0;
1516                 opt.defmap = cl->defmap;
1517                 opt.defchange = ~0;
1518                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1519         }
1520         return skb->len;
1521
1522 rtattr_failure:
1523         nlmsg_trim(skb, b);
1524         return -1;
1525 }
1526
1527 #ifdef CONFIG_NET_CLS_ACT
1528 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1529 {
1530         unsigned char *b = skb_tail_pointer(skb);
1531         struct tc_cbq_police opt;
1532
1533         if (cl->police) {
1534                 opt.police = cl->police;
1535                 opt.__res1 = 0;
1536                 opt.__res2 = 0;
1537                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1538         }
1539         return skb->len;
1540
1541 rtattr_failure:
1542         nlmsg_trim(skb, b);
1543         return -1;
1544 }
1545 #endif
1546
1547 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1548 {
1549         if (cbq_dump_lss(skb, cl) < 0 ||
1550             cbq_dump_rate(skb, cl) < 0 ||
1551             cbq_dump_wrr(skb, cl) < 0 ||
1552             cbq_dump_ovl(skb, cl) < 0 ||
1553 #ifdef CONFIG_NET_CLS_ACT
1554             cbq_dump_police(skb, cl) < 0 ||
1555 #endif
1556             cbq_dump_fopt(skb, cl) < 0)
1557                 return -1;
1558         return 0;
1559 }
1560
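     /*
      * cbq_dump() reports the root ("link") class configuration nested in a
      * TCA_OPTIONS attribute; the container's rta_len is patched once the
      * nested attributes have been appended.
      */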
1561 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1562 {
1563         struct cbq_sched_data *q = qdisc_priv(sch);
1564         unsigned char *b = skb_tail_pointer(skb);
1565         struct rtattr *rta;
1566
1567         rta = (struct rtattr*)b;
1568         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1569         if (cbq_dump_attr(skb, &q->link) < 0)
1570                 goto rtattr_failure;
1571         rta->rta_len = skb_tail_pointer(skb) - b;
1572         return skb->len;
1573
1574 rtattr_failure:
1575         nlmsg_trim(skb, b);
1576         return -1;
1577 }
1578
1579 static int
1580 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1581 {
1582         struct cbq_sched_data *q = qdisc_priv(sch);
1583
1584         q->link.xstats.avgidle = q->link.avgidle;
1585         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1586 }
1587
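     /*
      * Per-class dump: fill in the tcmsg header (parent, handle, leaf qdisc
      * id) and nest the same attribute set inside TCA_OPTIONS.  This is the
      * data userspace sees for each class, e.g. via (illustration only,
      * assuming the qdisc is installed on eth0):
      *
      *     tc -s class show dev eth0
      */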
1588 static int
1589 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1590                struct sk_buff *skb, struct tcmsg *tcm)
1591 {
1592         struct cbq_class *cl = (struct cbq_class*)arg;
1593         unsigned char *b = skb_tail_pointer(skb);
1594         struct rtattr *rta;
1595
1596         if (cl->tparent)
1597                 tcm->tcm_parent = cl->tparent->classid;
1598         else
1599                 tcm->tcm_parent = TC_H_ROOT;
1600         tcm->tcm_handle = cl->classid;
1601         tcm->tcm_info = cl->q->handle;
1602
1603         rta = (struct rtattr*)b;
1604         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1605         if (cbq_dump_attr(skb, cl) < 0)
1606                 goto rtattr_failure;
1607         rta->rta_len = skb_tail_pointer(skb) - b;
1608         return skb->len;
1609
1610 rtattr_failure:
1611         nlmsg_trim(skb, b);
1612         return -1;
1613 }
1614
1615 static int
1616 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1617         struct gnet_dump *d)
1618 {
1619         struct cbq_sched_data *q = qdisc_priv(sch);
1620         struct cbq_class *cl = (struct cbq_class*)arg;
1621
1622         cl->qstats.qlen = cl->q->q.qlen;
1623         cl->xstats.avgidle = cl->avgidle;
1624         cl->xstats.undertime = 0;
1625
1626         if (cl->undertime != PSCHED_PASTPERFECT)
1627                 cl->xstats.undertime = cl->undertime - q->now;
1628
1629         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1630             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1631             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1632                 return -1;
1633
1634         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1635 }
1636
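     /*
      * cbq_graft() attaches a new leaf qdisc to a class.  A NULL 'new'
      * means "revert to a default pfifo"; the old qdisc is reset and its
      * backlog is propagated upwards via qdisc_tree_decrease_qlen().
      * For illustration only (not part of this file), this is the path
      * taken by something like:
      *
      *     tc qdisc add dev eth0 parent 1:10 handle 10: sfq
      *
      * assuming a cbq root with handle 1: and an existing class 1:10.
      */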
1637 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1638                      struct Qdisc **old)
1639 {
1640         struct cbq_class *cl = (struct cbq_class*)arg;
1641
1642         if (cl) {
1643                 if (new == NULL) {
1644                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1645                                                      cl->classid)) == NULL)
1646                                 return -ENOBUFS;
1647                 } else {
1648 #ifdef CONFIG_NET_CLS_ACT
1649                         if (cl->police == TC_POLICE_RECLASSIFY)
1650                                 new->reshape_fail = cbq_reshape_fail;
1651 #endif
1652                 }
1653                 sch_tree_lock(sch);
1654                 *old = xchg(&cl->q, new);
1655                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1656                 qdisc_reset(*old);
1657                 sch_tree_unlock(sch);
1658
1659                 return 0;
1660         }
1661         return -ENOENT;
1662 }
1663
1664 static struct Qdisc *
1665 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1666 {
1667         struct cbq_class *cl = (struct cbq_class*)arg;
1668
1669         return cl ? cl->q : NULL;
1670 }
1671
1672 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1673 {
1674         struct cbq_class *cl = (struct cbq_class *)arg;
1675
1676         if (cl->q->q.qlen == 0)
1677                 cbq_deactivate_class(cl);
1678 }
1679
1680 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1681 {
1682         struct cbq_sched_data *q = qdisc_priv(sch);
1683         struct cbq_class *cl = cbq_class_lookup(q, classid);
1684
1685         if (cl) {
1686                 cl->refcnt++;
1687                 return (unsigned long)cl;
1688         }
1689         return 0;
1690 }
1691
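     /*
      * cbq_destroy_class() releases everything a class owns: filter chain,
      * leaf qdisc, rate table and rate estimator.  The root class q->link
      * is embedded in the qdisc private area and therefore is not kfree()d.
      */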
1692 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1693 {
1694         struct cbq_sched_data *q = qdisc_priv(sch);
1695
1696         BUG_TRAP(!cl->filters);
1697
1698         tcf_destroy_chain(cl->filter_list);
1699         qdisc_destroy(cl->q);
1700         qdisc_put_rtab(cl->R_tab);
1701         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1702         if (cl != &q->link)
1703                 kfree(cl);
1704 }
1705
1706 static void
1707 cbq_destroy(struct Qdisc* sch)
1708 {
1709         struct cbq_sched_data *q = qdisc_priv(sch);
1710         struct cbq_class *cl;
1711         unsigned h;
1712
1713 #ifdef CONFIG_NET_CLS_ACT
1714         q->rx_class = NULL;
1715 #endif
1716         /*
1717          * Filters must be destroyed first because we don't destroy the
1718          * classes from root to leaves, which means that filters can still
1719          * be bound to classes that have already been destroyed. --TGR '04
1720          */
1721         for (h = 0; h < 16; h++) {
1722                 for (cl = q->classes[h]; cl; cl = cl->next) {
1723                         tcf_destroy_chain(cl->filter_list);
1724                         cl->filter_list = NULL;
1725                 }
1726         }
1727         for (h = 0; h < 16; h++) {
1728                 struct cbq_class *next;
1729
1730                 for (cl = q->classes[h]; cl; cl = next) {
1731                         next = cl->next;
1732                         cbq_destroy_class(sch, cl);
1733                 }
1734         }
1735 }
1736
1737 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1738 {
1739         struct cbq_class *cl = (struct cbq_class*)arg;
1740
1741         if (--cl->refcnt == 0) {
1742 #ifdef CONFIG_NET_CLS_ACT
1743                 struct cbq_sched_data *q = qdisc_priv(sch);
1744
1745                 spin_lock_bh(&sch->dev->queue_lock);
1746                 if (q->rx_class == cl)
1747                         q->rx_class = NULL;
1748                 spin_unlock_bh(&sch->dev->queue_lock);
1749 #endif
1750
1751                 cbq_destroy_class(sch, cl);
1752         }
1753 }
1754
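     /*
      * cbq_change_class() handles both halves of RTM_NEWTCLASS: when *arg
      * already names a class, its parameters are updated under the tree
      * lock; otherwise a new class is allocated, linked into the hash and
      * configured from the mandatory RATE/WRROPT/LSSOPT attributes,
      * inheriting ewma_log, maxidle and avpkt from the link class when
      * left unset.  For illustration only (iproute2 syntax, arbitrary
      * values, assuming a cbq root with handle 1:):
      *
      *     tc class add dev eth0 parent 1: classid 1:10 cbq \
      *         bandwidth 10Mbit rate 1Mbit allot 1514 prio 5 \
      *         avpkt 1000 bounded
      */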
1755 static int
1756 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1757                  unsigned long *arg)
1758 {
1759         int err;
1760         struct cbq_sched_data *q = qdisc_priv(sch);
1761         struct cbq_class *cl = (struct cbq_class*)*arg;
1762         struct rtattr *opt = tca[TCA_OPTIONS-1];
1763         struct rtattr *tb[TCA_CBQ_MAX];
1764         struct cbq_class *parent;
1765         struct qdisc_rate_table *rtab = NULL;
1766
1767         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1768                 return -EINVAL;
1769
1770         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1771             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1772                 return -EINVAL;
1773
1774         if (tb[TCA_CBQ_FOPT-1] &&
1775             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1776                 return -EINVAL;
1777
1778         if (tb[TCA_CBQ_RATE-1] &&
1779             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1780                 return -EINVAL;
1781
1782         if (tb[TCA_CBQ_LSSOPT-1] &&
1783             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1784                 return -EINVAL;
1785
1786         if (tb[TCA_CBQ_WRROPT-1] &&
1787             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1788                 return -EINVAL;
1789
1790 #ifdef CONFIG_NET_CLS_ACT
1791         if (tb[TCA_CBQ_POLICE-1] &&
1792             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1793                 return -EINVAL;
1794 #endif
1795
1796         if (cl) {
1797                 /* Check parent */
1798                 if (parentid) {
1799                         if (cl->tparent && cl->tparent->classid != parentid)
1800                                 return -EINVAL;
1801                         if (!cl->tparent && parentid != TC_H_ROOT)
1802                                 return -EINVAL;
1803                 }
1804
1805                 if (tb[TCA_CBQ_RATE-1]) {
1806                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1807                         if (rtab == NULL)
1808                                 return -EINVAL;
1809                 }
1810
1811                 /* Change class parameters */
1812                 sch_tree_lock(sch);
1813
1814                 if (cl->next_alive != NULL)
1815                         cbq_deactivate_class(cl);
1816
1817                 if (rtab) {
1818                         rtab = xchg(&cl->R_tab, rtab);
1819                         qdisc_put_rtab(rtab);
1820                 }
1821
1822                 if (tb[TCA_CBQ_LSSOPT-1])
1823                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1824
1825                 if (tb[TCA_CBQ_WRROPT-1]) {
1826                         cbq_rmprio(q, cl);
1827                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1828                 }
1829
1830                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1831                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1832
1833 #ifdef CONFIG_NET_CLS_ACT
1834                 if (tb[TCA_CBQ_POLICE-1])
1835                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1836 #endif
1837
1838                 if (tb[TCA_CBQ_FOPT-1])
1839                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1840
1841                 if (cl->q->q.qlen)
1842                         cbq_activate_class(cl);
1843
1844                 sch_tree_unlock(sch);
1845
1846                 if (tca[TCA_RATE-1])
1847                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1848                                               &sch->dev->queue_lock,
1849                                               tca[TCA_RATE-1]);
1850                 return 0;
1851         }
1852
1853         if (parentid == TC_H_ROOT)
1854                 return -EINVAL;
1855
1856         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1857             tb[TCA_CBQ_LSSOPT-1] == NULL)
1858                 return -EINVAL;
1859
1860         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1861         if (rtab == NULL)
1862                 return -EINVAL;
1863
1864         if (classid) {
1865                 err = -EINVAL;
1866                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1867                         goto failure;
1868         } else {
1869                 int i;
1870                 classid = TC_H_MAKE(sch->handle,0x8000);
1871
1872                 for (i=0; i<0x8000; i++) {
1873                         if (++q->hgenerator >= 0x8000)
1874                                 q->hgenerator = 1;
1875                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1876                                 break;
1877                 }
1878                 err = -ENOSR;
1879                 if (i >= 0x8000)
1880                         goto failure;
1881                 classid = classid|q->hgenerator;
1882         }
1883
1884         parent = &q->link;
1885         if (parentid) {
1886                 parent = cbq_class_lookup(q, parentid);
1887                 err = -EINVAL;
1888                 if (parent == NULL)
1889                         goto failure;
1890         }
1891
1892         err = -ENOBUFS;
1893         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1894         if (cl == NULL)
1895                 goto failure;
1896         cl->R_tab = rtab;
1897         rtab = NULL;
1898         cl->refcnt = 1;
1899         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1900                 cl->q = &noop_qdisc;
1901         cl->classid = classid;
1902         cl->tparent = parent;
1903         cl->qdisc = sch;
1904         cl->allot = parent->allot;
1905         cl->quantum = cl->allot;
1906         cl->weight = cl->R_tab->rate.rate;
1907
1908         sch_tree_lock(sch);
1909         cbq_link_class(cl);
1910         cl->borrow = cl->tparent;
1911         if (cl->tparent != &q->link)
1912                 cl->share = cl->tparent;
1913         cbq_adjust_levels(parent);
1914         cl->minidle = -0x7FFFFFFF;
1915         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1916         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1917         if (cl->ewma_log==0)
1918                 cl->ewma_log = q->link.ewma_log;
1919         if (cl->maxidle==0)
1920                 cl->maxidle = q->link.maxidle;
1921         if (cl->avpkt==0)
1922                 cl->avpkt = q->link.avpkt;
1923         cl->overlimit = cbq_ovl_classic;
1924         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1925                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1926 #ifdef CONFIG_NET_CLS_ACT
1927         if (tb[TCA_CBQ_POLICE-1])
1928                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1929 #endif
1930         if (tb[TCA_CBQ_FOPT-1])
1931                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1932         sch_tree_unlock(sch);
1933
1934         if (tca[TCA_RATE-1])
1935                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1936                                   &sch->dev->queue_lock, tca[TCA_RATE-1]);
1937
1938         *arg = (unsigned long)cl;
1939         return 0;
1940
1941 failure:
1942         qdisc_put_rtab(rtab);
1943         return err;
1944 }
1945
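     /*
      * cbq_delete() removes a class, refusing while filters or child
      * classes still reference it and never for the root.  The class is
      * unlinked, the active lists and default maps are fixed up, and the
      * final reference drop frees it through cbq_destroy_class().
      */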
1946 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1947 {
1948         struct cbq_sched_data *q = qdisc_priv(sch);
1949         struct cbq_class *cl = (struct cbq_class*)arg;
1950         unsigned int qlen;
1951
1952         if (cl->filters || cl->children || cl == &q->link)
1953                 return -EBUSY;
1954
1955         sch_tree_lock(sch);
1956
1957         qlen = cl->q->q.qlen;
1958         qdisc_reset(cl->q);
1959         qdisc_tree_decrease_qlen(cl->q, qlen);
1960
1961         if (cl->next_alive)
1962                 cbq_deactivate_class(cl);
1963
1964         if (q->tx_borrowed == cl)
1965                 q->tx_borrowed = q->tx_class;
1966         if (q->tx_class == cl) {
1967                 q->tx_class = NULL;
1968                 q->tx_borrowed = NULL;
1969         }
1970 #ifdef CONFIG_NET_CLS_ACT
1971         if (q->rx_class == cl)
1972                 q->rx_class = NULL;
1973 #endif
1974
1975         cbq_unlink_class(cl);
1976         cbq_adjust_levels(cl->tparent);
1977         cl->defmap = 0;
1978         cbq_sync_defmap(cl);
1979
1980         cbq_rmprio(q, cl);
1981         sch_tree_unlock(sch);
1982
1983         if (--cl->refcnt == 0)
1984                 cbq_destroy_class(sch, cl);
1985
1986         return 0;
1987 }
1988
1989 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1990 {
1991         struct cbq_sched_data *q = qdisc_priv(sch);
1992         struct cbq_class *cl = (struct cbq_class *)arg;
1993
1994         if (cl == NULL)
1995                 cl = &q->link;
1996
1997         return &cl->filter_list;
1998 }
1999
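     /*
      * Filter binding: a filter may only be bound to a class strictly
      * below the class it is attached to in the hierarchy.  Successful
      * binds bump cl->filters, which in turn blocks cbq_delete() for that
      * class until the filters are unbound.
      */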
2000 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2001                                      u32 classid)
2002 {
2003         struct cbq_sched_data *q = qdisc_priv(sch);
2004         struct cbq_class *p = (struct cbq_class*)parent;
2005         struct cbq_class *cl = cbq_class_lookup(q, classid);
2006
2007         if (cl) {
2008                 if (p && p->level <= cl->level)
2009                         return 0;
2010                 cl->filters++;
2011                 return (unsigned long)cl;
2012         }
2013         return 0;
2014 }
2015
2016 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2017 {
2018         struct cbq_class *cl = (struct cbq_class*)arg;
2019
2020         cl->filters--;
2021 }
2022
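     /*
      * cbq_walk() iterates over the 16-bucket class hash, honouring the
      * walker's skip/count bookkeeping and stopping early when the
      * callback returns a negative value.
      */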
2023 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2024 {
2025         struct cbq_sched_data *q = qdisc_priv(sch);
2026         unsigned h;
2027
2028         if (arg->stop)
2029                 return;
2030
2031         for (h = 0; h < 16; h++) {
2032                 struct cbq_class *cl;
2033
2034                 for (cl = q->classes[h]; cl; cl = cl->next) {
2035                         if (arg->count < arg->skip) {
2036                                 arg->count++;
2037                                 continue;
2038                         }
2039                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2040                                 arg->stop = 1;
2041                                 return;
2042                         }
2043                         arg->count++;
2044                 }
2045         }
2046 }
2047
2048 static struct Qdisc_class_ops cbq_class_ops = {
2049         .graft          =       cbq_graft,
2050         .leaf           =       cbq_leaf,
2051         .qlen_notify    =       cbq_qlen_notify,
2052         .get            =       cbq_get,
2053         .put            =       cbq_put,
2054         .change         =       cbq_change_class,
2055         .delete         =       cbq_delete,
2056         .walk           =       cbq_walk,
2057         .tcf_chain      =       cbq_find_tcf,
2058         .bind_tcf       =       cbq_bind_filter,
2059         .unbind_tcf     =       cbq_unbind_filter,
2060         .dump           =       cbq_dump_class,
2061         .dump_stats     =       cbq_dump_class_stats,
2062 };
2063
2064 static struct Qdisc_ops cbq_qdisc_ops = {
2065         .next           =       NULL,
2066         .cl_ops         =       &cbq_class_ops,
2067         .id             =       "cbq",
2068         .priv_size      =       sizeof(struct cbq_sched_data),
2069         .enqueue        =       cbq_enqueue,
2070         .dequeue        =       cbq_dequeue,
2071         .requeue        =       cbq_requeue,
2072         .drop           =       cbq_drop,
2073         .init           =       cbq_init,
2074         .reset          =       cbq_reset,
2075         .destroy        =       cbq_destroy,
2076         .change         =       NULL,
2077         .dump           =       cbq_dump,
2078         .dump_stats     =       cbq_dump_stats,
2079         .owner          =       THIS_MODULE,
2080 };
2081
2082 static int __init cbq_module_init(void)
2083 {
2084         return register_qdisc(&cbq_qdisc_ops);
2085 }
2086 static void __exit cbq_module_exit(void)
2087 {
2088         unregister_qdisc(&cbq_qdisc_ops);
2089 }
2090 module_init(cbq_module_init)
2091 module_exit(cbq_module_exit)
2092 MODULE_LICENSE("GPL");