2 * net/sched/sch_cbq.c Class-Based Queueing discipline.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/kernel.h>
16 #include <linux/string.h>
17 #include <linux/errno.h>
18 #include <linux/skbuff.h>
19 #include <net/netlink.h>
20 #include <net/pkt_sched.h>
23 /* Class-Based Queueing (CBQ) algorithm.
24 =======================================
26 Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
27 Management Models for Packet Networks",
28 IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
30 [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
32 [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
35 [4] Sally Floyd and Michael Speer, "Experimental Results
36 for Class-Based Queueing", 1998, not published.
38 -----------------------------------------------------------------------
40 The algorithm skeleton was taken from the NS simulator's cbq.cc.
41 If someone wants to check this code against the LBL version,
42 they should take into account that ONLY the skeleton was borrowed;
43 the implementation is different. In particular:
45 --- The WRR algorithm is different. Our version looks more
46 reasonable (I hope) and works when quanta are allowed to be
47 less than the MTU, which is always the case when real-time classes
48 have small rates. Note that the statement of [3] is
49 incomplete; the delay may actually be estimated even if the class's
50 per-round allotment is less than the MTU. Namely, if the per-round
51 allotment is W*r_i, and r_1+...+r_k = r < 1,
53 delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
55 In the worst case we have IntServ estimate with D = W*r+k*MTU
56 and C = MTU*r. The proof (if correct at all) is trivial.
59 --- It seems that cbq-2.0 is not very accurate. At least, I cannot
60 interpret some places that look like wrong translations
61 from NS. Anyone is advised to find these differences
62 and explain to me why I am wrong 8).
64 --- Linux has no EOI event, so we cannot estimate the true class
65 idle time. The workaround is to consider the next dequeue event
66 as a sign that the previous packet has finished. This is wrong because of
67 internal device queueing, but on a permanently loaded link it is true.
68 Moreover, combined with the clock integrator, this scheme looks
69 very close to an ideal solution. */
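/*
 * Illustrative check of the bound above (all numbers hypothetical):
 * take MTU = 1500 B, B = 125000 B/s (1 Mbit/s), k = 5 classes,
 * r_i = 0.1 with per-round allotment W*r_i = 500 B, and r = 0.5,
 * so W*r = 2500 B. Then
 *
 *   delay_i <= (ceil(1500/500)*2500 + 2500 + 5*1500)/125000 s
 *            = (7500 + 2500 + 7500)/125000 s = 0.14 s,
 *
 * i.e. the bound stays finite even though the 500 B allotment is
 * well below the MTU.
 */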
71 struct cbq_sched_data;
76 struct cbq_class *next; /* hash table link */
77 struct cbq_class *next_alive; /* next class with backlog in this priority band */
81 unsigned char priority; /* class priority */
82 unsigned char priority2; /* priority to be used after overlimit */
83 unsigned char ewma_log; /* time constant for idle time calculation */
84 unsigned char ovl_strategy;
85 #ifdef CONFIG_NET_CLS_POLICE
91 /* Link-sharing scheduler parameters */
92 long maxidle; /* Class parameters: see below. */
96 struct qdisc_rate_table *R_tab;
98 /* Overlimit strategy parameters */
99 void (*overlimit)(struct cbq_class *cl);
100 psched_tdiff_t penalty;
102 /* General scheduler (WRR) parameters */
104 long quantum; /* Allotment per WRR round */
105 long weight; /* Relative allotment: see below */
107 struct Qdisc *qdisc; /* Ptr to CBQ discipline */
108 struct cbq_class *split; /* Ptr to split node */
109 struct cbq_class *share; /* Ptr to LS parent in the class tree */
110 struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
111 struct cbq_class *borrow; /* NULL if class is bandwidth limited;
113 struct cbq_class *sibling; /* Sibling chain */
114 struct cbq_class *children; /* Pointer to children chain */
116 struct Qdisc *q; /* Elementary queueing discipline */
120 unsigned char cpriority; /* Effective priority */
121 unsigned char delayed;
122 unsigned char level; /* level of the class in hierarchy:
123 0 for leaf classes, and maximal
124 level of children + 1 for nodes.
127 psched_time_t last; /* Last end of service */
128 psched_time_t undertime;
130 long deficit; /* Saved deficit for WRR */
131 psched_time_t penalized;
132 struct gnet_stats_basic bstats;
133 struct gnet_stats_queue qstats;
134 struct gnet_stats_rate_est rate_est;
135 struct tc_cbq_xstats xstats;
137 struct tcf_proto *filter_list;
142 struct cbq_class *defaults[TC_PRIO_MAX+1];
145 struct cbq_sched_data
147 struct cbq_class *classes[16]; /* Hash table of all classes */
148 int nclasses[TC_CBQ_MAXPRIO+1];
149 unsigned quanta[TC_CBQ_MAXPRIO+1];
151 struct cbq_class link;
154 struct cbq_class *active[TC_CBQ_MAXPRIO+1]; /* List of all classes
157 #ifdef CONFIG_NET_CLS_POLICE
158 struct cbq_class *rx_class;
160 struct cbq_class *tx_class;
161 struct cbq_class *tx_borrowed;
163 psched_time_t now; /* Cached timestamp */
164 psched_time_t now_rt; /* Cached real time */
167 struct hrtimer delay_timer;
168 struct qdisc_watchdog watchdog; /* Watchdog timer,
172 psched_tdiff_t wd_expires;
178 #define L2T(cl,len) ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
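/*
 * A rough, self-contained model (ours, not the kernel's table builder) of
 * what an L2T() lookup yields: the time to send 'len' bytes at
 * 'bytes_per_sec', here assumed to be in microsecond ticks. The real
 * data[] table is precomputed in user space and delivered via
 * TCA_CBQ_RTAB; the helper name, the unit assumption and the 32-bit-only
 * arithmetic are ours.
 */
static inline unsigned long cbq_l2t_model(unsigned long len,
					  unsigned long bytes_per_sec)
{
	/* Illustration only: keep len small enough that len * 1000000
	 * fits in an unsigned long. */
	return (len * 1000000UL) / bytes_per_sec;
}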
181 static __inline__ unsigned cbq_hash(u32 h)
188 static __inline__ struct cbq_class *
189 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
191 struct cbq_class *cl;
193 for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
194 if (cl->classid == classid)
199 #ifdef CONFIG_NET_CLS_POLICE
201 static struct cbq_class *
202 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
204 struct cbq_class *cl, *new;
206 for (cl = this->tparent; cl; cl = cl->tparent)
207 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
215 /* Classify the packet. The procedure is pretty complicated, but
216 it allows us to combine link sharing and priority scheduling
219 Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
220 so that it resolves to split nodes. Then packets are classified
221 by logical priority, or a more specific classifier may be attached
225 static struct cbq_class *
226 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
228 struct cbq_sched_data *q = qdisc_priv(sch);
229 struct cbq_class *head = &q->link;
230 struct cbq_class **defmap;
231 struct cbq_class *cl = NULL;
232 u32 prio = skb->priority;
233 struct tcf_result res;
236 * Step 1. If skb->priority points to one of our classes, use it.
238 if (TC_H_MAJ(prio^sch->handle) == 0 &&
239 (cl = cbq_class_lookup(q, prio)) != NULL)
242 *qerr = NET_XMIT_BYPASS;
245 defmap = head->defaults;
248 * Step 2+n. Apply classifier.
250 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
253 if ((cl = (void*)res.class) == NULL) {
254 if (TC_H_MAJ(res.classid))
255 cl = cbq_class_lookup(q, res.classid);
256 else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
257 cl = defmap[TC_PRIO_BESTEFFORT];
259 if (cl == NULL || cl->level >= head->level)
263 #ifdef CONFIG_NET_CLS_ACT
267 *qerr = NET_XMIT_SUCCESS;
271 #elif defined(CONFIG_NET_CLS_POLICE)
273 case TC_POLICE_RECLASSIFY:
274 return cbq_reclassify(skb, cl);
285 * Step 3+n. If the classifier selected a link-sharing class,
286 * apply the agency-specific classifier.
287 * Repeat this procedure until we hit a leaf node.
296 * Step 4. No success...
298 if (TC_H_MAJ(prio) == 0 &&
299 !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
300 !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
307 A packet has just been enqueued on an empty class.
308 cbq_activate_class adds the class to the tail of the active class list
309 of its priority band.
312 static __inline__ void cbq_activate_class(struct cbq_class *cl)
314 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
315 int prio = cl->cpriority;
316 struct cbq_class *cl_tail;
318 cl_tail = q->active[prio];
319 q->active[prio] = cl;
321 if (cl_tail != NULL) {
322 cl->next_alive = cl_tail->next_alive;
323 cl_tail->next_alive = cl;
326 q->activemask |= (1<<prio);
331 Unlink the class from the active chain.
332 Note that the same operation is done directly in cbq_dequeue*
333 during the round-robin procedure.
336 static void cbq_deactivate_class(struct cbq_class *this)
338 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
339 int prio = this->cpriority;
340 struct cbq_class *cl;
341 struct cbq_class *cl_prev = q->active[prio];
344 cl = cl_prev->next_alive;
346 cl_prev->next_alive = cl->next_alive;
347 cl->next_alive = NULL;
349 if (cl == q->active[prio]) {
350 q->active[prio] = cl_prev;
351 if (cl == q->active[prio]) {
352 q->active[prio] = NULL;
353 q->activemask &= ~(1<<prio);
359 } while ((cl_prev = cl) != q->active[prio]);
363 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
365 int toplevel = q->toplevel;
367 if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
371 now = psched_get_time();
372 incr = now - q->now_rt;
376 if (cl->undertime < now) {
377 q->toplevel = cl->level;
380 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
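/*
 * (Descriptive note, ours) q->toplevel implements the top-level
 * link-sharing guideline from [1]: cbq_under_limit() later refuses to
 * borrow from any ancestor whose level exceeds q->toplevel, so recording
 * the level of an under-limit class in cbq_mark_toplevel() above caps how
 * far up the tree other traffic may borrow.
 */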
385 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
387 struct cbq_sched_data *q = qdisc_priv(sch);
390 struct cbq_class *cl = cbq_classify(skb, sch, &ret);
392 #ifdef CONFIG_NET_CLS_POLICE
396 if (ret == NET_XMIT_BYPASS)
402 #ifdef CONFIG_NET_CLS_POLICE
403 cl->q->__parent = sch;
405 if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
407 sch->bstats.packets++;
408 sch->bstats.bytes+=len;
409 cbq_mark_toplevel(q, cl);
411 cbq_activate_class(cl);
416 cbq_mark_toplevel(q, cl);
422 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
424 struct cbq_sched_data *q = qdisc_priv(sch);
425 struct cbq_class *cl;
428 if ((cl = q->tx_class) == NULL) {
435 cbq_mark_toplevel(q, cl);
437 #ifdef CONFIG_NET_CLS_POLICE
439 cl->q->__parent = sch;
441 if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
443 sch->qstats.requeues++;
445 cbq_activate_class(cl);
453 /* Overlimit actions */
455 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
457 static void cbq_ovl_classic(struct cbq_class *cl)
459 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
460 psched_tdiff_t delay = cl->undertime - q->now;
463 delay += cl->offtime;
466 The class goes to sleep, so it will have no
467 chance to work off its avgidle. Let's forgive it 8)
469 BTW cbq-2.0 has a bug in this
470 place: apparently they forgot to shift it by cl->ewma_log.
473 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
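/*
 * Worked example (numbers hypothetical): with ewma_log = 3 (W = 1/8)
 * and a scaled avgidle of -800 (true avgidle = -100), the line above
 * credits back 800 - (800 >> 3) = 700 scaled units, i.e.
 * (1-W)*100 = 87.5 in true units -- not the full 800 that the
 * unshifted cbq-2.0 variant mentioned above would use.
 */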
474 if (cl->avgidle < cl->minidle)
475 cl->avgidle = cl->minidle;
478 cl->undertime = q->now + delay;
480 cl->xstats.overactions++;
483 if (q->wd_expires == 0 || q->wd_expires > delay)
484 q->wd_expires = delay;
486 /* Dirty work! We must schedule wakeups based on
487 real available rate, rather than leaf rate,
488 which may be tiny (even zero).
490 if (q->toplevel == TC_CBQ_MAXLEVEL) {
492 psched_tdiff_t base_delay = q->wd_expires;
494 for (b = cl->borrow; b; b = b->borrow) {
495 delay = b->undertime - q->now;
496 if (delay < base_delay) {
503 q->wd_expires = base_delay;
507 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
511 static void cbq_ovl_rclassic(struct cbq_class *cl)
513 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
514 struct cbq_class *this = cl;
517 if (cl->level > q->toplevel) {
521 } while ((cl = cl->borrow) != NULL);
528 /* TC_CBQ_OVL_DELAY: delay the class until it goes underlimit */
530 static void cbq_ovl_delay(struct cbq_class *cl)
532 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
533 psched_tdiff_t delay = cl->undertime - q->now;
536 psched_time_t sched = q->now;
539 delay += cl->offtime;
541 delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
542 if (cl->avgidle < cl->minidle)
543 cl->avgidle = cl->minidle;
544 cl->undertime = q->now + delay;
547 sched += delay + cl->penalty;
548 cl->penalized = sched;
549 cl->cpriority = TC_CBQ_MAXPRIO;
550 q->pmask |= (1<<TC_CBQ_MAXPRIO);
552 expires = ktime_set(0, 0);
553 expires = ktime_add_ns(expires, PSCHED_US2NS(sched));
554 if (hrtimer_try_to_cancel(&q->delay_timer) &&
555 ktime_to_ns(ktime_sub(q->delay_timer.expires,
557 q->delay_timer.expires = expires;
558 hrtimer_restart(&q->delay_timer);
560 cl->xstats.overactions++;
565 if (q->wd_expires == 0 || q->wd_expires > delay)
566 q->wd_expires = delay;
569 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
571 static void cbq_ovl_lowprio(struct cbq_class *cl)
573 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
575 cl->penalized = q->now + cl->penalty;
577 if (cl->cpriority != cl->priority2) {
578 cl->cpriority = cl->priority2;
579 q->pmask |= (1<<cl->cpriority);
580 cl->xstats.overactions++;
585 /* TC_CBQ_OVL_DROP: penalize class by dropping */
587 static void cbq_ovl_drop(struct cbq_class *cl)
589 if (cl->q->ops->drop)
590 if (cl->q->ops->drop(cl->q))
592 cl->xstats.overactions++;
596 static psched_tdiff_t cbq_undelay_prio(struct cbq_sched_data *q, int prio,
599 struct cbq_class *cl;
600 struct cbq_class *cl_prev = q->active[prio];
601 psched_time_t sched = now;
607 cl = cl_prev->next_alive;
608 if (now - cl->penalized > 0) {
609 cl_prev->next_alive = cl->next_alive;
610 cl->next_alive = NULL;
611 cl->cpriority = cl->priority;
613 cbq_activate_class(cl);
615 if (cl == q->active[prio]) {
616 q->active[prio] = cl_prev;
617 if (cl == q->active[prio]) {
618 q->active[prio] = NULL;
623 cl = cl_prev->next_alive;
624 } else if (sched - cl->penalized > 0)
625 sched = cl->penalized;
626 } while ((cl_prev = cl) != q->active[prio]);
631 static enum hrtimer_restart cbq_undelay(struct hrtimer *timer)
633 struct cbq_sched_data *q = container_of(timer, struct cbq_sched_data,
635 struct Qdisc *sch = q->watchdog.qdisc;
637 psched_tdiff_t delay = 0;
640 now = psched_get_time();
646 int prio = ffz(~pmask);
651 tmp = cbq_undelay_prio(q, prio, now);
654 if (tmp < delay || delay == 0)
662 time = ktime_set(0, 0);
663 time = ktime_add_ns(time, PSCHED_US2NS(now + delay));
664 hrtimer_start(&q->delay_timer, time, HRTIMER_MODE_ABS);
667 sch->flags &= ~TCQ_F_THROTTLED;
668 netif_schedule(sch->dev);
669 return HRTIMER_NORESTART;
673 #ifdef CONFIG_NET_CLS_POLICE
675 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
678 struct Qdisc *sch = child->__parent;
679 struct cbq_sched_data *q = qdisc_priv(sch);
680 struct cbq_class *cl = q->rx_class;
684 if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
686 cbq_mark_toplevel(q, cl);
689 cl->q->__parent = sch;
691 if (cl->q->enqueue(skb, cl->q) == 0) {
693 sch->bstats.packets++;
694 sch->bstats.bytes+=len;
696 cbq_activate_class(cl);
709 This is a mission-critical procedure.
711 We "regenerate" the toplevel cutoff if the transmitting class
712 has backlog and is not regulated. This is not part of the
713 original CBQ description, but it looks more reasonable.
714 It is probably wrong; this question needs further investigation.
717 static __inline__ void
718 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
719 struct cbq_class *borrowed)
721 if (cl && q->toplevel >= borrowed->level) {
722 if (cl->q->q.qlen > 1) {
724 if (borrowed->undertime == PSCHED_PASTPERFECT) {
725 q->toplevel = borrowed->level;
728 } while ((borrowed=borrowed->borrow) != NULL);
731 /* This is not necessary now. Uncommenting it
732 would save CPU cycles, but decrease fairness.
734 q->toplevel = TC_CBQ_MAXLEVEL;
740 cbq_update(struct cbq_sched_data *q)
742 struct cbq_class *this = q->tx_class;
743 struct cbq_class *cl = this;
748 for ( ; cl; cl = cl->share) {
749 long avgidle = cl->avgidle;
752 cl->bstats.packets++;
753 cl->bstats.bytes += len;
756 (now - last) is the total time between packet right edges.
757 (last_pktlen/rate) is the "virtual" busy time, so that
759 idle = (now - last) - last_pktlen/rate
762 idle = q->now - cl->last;
763 if ((unsigned long)idle > 128*1024*1024) {
764 avgidle = cl->maxidle;
766 idle -= L2T(cl, len);
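/*
 * E.g. (hypothetical numbers): if 200us elapsed since the previous
 * packet's right edge and this packet needs 150us at the class rate,
 * idle = 200 - 150 = 50us of earned credit; a negative result means the
 * class has been sending faster than its allocated rate.
 */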
768 /* true_avgidle := (1-W)*true_avgidle + W*idle,
769 where W=2^{-ewma_log}. But cl->avgidle is scaled:
770 cl->avgidle == true_avgidle/W,
773 avgidle += idle - (avgidle>>cl->ewma_log);
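/*
 * Worked example (hypothetical numbers): with ewma_log = 3 (W = 1/8),
 * avgidle = 800 (true avgidle = 100) and idle = 40, the update gives
 * 800 + 40 - (800 >> 3) = 740, i.e. a true avgidle of 740/8 = 92.5,
 * matching the unscaled (1-W)*100 + W*40 = 92.5.
 */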
777 /* Overlimit or at-limit */
779 if (avgidle < cl->minidle)
780 avgidle = cl->minidle;
782 cl->avgidle = avgidle;
784 /* Calculate the expected time when this class
785 will be allowed to send:
787 (1-W)*true_avgidle + W*delay = 0, i.e.
788 idle = (1/W - 1)*(-true_avgidle)
790 or, using the scaled cl->avgidle, idle = (1 - W)*(-cl->avgidle);
792 idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
796 To maintain the rate allocated to the class,
797 we add to undertime the virtual clock time
798 necessary to complete the transmitted packet
799 (len/phys_bandwidth has already elapsed by
800 the time cbq_update is called).
803 idle -= L2T(&q->link, len);
804 idle += L2T(cl, len);
806 cl->undertime = q->now + idle;
810 cl->undertime = PSCHED_PASTPERFECT;
811 if (avgidle > cl->maxidle)
812 cl->avgidle = cl->maxidle;
814 cl->avgidle = avgidle;
819 cbq_update_toplevel(q, this, q->tx_borrowed);
822 static __inline__ struct cbq_class *
823 cbq_under_limit(struct cbq_class *cl)
825 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
826 struct cbq_class *this_cl = cl;
828 if (cl->tparent == NULL)
831 if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
837 /* This is a very suspicious place. Currently the overlimit
838 action is generated for non-bounded classes
839 only if the link is completely congested.
840 Though this agrees with the ancestor-only paradigm,
841 it looks very unwise. In particular,
842 it means that this chunk of code will either
843 never be called or result in strong amplification
844 of burstiness. Dangerous and silly, but
845 no other solution exists.
847 if ((cl = cl->borrow) == NULL) {
848 this_cl->qstats.overlimits++;
849 this_cl->overlimit(this_cl);
852 if (cl->level > q->toplevel)
854 } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
860 static __inline__ struct sk_buff *
861 cbq_dequeue_prio(struct Qdisc *sch, int prio)
863 struct cbq_sched_data *q = qdisc_priv(sch);
864 struct cbq_class *cl_tail, *cl_prev, *cl;
868 cl_tail = cl_prev = q->active[prio];
869 cl = cl_prev->next_alive;
876 struct cbq_class *borrow = cl;
879 (borrow = cbq_under_limit(cl)) == NULL)
882 if (cl->deficit <= 0) {
883 /* The class exhausted its allotment for
884 this round. Switch to the next one.
887 cl->deficit += cl->quantum;
891 skb = cl->q->dequeue(cl->q);
893 /* The class did not give us any skb :-(
894 This can occur even if cl->q->q.qlen != 0,
895 e.g. if cl->q == "tbf"
900 cl->deficit -= skb->len;
902 q->tx_borrowed = borrow;
904 #ifndef CBQ_XSTATS_BORROWS_BYTES
905 borrow->xstats.borrows++;
906 cl->xstats.borrows++;
908 borrow->xstats.borrows += skb->len;
909 cl->xstats.borrows += skb->len;
912 q->tx_len = skb->len;
914 if (cl->deficit <= 0) {
915 q->active[prio] = cl;
917 cl->deficit += cl->quantum;
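/*
 * Net effect (descriptive, ours): a backlogged class sends roughly one
 * quantum worth of bytes per WRR round on average; once its deficit goes
 * non-positive it is topped up by cl->quantum and the round pointer
 * moves on to the next class in this band.
 */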
922 if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
923 /* The class is empty or penalized.
924 Unlink it from the active chain.
926 cl_prev->next_alive = cl->next_alive;
927 cl->next_alive = NULL;
929 /* Did cl_tail point to it? */
934 /* Was it the last class in this band? */
937 q->active[prio] = NULL;
938 q->activemask &= ~(1<<prio);
940 cbq_activate_class(cl);
944 q->active[prio] = cl_tail;
947 cbq_activate_class(cl);
955 } while (cl_prev != cl_tail);
958 q->active[prio] = cl_prev;
963 static __inline__ struct sk_buff *
964 cbq_dequeue_1(struct Qdisc *sch)
966 struct cbq_sched_data *q = qdisc_priv(sch);
970 activemask = q->activemask&0xFF;
972 int prio = ffz(~activemask);
973 activemask &= ~(1<<prio);
974 skb = cbq_dequeue_prio(sch, prio);
981 static struct sk_buff *
982 cbq_dequeue(struct Qdisc *sch)
985 struct cbq_sched_data *q = qdisc_priv(sch);
989 now = psched_get_time();
990 incr = now - q->now_rt;
993 psched_tdiff_t incr2;
994 /* Time integrator. We calculate the EOS time
995 by adding the expected packet transmission time.
996 If real time is greater, we warp the artificial clock, so that
999 cbq_time = max(real_time, work);
1001 incr2 = L2T(&q->link, q->tx_len);
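/*
 * Example (hypothetical numbers): if the last packet's nominal
 * transmission time incr2 is 120us but only incr = 80us of real time
 * has elapsed, the artificial clock runs ahead of real time and the
 * difference is clipped below, giving cbq_time = max(real_time, work)
 * as described above.
 */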
1004 if ((incr -= incr2) < 0)
1013 skb = cbq_dequeue_1(sch);
1016 sch->flags &= ~TCQ_F_THROTTLED;
1020 /* All the classes are overlimit. This is possible if:
1024 1. The scheduler is empty.
1025 2. The toplevel cutoff inhibited borrowing.
1026 3. The root class is overlimit.
1028 Reset the 2nd and 3rd conditions and retry.
1030 Note that NS and cbq-2.0 are buggy here: peeking
1031 an arbitrary class is appropriate for ancestor-only
1032 sharing, but not for the toplevel algorithm.
1034 Our version is better but slower, because it requires
1035 two passes; this is unavoidable with top-level sharing.
1038 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1039 q->link.undertime == PSCHED_PASTPERFECT)
1042 q->toplevel = TC_CBQ_MAXLEVEL;
1043 q->link.undertime = PSCHED_PASTPERFECT;
1046 /* No packets in the scheduler, or nobody wants to give them to us :-(
1047 Sigh... start the watchdog timer in the latter case. */
1050 sch->qstats.overlimits++;
1052 qdisc_watchdog_schedule(&q->watchdog,
1053 now + q->wd_expires);
1058 /* CBQ class maintenance routines */
1060 static void cbq_adjust_levels(struct cbq_class *this)
1067 struct cbq_class *cl;
1069 if ((cl = this->children) != NULL) {
1071 if (cl->level > level)
1073 } while ((cl = cl->sibling) != this->children);
1075 this->level = level+1;
1076 } while ((this = this->tparent) != NULL);
1079 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1081 struct cbq_class *cl;
1084 if (q->quanta[prio] == 0)
1087 for (h=0; h<16; h++) {
1088 for (cl = q->classes[h]; cl; cl = cl->next) {
1089 /* BUGGGG... Beware! This expression suffers from
1090 arithmetic overflow!
1092 if (cl->priority == prio) {
1093 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1096 if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1097 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1098 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
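/*
 * Concrete instance of the overflow warned about above (numbers
 * hypothetical): with weight = 1250000 (10 Mbit/s in bytes/s),
 * allot = 1514 and nclasses[prio] = 4, the product
 * 1250000*1514*4 is roughly 7.6e9 and wraps around on a 32-bit
 * machine; a 64-bit intermediate would be needed to compute the
 * quantum exactly.
 */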
1104 static void cbq_sync_defmap(struct cbq_class *cl)
1106 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1107 struct cbq_class *split = cl->split;
1114 for (i=0; i<=TC_PRIO_MAX; i++) {
1115 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1116 split->defaults[i] = NULL;
1119 for (i=0; i<=TC_PRIO_MAX; i++) {
1120 int level = split->level;
1122 if (split->defaults[i])
1125 for (h=0; h<16; h++) {
1126 struct cbq_class *c;
1128 for (c = q->classes[h]; c; c = c->next) {
1129 if (c->split == split && c->level < level &&
1131 split->defaults[i] = c;
1139 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1141 struct cbq_class *split = NULL;
1144 if ((split = cl->split) == NULL)
1146 splitid = split->classid;
1149 if (split == NULL || split->classid != splitid) {
1150 for (split = cl->tparent; split; split = split->tparent)
1151 if (split->classid == splitid)
1158 if (cl->split != split) {
1160 cbq_sync_defmap(cl);
1162 cl->defmap = def&mask;
1164 cl->defmap = (cl->defmap&~mask)|(def&mask);
1166 cbq_sync_defmap(cl);
1169 static void cbq_unlink_class(struct cbq_class *this)
1171 struct cbq_class *cl, **clp;
1172 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1174 for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1182 if (this->tparent) {
1191 } while ((cl = *clp) != this->sibling);
1193 if (this->tparent->children == this) {
1194 this->tparent->children = this->sibling;
1195 if (this->sibling == this)
1196 this->tparent->children = NULL;
1199 BUG_TRAP(this->sibling == this);
1203 static void cbq_link_class(struct cbq_class *this)
1205 struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1206 unsigned h = cbq_hash(this->classid);
1207 struct cbq_class *parent = this->tparent;
1209 this->sibling = this;
1210 this->next = q->classes[h];
1211 q->classes[h] = this;
1216 if (parent->children == NULL) {
1217 parent->children = this;
1219 this->sibling = parent->children->sibling;
1220 parent->children->sibling = this;
1224 static unsigned int cbq_drop(struct Qdisc* sch)
1226 struct cbq_sched_data *q = qdisc_priv(sch);
1227 struct cbq_class *cl, *cl_head;
1231 for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1232 if ((cl_head = q->active[prio]) == NULL)
1237 if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1240 cbq_deactivate_class(cl);
1243 } while ((cl = cl->next_alive) != cl_head);
1249 cbq_reset(struct Qdisc* sch)
1251 struct cbq_sched_data *q = qdisc_priv(sch);
1252 struct cbq_class *cl;
1259 q->tx_borrowed = NULL;
1260 qdisc_watchdog_cancel(&q->watchdog);
1261 hrtimer_cancel(&q->delay_timer);
1262 q->toplevel = TC_CBQ_MAXLEVEL;
1263 q->now = psched_get_time();
1266 for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1267 q->active[prio] = NULL;
1269 for (h = 0; h < 16; h++) {
1270 for (cl = q->classes[h]; cl; cl = cl->next) {
1273 cl->next_alive = NULL;
1274 cl->undertime = PSCHED_PASTPERFECT;
1275 cl->avgidle = cl->maxidle;
1276 cl->deficit = cl->quantum;
1277 cl->cpriority = cl->priority;
1284 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1286 if (lss->change&TCF_CBQ_LSS_FLAGS) {
1287 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1288 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1290 if (lss->change&TCF_CBQ_LSS_EWMA)
1291 cl->ewma_log = lss->ewma_log;
1292 if (lss->change&TCF_CBQ_LSS_AVPKT)
1293 cl->avpkt = lss->avpkt;
1294 if (lss->change&TCF_CBQ_LSS_MINIDLE)
1295 cl->minidle = -(long)lss->minidle;
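/*
 * Note (ours): minidle is configured from user space as a positive
 * magnitude and stored negated, so the later clamps of the form
 * "if (cl->avgidle < cl->minidle)" bound avgidle from below at the
 * configured value; cbq_dump_lss() negates it back when reporting.
 */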
1296 if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1297 cl->maxidle = lss->maxidle;
1298 cl->avgidle = lss->maxidle;
1300 if (lss->change&TCF_CBQ_LSS_OFFTIME)
1301 cl->offtime = lss->offtime;
1305 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1307 q->nclasses[cl->priority]--;
1308 q->quanta[cl->priority] -= cl->weight;
1309 cbq_normalize_quanta(q, cl->priority);
1312 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1314 q->nclasses[cl->priority]++;
1315 q->quanta[cl->priority] += cl->weight;
1316 cbq_normalize_quanta(q, cl->priority);
1319 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1321 struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1324 cl->allot = wrr->allot;
1326 cl->weight = wrr->weight;
1327 if (wrr->priority) {
1328 cl->priority = wrr->priority-1;
1329 cl->cpriority = cl->priority;
1330 if (cl->priority >= cl->priority2)
1331 cl->priority2 = TC_CBQ_MAXPRIO-1;
1338 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1340 switch (ovl->strategy) {
1341 case TC_CBQ_OVL_CLASSIC:
1342 cl->overlimit = cbq_ovl_classic;
1344 case TC_CBQ_OVL_DELAY:
1345 cl->overlimit = cbq_ovl_delay;
1347 case TC_CBQ_OVL_LOWPRIO:
1348 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1349 ovl->priority2-1 <= cl->priority)
1351 cl->priority2 = ovl->priority2-1;
1352 cl->overlimit = cbq_ovl_lowprio;
1354 case TC_CBQ_OVL_DROP:
1355 cl->overlimit = cbq_ovl_drop;
1357 case TC_CBQ_OVL_RCLASSIC:
1358 cl->overlimit = cbq_ovl_rclassic;
1363 cl->penalty = ovl->penalty;
1367 #ifdef CONFIG_NET_CLS_POLICE
1368 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1370 cl->police = p->police;
1372 if (cl->q->handle) {
1373 if (p->police == TC_POLICE_RECLASSIFY)
1374 cl->q->reshape_fail = cbq_reshape_fail;
1376 cl->q->reshape_fail = NULL;
1382 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1384 cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1388 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1390 struct cbq_sched_data *q = qdisc_priv(sch);
1391 struct rtattr *tb[TCA_CBQ_MAX];
1392 struct tc_ratespec *r;
1394 if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1395 tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1396 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1399 if (tb[TCA_CBQ_LSSOPT-1] &&
1400 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1403 r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1405 if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1409 q->link.sibling = &q->link;
1410 q->link.classid = sch->handle;
1411 q->link.qdisc = sch;
1412 if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1414 q->link.q = &noop_qdisc;
1416 q->link.priority = TC_CBQ_MAXPRIO-1;
1417 q->link.priority2 = TC_CBQ_MAXPRIO-1;
1418 q->link.cpriority = TC_CBQ_MAXPRIO-1;
1419 q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1420 q->link.overlimit = cbq_ovl_classic;
1421 q->link.allot = psched_mtu(sch->dev);
1422 q->link.quantum = q->link.allot;
1423 q->link.weight = q->link.R_tab->rate.rate;
1425 q->link.ewma_log = TC_CBQ_DEF_EWMA;
1426 q->link.avpkt = q->link.allot/2;
1427 q->link.minidle = -0x7FFFFFFF;
1429 qdisc_watchdog_init(&q->watchdog, sch);
1430 hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
1431 q->delay_timer.function = cbq_undelay;
1432 q->toplevel = TC_CBQ_MAXLEVEL;
1433 q->now = psched_get_time();
1436 cbq_link_class(&q->link);
1438 if (tb[TCA_CBQ_LSSOPT-1])
1439 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1441 cbq_addprio(q, &q->link);
1445 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1447 unsigned char *b = skb_tail_pointer(skb);
1449 RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1457 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1459 unsigned char *b = skb_tail_pointer(skb);
1460 struct tc_cbq_lssopt opt;
1463 if (cl->borrow == NULL)
1464 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1465 if (cl->share == NULL)
1466 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1467 opt.ewma_log = cl->ewma_log;
1468 opt.level = cl->level;
1469 opt.avpkt = cl->avpkt;
1470 opt.maxidle = cl->maxidle;
1471 opt.minidle = (u32)(-cl->minidle);
1472 opt.offtime = cl->offtime;
1474 RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1482 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1484 unsigned char *b = skb_tail_pointer(skb);
1485 struct tc_cbq_wrropt opt;
1488 opt.allot = cl->allot;
1489 opt.priority = cl->priority+1;
1490 opt.cpriority = cl->cpriority+1;
1491 opt.weight = cl->weight;
1492 RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1500 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1502 unsigned char *b = skb_tail_pointer(skb);
1503 struct tc_cbq_ovl opt;
1505 opt.strategy = cl->ovl_strategy;
1506 opt.priority2 = cl->priority2+1;
1508 opt.penalty = cl->penalty;
1509 RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1517 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1519 unsigned char *b = skb_tail_pointer(skb);
1520 struct tc_cbq_fopt opt;
1522 if (cl->split || cl->defmap) {
1523 opt.split = cl->split ? cl->split->classid : 0;
1524 opt.defmap = cl->defmap;
1526 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1535 #ifdef CONFIG_NET_CLS_POLICE
1536 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1538 unsigned char *b = skb_tail_pointer(skb);
1539 struct tc_cbq_police opt;
1542 opt.police = cl->police;
1545 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1555 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1557 if (cbq_dump_lss(skb, cl) < 0 ||
1558 cbq_dump_rate(skb, cl) < 0 ||
1559 cbq_dump_wrr(skb, cl) < 0 ||
1560 cbq_dump_ovl(skb, cl) < 0 ||
1561 #ifdef CONFIG_NET_CLS_POLICE
1562 cbq_dump_police(skb, cl) < 0 ||
1564 cbq_dump_fopt(skb, cl) < 0)
1569 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1571 struct cbq_sched_data *q = qdisc_priv(sch);
1572 unsigned char *b = skb_tail_pointer(skb);
1575 rta = (struct rtattr*)b;
1576 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1577 if (cbq_dump_attr(skb, &q->link) < 0)
1578 goto rtattr_failure;
1579 rta->rta_len = skb_tail_pointer(skb) - b;
1588 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1590 struct cbq_sched_data *q = qdisc_priv(sch);
1592 q->link.xstats.avgidle = q->link.avgidle;
1593 return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1597 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1598 struct sk_buff *skb, struct tcmsg *tcm)
1600 struct cbq_class *cl = (struct cbq_class*)arg;
1601 unsigned char *b = skb_tail_pointer(skb);
1605 tcm->tcm_parent = cl->tparent->classid;
1607 tcm->tcm_parent = TC_H_ROOT;
1608 tcm->tcm_handle = cl->classid;
1609 tcm->tcm_info = cl->q->handle;
1611 rta = (struct rtattr*)b;
1612 RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1613 if (cbq_dump_attr(skb, cl) < 0)
1614 goto rtattr_failure;
1615 rta->rta_len = skb_tail_pointer(skb) - b;
1624 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1625 struct gnet_dump *d)
1627 struct cbq_sched_data *q = qdisc_priv(sch);
1628 struct cbq_class *cl = (struct cbq_class*)arg;
1630 cl->qstats.qlen = cl->q->q.qlen;
1631 cl->xstats.avgidle = cl->avgidle;
1632 cl->xstats.undertime = 0;
1634 if (cl->undertime != PSCHED_PASTPERFECT)
1635 cl->xstats.undertime = cl->undertime - q->now;
1637 if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1638 gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1639 gnet_stats_copy_queue(d, &cl->qstats) < 0)
1642 return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1645 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1648 struct cbq_class *cl = (struct cbq_class*)arg;
1652 if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1653 cl->classid)) == NULL)
1656 #ifdef CONFIG_NET_CLS_POLICE
1657 if (cl->police == TC_POLICE_RECLASSIFY)
1658 new->reshape_fail = cbq_reshape_fail;
1662 *old = xchg(&cl->q, new);
1663 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1665 sch_tree_unlock(sch);
1672 static struct Qdisc *
1673 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1675 struct cbq_class *cl = (struct cbq_class*)arg;
1677 return cl ? cl->q : NULL;
1680 static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
1682 struct cbq_class *cl = (struct cbq_class *)arg;
1684 if (cl->q->q.qlen == 0)
1685 cbq_deactivate_class(cl);
1688 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1690 struct cbq_sched_data *q = qdisc_priv(sch);
1691 struct cbq_class *cl = cbq_class_lookup(q, classid);
1695 return (unsigned long)cl;
1700 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1702 struct cbq_sched_data *q = qdisc_priv(sch);
1704 BUG_TRAP(!cl->filters);
1706 tcf_destroy_chain(cl->filter_list);
1707 qdisc_destroy(cl->q);
1708 qdisc_put_rtab(cl->R_tab);
1709 gen_kill_estimator(&cl->bstats, &cl->rate_est);
1715 cbq_destroy(struct Qdisc* sch)
1717 struct cbq_sched_data *q = qdisc_priv(sch);
1718 struct cbq_class *cl;
1721 #ifdef CONFIG_NET_CLS_POLICE
1725 * Filters must be destroyed first because we don't destroy the
1726 * classes from root to leaves, which means that filters can still
1727 * be bound to classes which have already been destroyed. --TGR '04
1729 for (h = 0; h < 16; h++) {
1730 for (cl = q->classes[h]; cl; cl = cl->next) {
1731 tcf_destroy_chain(cl->filter_list);
1732 cl->filter_list = NULL;
1735 for (h = 0; h < 16; h++) {
1736 struct cbq_class *next;
1738 for (cl = q->classes[h]; cl; cl = next) {
1740 cbq_destroy_class(sch, cl);
1745 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1747 struct cbq_class *cl = (struct cbq_class*)arg;
1749 if (--cl->refcnt == 0) {
1750 #ifdef CONFIG_NET_CLS_POLICE
1751 struct cbq_sched_data *q = qdisc_priv(sch);
1753 spin_lock_bh(&sch->dev->queue_lock);
1754 if (q->rx_class == cl)
1756 spin_unlock_bh(&sch->dev->queue_lock);
1759 cbq_destroy_class(sch, cl);
1764 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1768 struct cbq_sched_data *q = qdisc_priv(sch);
1769 struct cbq_class *cl = (struct cbq_class*)*arg;
1770 struct rtattr *opt = tca[TCA_OPTIONS-1];
1771 struct rtattr *tb[TCA_CBQ_MAX];
1772 struct cbq_class *parent;
1773 struct qdisc_rate_table *rtab = NULL;
1775 if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1778 if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1779 RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1782 if (tb[TCA_CBQ_FOPT-1] &&
1783 RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1786 if (tb[TCA_CBQ_RATE-1] &&
1787 RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1790 if (tb[TCA_CBQ_LSSOPT-1] &&
1791 RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1794 if (tb[TCA_CBQ_WRROPT-1] &&
1795 RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1798 #ifdef CONFIG_NET_CLS_POLICE
1799 if (tb[TCA_CBQ_POLICE-1] &&
1800 RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1807 if (cl->tparent && cl->tparent->classid != parentid)
1809 if (!cl->tparent && parentid != TC_H_ROOT)
1813 if (tb[TCA_CBQ_RATE-1]) {
1814 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1819 /* Change class parameters */
1822 if (cl->next_alive != NULL)
1823 cbq_deactivate_class(cl);
1826 rtab = xchg(&cl->R_tab, rtab);
1827 qdisc_put_rtab(rtab);
1830 if (tb[TCA_CBQ_LSSOPT-1])
1831 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1833 if (tb[TCA_CBQ_WRROPT-1]) {
1835 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1838 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1839 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1841 #ifdef CONFIG_NET_CLS_POLICE
1842 if (tb[TCA_CBQ_POLICE-1])
1843 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1846 if (tb[TCA_CBQ_FOPT-1])
1847 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1850 cbq_activate_class(cl);
1852 sch_tree_unlock(sch);
1854 if (tca[TCA_RATE-1])
1855 gen_replace_estimator(&cl->bstats, &cl->rate_est,
1856 &sch->dev->queue_lock,
1861 if (parentid == TC_H_ROOT)
1864 if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1865 tb[TCA_CBQ_LSSOPT-1] == NULL)
1868 rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1874 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1878 classid = TC_H_MAKE(sch->handle,0x8000);
1880 for (i=0; i<0x8000; i++) {
1881 if (++q->hgenerator >= 0x8000)
1883 if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1889 classid = classid|q->hgenerator;
1894 parent = cbq_class_lookup(q, parentid);
1901 cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1907 if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1908 cl->q = &noop_qdisc;
1909 cl->classid = classid;
1910 cl->tparent = parent;
1912 cl->allot = parent->allot;
1913 cl->quantum = cl->allot;
1914 cl->weight = cl->R_tab->rate.rate;
1918 cl->borrow = cl->tparent;
1919 if (cl->tparent != &q->link)
1920 cl->share = cl->tparent;
1921 cbq_adjust_levels(parent);
1922 cl->minidle = -0x7FFFFFFF;
1923 cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1924 cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1925 if (cl->ewma_log==0)
1926 cl->ewma_log = q->link.ewma_log;
1928 cl->maxidle = q->link.maxidle;
1930 cl->avpkt = q->link.avpkt;
1931 cl->overlimit = cbq_ovl_classic;
1932 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1933 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1934 #ifdef CONFIG_NET_CLS_POLICE
1935 if (tb[TCA_CBQ_POLICE-1])
1936 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1938 if (tb[TCA_CBQ_FOPT-1])
1939 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1940 sch_tree_unlock(sch);
1942 if (tca[TCA_RATE-1])
1943 gen_new_estimator(&cl->bstats, &cl->rate_est,
1944 &sch->dev->queue_lock, tca[TCA_RATE-1]);
1946 *arg = (unsigned long)cl;
1950 qdisc_put_rtab(rtab);
1954 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1956 struct cbq_sched_data *q = qdisc_priv(sch);
1957 struct cbq_class *cl = (struct cbq_class*)arg;
1960 if (cl->filters || cl->children || cl == &q->link)
1965 qlen = cl->q->q.qlen;
1967 qdisc_tree_decrease_qlen(cl->q, qlen);
1970 cbq_deactivate_class(cl);
1972 if (q->tx_borrowed == cl)
1973 q->tx_borrowed = q->tx_class;
1974 if (q->tx_class == cl) {
1976 q->tx_borrowed = NULL;
1978 #ifdef CONFIG_NET_CLS_POLICE
1979 if (q->rx_class == cl)
1983 cbq_unlink_class(cl);
1984 cbq_adjust_levels(cl->tparent);
1986 cbq_sync_defmap(cl);
1989 sch_tree_unlock(sch);
1991 if (--cl->refcnt == 0)
1992 cbq_destroy_class(sch, cl);
1997 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
1999 struct cbq_sched_data *q = qdisc_priv(sch);
2000 struct cbq_class *cl = (struct cbq_class *)arg;
2005 return &cl->filter_list;
2008 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2011 struct cbq_sched_data *q = qdisc_priv(sch);
2012 struct cbq_class *p = (struct cbq_class*)parent;
2013 struct cbq_class *cl = cbq_class_lookup(q, classid);
2016 if (p && p->level <= cl->level)
2019 return (unsigned long)cl;
2024 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2026 struct cbq_class *cl = (struct cbq_class*)arg;
2031 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2033 struct cbq_sched_data *q = qdisc_priv(sch);
2039 for (h = 0; h < 16; h++) {
2040 struct cbq_class *cl;
2042 for (cl = q->classes[h]; cl; cl = cl->next) {
2043 if (arg->count < arg->skip) {
2047 if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2056 static struct Qdisc_class_ops cbq_class_ops = {
2059 .qlen_notify = cbq_qlen_notify,
2062 .change = cbq_change_class,
2063 .delete = cbq_delete,
2065 .tcf_chain = cbq_find_tcf,
2066 .bind_tcf = cbq_bind_filter,
2067 .unbind_tcf = cbq_unbind_filter,
2068 .dump = cbq_dump_class,
2069 .dump_stats = cbq_dump_class_stats,
2072 static struct Qdisc_ops cbq_qdisc_ops = {
2074 .cl_ops = &cbq_class_ops,
2076 .priv_size = sizeof(struct cbq_sched_data),
2077 .enqueue = cbq_enqueue,
2078 .dequeue = cbq_dequeue,
2079 .requeue = cbq_requeue,
2083 .destroy = cbq_destroy,
2086 .dump_stats = cbq_dump_stats,
2087 .owner = THIS_MODULE,
2090 static int __init cbq_module_init(void)
2092 return register_qdisc(&cbq_qdisc_ops);
2094 static void __exit cbq_module_exit(void)
2096 unregister_qdisc(&cbq_qdisc_ops);
2098 module_init(cbq_module_init)
2099 module_exit(cbq_module_exit)
2100 MODULE_LICENSE("GPL");