git.oblomov.eu Git - linux-2.6/blob - net/sched/sch_cbq.c

   1 /*
   2  * net/sched/sch_cbq.c  Class-Based Queueing discipline.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  10  *
  11  */
  12
  13 #include <linux/module.h>
  14 #include <asm/uaccess.h>
  15 #include <asm/system.h>
  16 #include <linux/bitops.h>
  17 #include <linux/types.h>
  18 #include <linux/kernel.h>
  19 #include <linux/sched.h>
  20 #include <linux/string.h>
  21 #include <linux/mm.h>
  22 #include <linux/socket.h>
  23 #include <linux/sockios.h>
  24 #include <linux/in.h>
  25 #include <linux/errno.h>
  26 #include <linux/interrupt.h>
  27 #include <linux/if_ether.h>
  28 #include <linux/inet.h>
  29 #include <linux/netdevice.h>
  30 #include <linux/etherdevice.h>
  31 #include <linux/notifier.h>
  32 #include <net/ip.h>
  33 #include <net/route.h>
  34 #include <linux/skbuff.h>
  35 #include <net/sock.h>
  36 #include <net/pkt_sched.h>
  37
  38
  39 /*      Class-Based Queueing (CBQ) algorithm.
  40         =======================================
  41
  42         Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
  43                  Management Models for Packet Networks",
  44                  IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
  45
  46                  [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
  47
  48                  [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
  49                  Parameters", 1996
  50
  51                  [4] Sally Floyd and Michael Speer, "Experimental Results
  52                  for Class-Based Queueing", 1998, not published.
  53
  54         -----------------------------------------------------------------------
  55
  56         Algorithm skeleton was taken from NS simulator cbq.cc.
  57         If someone wants to check this code against the LBL version,
  58         he should take into account that ONLY the skeleton was borrowed,
  59         the implementation is different. Particularly:
  60
  61         --- The WRR algorithm is different. Our version looks more
  62         reasonable (I hope) and works when quanta are allowed to be
  63         less than MTU, which is always the case when real time classes
  64         have small rates. Note, that the statement of [3] is
  65         incomplete, delay may actually be estimated even if class
  66         per-round allotment is less than MTU. Namely, if per-round
  67         allotment is W*r_i, and r_1+...+r_k = r < 1
  68
  69         delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
  70
  71         In the worst case we have IntServ estimate with D = W*r+k*MTU
  72         and C = MTU*r. The proof (if correct at all) is trivial.
  73
  74
  75         --- It seems that cbq-2.0 is not very accurate. At least, I cannot
  76         interpret some places, which look like wrong translations
  77         from NS. Anyone is advised to find these differences
  78         and explain to me, why I am wrong 8).
  79
  80         --- Linux has no EOI event, so that we cannot estimate true class
  81         idle time. Workaround is to consider the next dequeue event
  82         as sign that previous packet is finished. This is wrong because of
  83         internal device queueing, but on a permanently loaded link it is true.
  84         Moreover, combined with clock integrator, this scheme looks
  85         very close to an ideal solution.  */
  86
  87 struct cbq_sched_data;
  88
  89
  90 struct cbq_class
  91 {
  92         struct cbq_class        *next;          /* hash table link */
  93         struct cbq_class        *next_alive;    /* next class with backlog in this priority band */
  94
  95 /* Parameters */
  96         u32                     classid;
  97         unsigned char           priority;       /* class priority */
  98         unsigned char           priority2;      /* priority to be used after overlimit */
  99         unsigned char           ewma_log;       /* time constant for idle time calculation */
 100         unsigned char           ovl_strategy;
 101 #ifdef CONFIG_NET_CLS_POLICE
 102         unsigned char           police;
 103 #endif
 104
 105         u32                     defmap;
 106
 107         /* Link-sharing scheduler parameters */
 108         long                    maxidle;        /* Class parameters: see below. */
 109         long                    offtime;
 110         long                    minidle;
 111         u32                     avpkt;
 112         struct qdisc_rate_table *R_tab;
 113
 114         /* Overlimit strategy parameters */
 115         void                    (*overlimit)(struct cbq_class *cl);
 116         long                    penalty;
 117
 118         /* General scheduler (WRR) parameters */
 119         long                    allot;
 120         long                    quantum;        /* Allotment per WRR round */
 121         long                    weight;         /* Relative allotment: see below */
 122
 123         struct Qdisc            *qdisc;         /* Ptr to CBQ discipline */
 124         struct cbq_class        *split;         /* Ptr to split node */
 125         struct cbq_class        *share;         /* Ptr to LS parent in the class tree */
 126         struct cbq_class        *tparent;       /* Ptr to tree parent in the class tree */
 127         struct cbq_class        *borrow;        /* NULL if class is bandwidth limited;
 128                                                    parent otherwise */
 129         struct cbq_class        *sibling;       /* Sibling chain */
 130         struct cbq_class        *children;      /* Pointer to children chain */
 131
 132         struct Qdisc            *q;             /* Elementary queueing discipline */
 133
 134
 135 /* Variables */
 136         unsigned char           cpriority;      /* Effective priority */
 137         unsigned char           delayed;
 138         unsigned char           level;          /* level of the class in hierarchy:
 139                                                    0 for leaf classes, and maximal
 140                                                    level of children + 1 for nodes.
 141                                                  */
 142
 143         psched_time_t           last;           /* Last end of service */
 144         psched_time_t           undertime;
 145         long                    avgidle;
 146         long                    deficit;        /* Saved deficit for WRR */
 147         unsigned long           penalized;
 148         struct gnet_stats_basic bstats;
 149         struct gnet_stats_queue qstats;
 150         struct gnet_stats_rate_est rate_est;
 151         spinlock_t              *stats_lock;
 152         struct tc_cbq_xstats    xstats;
 153
 154         struct tcf_proto        *filter_list;
 155
 156         int                     refcnt;
 157         int                     filters;
 158
 159         struct cbq_class        *defaults[TC_PRIO_MAX+1];
 160 };
 161
 162 struct cbq_sched_data
 163 {
 164         struct cbq_class        *classes[16];           /* Hash table of all classes */
 165         int                     nclasses[TC_CBQ_MAXPRIO+1];
 166         unsigned                quanta[TC_CBQ_MAXPRIO+1];
 167
 168         struct cbq_class        link;
 169
 170         unsigned                activemask;
 171         struct cbq_class        *active[TC_CBQ_MAXPRIO+1];      /* List of all classes
 172                                                                    with backlog */
 173
 174 #ifdef CONFIG_NET_CLS_POLICE
 175         struct cbq_class        *rx_class;
 176 #endif
 177         struct cbq_class        *tx_class;
 178         struct cbq_class        *tx_borrowed;
 179         int                     tx_len;
 180         psched_time_t           now;            /* Cached timestamp */
 181         psched_time_t           now_rt;         /* Cached real time */
 182         unsigned                pmask;
 183
 184         struct timer_list       delay_timer;
 185         struct timer_list       wd_timer;       /* Watchdog timer,
 186                                                    started when CBQ has
 187                                                    backlog, but cannot
 188                                                    transmit just now */
 189         long                    wd_expires;
 190         int                     toplevel;
 191         u32                     hgenerator;
 192 };
 193
 194
 195 #define L2T(cl,len)     ((cl)->R_tab->data[(len)>>(cl)->R_tab->rate.cell_log])
 196
 197
 198 static __inline__ unsigned cbq_hash(u32 h)
 199 {
 200         h ^= h>>8;
 201         h ^= h>>4;
 202         return h&0xF;
 203 }
 204
 205 static __inline__ struct cbq_class *
 206 cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
 207 {
 208         struct cbq_class *cl;
 209
 210         for (cl = q->classes[cbq_hash(classid)]; cl; cl = cl->next)
 211                 if (cl->classid == classid)
 212                         return cl;
 213         return NULL;
 214 }
 215
 216 #ifdef CONFIG_NET_CLS_POLICE
 217
 218 static struct cbq_class *
 219 cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
 220 {
 221         struct cbq_class *cl, *new;
 222
 223         for (cl = this->tparent; cl; cl = cl->tparent)
 224                 if ((new = cl->defaults[TC_PRIO_BESTEFFORT]) != NULL && new != this)
 225                         return new;
 226
 227         return NULL;
 228 }
 229
 230 #endif
 231
 232 /* Classify packet. The procedure is pretty complicated, but
 233    it allows us to combine link sharing and priority scheduling
 234    transparently.
 235
 236    Namely, you can put link sharing rules (f.e. route based) at root of CBQ,
 237    so that it resolves to split nodes. Then packets are classified
 238    by logical priority, or a more specific classifier may be attached
 239    to the split node.
 240  */
 241
 242 static struct cbq_class *
 243 cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
 244 {
 245         struct cbq_sched_data *q = qdisc_priv(sch);
 246         struct cbq_class *head = &q->link;
 247         struct cbq_class **defmap;
 248         struct cbq_class *cl = NULL;
 249         u32 prio = skb->priority;
 250         struct tcf_result res;
 251
 252         /*
 253          *  Step 1. If skb->priority points to one of our classes, use it.
 254          */
 255         if (TC_H_MAJ(prio^sch->handle) == 0 &&
 256             (cl = cbq_class_lookup(q, prio)) != NULL)
 257                 return cl;
 258
 259         *qerr = NET_XMIT_BYPASS;
 260         for (;;) {
 261                 int result = 0;
 262                 defmap = head->defaults;
 263
 264                 /*
 265                  * Step 2+n. Apply classifier.
 266                  */
 267                 if (!head->filter_list || (result = tc_classify(skb, head->filter_list, &res)) < 0)
 268                         goto fallback;
 269
 270                 if ((cl = (void*)res.class) == NULL) {
 271                         if (TC_H_MAJ(res.classid))
 272                                 cl = cbq_class_lookup(q, res.classid);
 273                         else if ((cl = defmap[res.classid&TC_PRIO_MAX]) == NULL)
 274                                 cl = defmap[TC_PRIO_BESTEFFORT];
 275
 276                         if (cl == NULL || cl->level >= head->level)
 277                                 goto fallback;
 278                 }
 279
 280 #ifdef CONFIG_NET_CLS_ACT
 281                 switch (result) {
 282                 case TC_ACT_QUEUED:
 283                 case TC_ACT_STOLEN:
 284                         *qerr = NET_XMIT_SUCCESS;
 285                 case TC_ACT_SHOT:
 286                         return NULL;
 287                 }
 288 #elif defined(CONFIG_NET_CLS_POLICE)
 289                 switch (result) {
 290                 case TC_POLICE_RECLASSIFY:
 291                         return cbq_reclassify(skb, cl);
 292                 case TC_POLICE_SHOT:
 293                         return NULL;
 294                 default:
 295                         break;
 296                 }
 297 #endif
 298                 if (cl->level == 0)
 299                         return cl;
 300
 301                 /*
 302                  * Step 3+n. If classifier selected a link sharing class,
 303                  *         apply agency specific classifier.
 304                  *         Repeat this procdure until we hit a leaf node.
 305                  */
 306                 head = cl;
 307         }
 308
 309 fallback:
 310         cl = head;
 311
 312         /*
 313          * Step 4. No success...
 314          */
 315         if (TC_H_MAJ(prio) == 0 &&
 316             !(cl = head->defaults[prio&TC_PRIO_MAX]) &&
 317             !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
 318                 return head;
 319
 320         return cl;
 321 }
 322
 323 /*
 324    A packet has just been enqueued on the empty class.
 325    cbq_activate_class adds it to the tail of active class list
 326    of its priority band.
 327  */
 328
 329 static __inline__ void cbq_activate_class(struct cbq_class *cl)
 330 {
 331         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 332         int prio = cl->cpriority;
 333         struct cbq_class *cl_tail;
 334
 335         cl_tail = q->active[prio];
 336         q->active[prio] = cl;
 337
 338         if (cl_tail != NULL) {
 339                 cl->next_alive = cl_tail->next_alive;
 340                 cl_tail->next_alive = cl;
 341         } else {
 342                 cl->next_alive = cl;
 343                 q->activemask |= (1<<prio);
 344         }
 345 }
 346
 347 /*
 348    Unlink class from active chain.
 349    Note that this same procedure is done directly in cbq_dequeue*
 350    during round-robin procedure.
 351  */
 352
 353 static void cbq_deactivate_class(struct cbq_class *this)
 354 {
 355         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
 356         int prio = this->cpriority;
 357         struct cbq_class *cl;
 358         struct cbq_class *cl_prev = q->active[prio];
 359
 360         do {
 361                 cl = cl_prev->next_alive;
 362                 if (cl == this) {
 363                         cl_prev->next_alive = cl->next_alive;
 364                         cl->next_alive = NULL;
 365
 366                         if (cl == q->active[prio]) {
 367                                 q->active[prio] = cl_prev;
 368                                 if (cl == q->active[prio]) {
 369                                         q->active[prio] = NULL;
 370                                         q->activemask &= ~(1<<prio);
 371                                         return;
 372                                 }
 373                         }
 374
 375                         cl = cl_prev->next_alive;
 376                         return;
 377                 }
 378         } while ((cl_prev = cl) != q->active[prio]);
 379 }
 380
 381 static void
 382 cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
 383 {
 384         int toplevel = q->toplevel;
 385
 386         if (toplevel > cl->level && !(cl->q->flags&TCQ_F_THROTTLED)) {
 387                 psched_time_t now;
 388                 psched_tdiff_t incr;
 389
 390                 PSCHED_GET_TIME(now);
 391                 incr = PSCHED_TDIFF(now, q->now_rt);
 392                 PSCHED_TADD2(q->now, incr, now);
 393
 394                 do {
 395                         if (PSCHED_TLESS(cl->undertime, now)) {
 396                                 q->toplevel = cl->level;
 397                                 return;
 398                         }
 399                 } while ((cl=cl->borrow) != NULL && toplevel > cl->level);
 400         }
 401 }
 402
 403 static int
 404 cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch)
 405 {
 406         struct cbq_sched_data *q = qdisc_priv(sch);
 407         int len = skb->len;
 408         int ret;
 409         struct cbq_class *cl = cbq_classify(skb, sch, &ret);
 410
 411 #ifdef CONFIG_NET_CLS_POLICE
 412         q->rx_class = cl;
 413 #endif
 414         if (cl == NULL) {
 415                 if (ret == NET_XMIT_BYPASS)
 416                         sch->qstats.drops++;
 417                 kfree_skb(skb);
 418                 return ret;
 419         }
 420
 421 #ifdef CONFIG_NET_CLS_POLICE
 422         cl->q->__parent = sch;
 423 #endif
 424         if ((ret = cl->q->enqueue(skb, cl->q)) == NET_XMIT_SUCCESS) {
 425                 sch->q.qlen++;
 426                 sch->bstats.packets++;
 427                 sch->bstats.bytes+=len;
 428                 cbq_mark_toplevel(q, cl);
 429                 if (!cl->next_alive)
 430                         cbq_activate_class(cl);
 431                 return ret;
 432         }
 433
 434         sch->qstats.drops++;
 435         cbq_mark_toplevel(q, cl);
 436         cl->qstats.drops++;
 437         return ret;
 438 }
 439
 440 static int
 441 cbq_requeue(struct sk_buff *skb, struct Qdisc *sch)
 442 {
 443         struct cbq_sched_data *q = qdisc_priv(sch);
 444         struct cbq_class *cl;
 445         int ret;
 446
 447         if ((cl = q->tx_class) == NULL) {
 448                 kfree_skb(skb);
 449                 sch->qstats.drops++;
 450                 return NET_XMIT_CN;
 451         }
 452         q->tx_class = NULL;
 453
 454         cbq_mark_toplevel(q, cl);
 455
 456 #ifdef CONFIG_NET_CLS_POLICE
 457         q->rx_class = cl;
 458         cl->q->__parent = sch;
 459 #endif
 460         if ((ret = cl->q->ops->requeue(skb, cl->q)) == 0) {
 461                 sch->q.qlen++;
 462                 sch->qstats.requeues++;
 463                 if (!cl->next_alive)
 464                         cbq_activate_class(cl);
 465                 return 0;
 466         }
 467         sch->qstats.drops++;
 468         cl->qstats.drops++;
 469         return ret;
 470 }
 471
 472 /* Overlimit actions */
 473
 474 /* TC_CBQ_OVL_CLASSIC: (default) penalize leaf class by adding offtime */
 475
 476 static void cbq_ovl_classic(struct cbq_class *cl)
 477 {
 478         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 479         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
 480
 481         if (!cl->delayed) {
 482                 delay += cl->offtime;
 483
 484                 /*
 485                    Class goes to sleep, so that it will have no
 486                    chance to work avgidle. Let's forgive it 8)
 487
 488                    BTW cbq-2.0 has a crap in this
 489                    place, apparently they forgot to shift it by cl->ewma_log.
 490                  */
 491                 if (cl->avgidle < 0)
 492                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 493                 if (cl->avgidle < cl->minidle)
 494                         cl->avgidle = cl->minidle;
 495                 if (delay <= 0)
 496                         delay = 1;
 497                 PSCHED_TADD2(q->now, delay, cl->undertime);
 498
 499                 cl->xstats.overactions++;
 500                 cl->delayed = 1;
 501         }
 502         if (q->wd_expires == 0 || q->wd_expires > delay)
 503                 q->wd_expires = delay;
 504
 505         /* Dirty work! We must schedule wakeups based on
 506            real available rate, rather than leaf rate,
 507            which may be tiny (even zero).
 508          */
 509         if (q->toplevel == TC_CBQ_MAXLEVEL) {
 510                 struct cbq_class *b;
 511                 psched_tdiff_t base_delay = q->wd_expires;
 512
 513                 for (b = cl->borrow; b; b = b->borrow) {
 514                         delay = PSCHED_TDIFF(b->undertime, q->now);
 515                         if (delay < base_delay) {
 516                                 if (delay <= 0)
 517                                         delay = 1;
 518                                 base_delay = delay;
 519                         }
 520                 }
 521
 522                 q->wd_expires = base_delay;
 523         }
 524 }
 525
 526 /* TC_CBQ_OVL_RCLASSIC: penalize by offtime classes in hierarchy, when
 527    they go overlimit
 528  */
 529
 530 static void cbq_ovl_rclassic(struct cbq_class *cl)
 531 {
 532         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 533         struct cbq_class *this = cl;
 534
 535         do {
 536                 if (cl->level > q->toplevel) {
 537                         cl = NULL;
 538                         break;
 539                 }
 540         } while ((cl = cl->borrow) != NULL);
 541
 542         if (cl == NULL)
 543                 cl = this;
 544         cbq_ovl_classic(cl);
 545 }
 546
 547 /* TC_CBQ_OVL_DELAY: delay until it will go to underlimit */
 548
 549 static void cbq_ovl_delay(struct cbq_class *cl)
 550 {
 551         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 552         psched_tdiff_t delay = PSCHED_TDIFF(cl->undertime, q->now);
 553
 554         if (!cl->delayed) {
 555                 unsigned long sched = jiffies;
 556
 557                 delay += cl->offtime;
 558                 if (cl->avgidle < 0)
 559                         delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
 560                 if (cl->avgidle < cl->minidle)
 561                         cl->avgidle = cl->minidle;
 562                 PSCHED_TADD2(q->now, delay, cl->undertime);
 563
 564                 if (delay > 0) {
 565                         sched += PSCHED_US2JIFFIE(delay) + cl->penalty;
 566                         cl->penalized = sched;
 567                         cl->cpriority = TC_CBQ_MAXPRIO;
 568                         q->pmask |= (1<<TC_CBQ_MAXPRIO);
 569                         if (del_timer(&q->delay_timer) &&
 570                             (long)(q->delay_timer.expires - sched) > 0)
 571                                 q->delay_timer.expires = sched;
 572                         add_timer(&q->delay_timer);
 573                         cl->delayed = 1;
 574                         cl->xstats.overactions++;
 575                         return;
 576                 }
 577                 delay = 1;
 578         }
 579         if (q->wd_expires == 0 || q->wd_expires > delay)
 580                 q->wd_expires = delay;
 581 }
 582
 583 /* TC_CBQ_OVL_LOWPRIO: penalize class by lowering its priority band */
 584
 585 static void cbq_ovl_lowprio(struct cbq_class *cl)
 586 {
 587         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 588
 589         cl->penalized = jiffies + cl->penalty;
 590
 591         if (cl->cpriority != cl->priority2) {
 592                 cl->cpriority = cl->priority2;
 593                 q->pmask |= (1<<cl->cpriority);
 594                 cl->xstats.overactions++;
 595         }
 596         cbq_ovl_classic(cl);
 597 }
 598
 599 /* TC_CBQ_OVL_DROP: penalize class by dropping */
 600
 601 static void cbq_ovl_drop(struct cbq_class *cl)
 602 {
 603         if (cl->q->ops->drop)
 604                 if (cl->q->ops->drop(cl->q))
 605                         cl->qdisc->q.qlen--;
 606         cl->xstats.overactions++;
 607         cbq_ovl_classic(cl);
 608 }
 609
 610 static void cbq_watchdog(unsigned long arg)
 611 {
 612         struct Qdisc *sch = (struct Qdisc*)arg;
 613
 614         sch->flags &= ~TCQ_F_THROTTLED;
 615         netif_schedule(sch->dev);
 616 }
 617
 618 static unsigned long cbq_undelay_prio(struct cbq_sched_data *q, int prio)
 619 {
 620         struct cbq_class *cl;
 621         struct cbq_class *cl_prev = q->active[prio];
 622         unsigned long now = jiffies;
 623         unsigned long sched = now;
 624
 625         if (cl_prev == NULL)
 626                 return now;
 627
 628         do {
 629                 cl = cl_prev->next_alive;
 630                 if ((long)(now - cl->penalized) > 0) {
 631                         cl_prev->next_alive = cl->next_alive;
 632                         cl->next_alive = NULL;
 633                         cl->cpriority = cl->priority;
 634                         cl->delayed = 0;
 635                         cbq_activate_class(cl);
 636
 637                         if (cl == q->active[prio]) {
 638                                 q->active[prio] = cl_prev;
 639                                 if (cl == q->active[prio]) {
 640                                         q->active[prio] = NULL;
 641                                         return 0;
 642                                 }
 643                         }
 644
 645                         cl = cl_prev->next_alive;
 646                 } else if ((long)(sched - cl->penalized) > 0)
 647                         sched = cl->penalized;
 648         } while ((cl_prev = cl) != q->active[prio]);
 649
 650         return (long)(sched - now);
 651 }
 652
 653 static void cbq_undelay(unsigned long arg)
 654 {
 655         struct Qdisc *sch = (struct Qdisc*)arg;
 656         struct cbq_sched_data *q = qdisc_priv(sch);
 657         long delay = 0;
 658         unsigned pmask;
 659
 660         pmask = q->pmask;
 661         q->pmask = 0;
 662
 663         while (pmask) {
 664                 int prio = ffz(~pmask);
 665                 long tmp;
 666
 667                 pmask &= ~(1<<prio);
 668
 669                 tmp = cbq_undelay_prio(q, prio);
 670                 if (tmp > 0) {
 671                         q->pmask |= 1<<prio;
 672                         if (tmp < delay || delay == 0)
 673                                 delay = tmp;
 674                 }
 675         }
 676
 677         if (delay) {
 678                 q->delay_timer.expires = jiffies + delay;
 679                 add_timer(&q->delay_timer);
 680         }
 681
 682         sch->flags &= ~TCQ_F_THROTTLED;
 683         netif_schedule(sch->dev);
 684 }
 685
 686
 687 #ifdef CONFIG_NET_CLS_POLICE
 688
 689 static int cbq_reshape_fail(struct sk_buff *skb, struct Qdisc *child)
 690 {
 691         int len = skb->len;
 692         struct Qdisc *sch = child->__parent;
 693         struct cbq_sched_data *q = qdisc_priv(sch);
 694         struct cbq_class *cl = q->rx_class;
 695
 696         q->rx_class = NULL;
 697
 698         if (cl && (cl = cbq_reclassify(skb, cl)) != NULL) {
 699
 700                 cbq_mark_toplevel(q, cl);
 701
 702                 q->rx_class = cl;
 703                 cl->q->__parent = sch;
 704
 705                 if (cl->q->enqueue(skb, cl->q) == 0) {
 706                         sch->q.qlen++;
 707                         sch->bstats.packets++;
 708                         sch->bstats.bytes+=len;
 709                         if (!cl->next_alive)
 710                                 cbq_activate_class(cl);
 711                         return 0;
 712                 }
 713                 sch->qstats.drops++;
 714                 return 0;
 715         }
 716
 717         sch->qstats.drops++;
 718         return -1;
 719 }
 720 #endif
 721
 722 /*
 723    It is mission critical procedure.
 724
 725    We "regenerate" toplevel cutoff, if transmitting class
 726    has backlog and it is not regulated. It is not part of
 727    original CBQ description, but looks more reasonable.
 728    Probably, it is wrong. This question needs further investigation.
 729 */
 730
 731 static __inline__ void
 732 cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
 733                     struct cbq_class *borrowed)
 734 {
 735         if (cl && q->toplevel >= borrowed->level) {
 736                 if (cl->q->q.qlen > 1) {
 737                         do {
 738                                 if (PSCHED_IS_PASTPERFECT(borrowed->undertime)) {
 739                                         q->toplevel = borrowed->level;
 740                                         return;
 741                                 }
 742                         } while ((borrowed=borrowed->borrow) != NULL);
 743                 }
 744 #if 0
 745         /* It is not necessary now. Uncommenting it
 746            will save CPU cycles, but decrease fairness.
 747          */
 748                 q->toplevel = TC_CBQ_MAXLEVEL;
 749 #endif
 750         }
 751 }
 752
 753 static void
 754 cbq_update(struct cbq_sched_data *q)
 755 {
 756         struct cbq_class *this = q->tx_class;
 757         struct cbq_class *cl = this;
 758         int len = q->tx_len;
 759
 760         q->tx_class = NULL;
 761
 762         for ( ; cl; cl = cl->share) {
 763                 long avgidle = cl->avgidle;
 764                 long idle;
 765
 766                 cl->bstats.packets++;
 767                 cl->bstats.bytes += len;
 768
 769                 /*
 770                    (now - last) is total time between packet right edges.
 771                    (last_pktlen/rate) is "virtual" busy time, so that
 772
 773                          idle = (now - last) - last_pktlen/rate
 774                  */
 775
 776                 idle = PSCHED_TDIFF(q->now, cl->last);
 777                 if ((unsigned long)idle > 128*1024*1024) {
 778                         avgidle = cl->maxidle;
 779                 } else {
 780                         idle -= L2T(cl, len);
 781
 782                 /* true_avgidle := (1-W)*true_avgidle + W*idle,
 783                    where W=2^{-ewma_log}. But cl->avgidle is scaled:
 784                    cl->avgidle == true_avgidle/W,
 785                    hence:
 786                  */
 787                         avgidle += idle - (avgidle>>cl->ewma_log);
 788                 }
 789
 790                 if (avgidle <= 0) {
 791                         /* Overlimit or at-limit */
 792
 793                         if (avgidle < cl->minidle)
 794                                 avgidle = cl->minidle;
 795
 796                         cl->avgidle = avgidle;
 797
 798                         /* Calculate expected time, when this class
 799                            will be allowed to send.
 800                            It will occur, when:
 801                            (1-W)*true_avgidle + W*delay = 0, i.e.
 802                            idle = (1/W - 1)*(-true_avgidle)
 803                            or
 804                            idle = (1 - W)*(-cl->avgidle);
 805                          */
 806                         idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
 807
 808                         /*
 809                            That is not all.
 810                            To maintain the rate allocated to the class,
 811                            we add to undertime virtual clock,
 812                            necessary to complete transmitted packet.
 813                            (len/phys_bandwidth has been already passed
 814                            to the moment of cbq_update)
 815                          */
 816
 817                         idle -= L2T(&q->link, len);
 818                         idle += L2T(cl, len);
 819
 820                         PSCHED_AUDIT_TDIFF(idle);
 821
 822                         PSCHED_TADD2(q->now, idle, cl->undertime);
 823                 } else {
 824                         /* Underlimit */
 825
 826                         PSCHED_SET_PASTPERFECT(cl->undertime);
 827                         if (avgidle > cl->maxidle)
 828                                 cl->avgidle = cl->maxidle;
 829                         else
 830                                 cl->avgidle = avgidle;
 831                 }
 832                 cl->last = q->now;
 833         }
 834
 835         cbq_update_toplevel(q, this, q->tx_borrowed);
 836 }
 837
 838 static __inline__ struct cbq_class *
 839 cbq_under_limit(struct cbq_class *cl)
 840 {
 841         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
 842         struct cbq_class *this_cl = cl;
 843
 844         if (cl->tparent == NULL)
 845                 return cl;
 846
 847         if (PSCHED_IS_PASTPERFECT(cl->undertime) ||
 848             !PSCHED_TLESS(q->now, cl->undertime)) {
 849                 cl->delayed = 0;
 850                 return cl;
 851         }
 852
 853         do {
 854                 /* It is very suspicious place. Now overlimit
 855                    action is generated for not bounded classes
 856                    only if link is completely congested.
 857                    Though it is in agree with ancestor-only paradigm,
 858                    it looks very stupid. Particularly,
 859                    it means that this chunk of code will either
 860                    never be called or result in strong amplification
 861                    of burstiness. Dangerous, silly, and, however,
 862                    no another solution exists.
 863                  */
 864                 if ((cl = cl->borrow) == NULL) {
 865                         this_cl->qstats.overlimits++;
 866                         this_cl->overlimit(this_cl);
 867                         return NULL;
 868                 }
 869                 if (cl->level > q->toplevel)
 870                         return NULL;
 871         } while (!PSCHED_IS_PASTPERFECT(cl->undertime) &&
 872                  PSCHED_TLESS(q->now, cl->undertime));
 873
 874         cl->delayed = 0;
 875         return cl;
 876 }
 877
 878 static __inline__ struct sk_buff *
 879 cbq_dequeue_prio(struct Qdisc *sch, int prio)
 880 {
 881         struct cbq_sched_data *q = qdisc_priv(sch);
 882         struct cbq_class *cl_tail, *cl_prev, *cl;
 883         struct sk_buff *skb;
 884         int deficit;
 885
 886         cl_tail = cl_prev = q->active[prio];
 887         cl = cl_prev->next_alive;
 888
 889         do {
 890                 deficit = 0;
 891
 892                 /* Start round */
 893                 do {
 894                         struct cbq_class *borrow = cl;
 895
 896                         if (cl->q->q.qlen &&
 897                             (borrow = cbq_under_limit(cl)) == NULL)
 898                                 goto skip_class;
 899
 900                         if (cl->deficit <= 0) {
 901                                 /* Class exhausted its allotment per
 902                                    this round. Switch to the next one.
 903                                  */
 904                                 deficit = 1;
 905                                 cl->deficit += cl->quantum;
 906                                 goto next_class;
 907                         }
 908
 909                         skb = cl->q->dequeue(cl->q);
 910
 911                         /* Class did not give us any skb :-(
 912                            It could occur even if cl->q->q.qlen != 0
 913                            f.e. if cl->q == "tbf"
 914                          */
 915                         if (skb == NULL)
 916                                 goto skip_class;
 917
 918                         cl->deficit -= skb->len;
 919                         q->tx_class = cl;
 920                         q->tx_borrowed = borrow;
 921                         if (borrow != cl) {
 922 #ifndef CBQ_XSTATS_BORROWS_BYTES
 923                                 borrow->xstats.borrows++;
 924                                 cl->xstats.borrows++;
 925 #else
 926                                 borrow->xstats.borrows += skb->len;
 927                                 cl->xstats.borrows += skb->len;
 928 #endif
 929                         }
 930                         q->tx_len = skb->len;
 931
 932                         if (cl->deficit <= 0) {
 933                                 q->active[prio] = cl;
 934                                 cl = cl->next_alive;
 935                                 cl->deficit += cl->quantum;
 936                         }
 937                         return skb;
 938
 939 skip_class:
 940                         if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
 941                                 /* Class is empty or penalized.
 942                                    Unlink it from active chain.
 943                                  */
 944                                 cl_prev->next_alive = cl->next_alive;
 945                                 cl->next_alive = NULL;
 946
 947                                 /* Did cl_tail point to it? */
 948                                 if (cl == cl_tail) {
 949                                         /* Repair it! */
 950                                         cl_tail = cl_prev;
 951
 952                                         /* Was it the last class in this band? */
 953                                         if (cl == cl_tail) {
 954                                                 /* Kill the band! */
 955                                                 q->active[prio] = NULL;
 956                                                 q->activemask &= ~(1<<prio);
 957                                                 if (cl->q->q.qlen)
 958                                                         cbq_activate_class(cl);
 959                                                 return NULL;
 960                                         }
 961
 962                                         q->active[prio] = cl_tail;
 963                                 }
 964                                 if (cl->q->q.qlen)
 965                                         cbq_activate_class(cl);
 966
 967                                 cl = cl_prev;
 968                         }
 969
 970 next_class:
 971                         cl_prev = cl;
 972                         cl = cl->next_alive;
 973                 } while (cl_prev != cl_tail);
 974         } while (deficit);
 975
 976         q->active[prio] = cl_prev;
 977
 978         return NULL;
 979 }
 980
 981 static __inline__ struct sk_buff *
 982 cbq_dequeue_1(struct Qdisc *sch)
 983 {
 984         struct cbq_sched_data *q = qdisc_priv(sch);
 985         struct sk_buff *skb;
 986         unsigned activemask;
 987
 988         activemask = q->activemask&0xFF;
 989         while (activemask) {
 990                 int prio = ffz(~activemask);
 991                 activemask &= ~(1<<prio);
 992                 skb = cbq_dequeue_prio(sch, prio);
 993                 if (skb)
 994                         return skb;
 995         }
 996         return NULL;
 997 }
 998
 999 static struct sk_buff *
1000 cbq_dequeue(struct Qdisc *sch)
1001 {
1002         struct sk_buff *skb;
1003         struct cbq_sched_data *q = qdisc_priv(sch);
1004         psched_time_t now;
1005         psched_tdiff_t incr;
1006
1007         PSCHED_GET_TIME(now);
1008         incr = PSCHED_TDIFF(now, q->now_rt);
1009
1010         if (q->tx_class) {
1011                 psched_tdiff_t incr2;
1012                 /* Time integrator. We calculate EOS time
1013                    by adding expected packet transmission time.
1014                    If real time is greater, we warp artificial clock,
1015                    so that:
1016
1017                    cbq_time = max(real_time, work);
1018                  */
1019                 incr2 = L2T(&q->link, q->tx_len);
1020                 PSCHED_TADD(q->now, incr2);
1021                 cbq_update(q);
1022                 if ((incr -= incr2) < 0)
1023                         incr = 0;
1024         }
1025         PSCHED_TADD(q->now, incr);
1026         q->now_rt = now;
1027
1028         for (;;) {
1029                 q->wd_expires = 0;
1030
1031                 skb = cbq_dequeue_1(sch);
1032                 if (skb) {
1033                         sch->q.qlen--;
1034                         sch->flags &= ~TCQ_F_THROTTLED;
1035                         return skb;
1036                 }
1037
1038                 /* All the classes are overlimit.
1039
1040                    It is possible, if:
1041
1042                    1. Scheduler is empty.
1043                    2. Toplevel cutoff inhibited borrowing.
1044                    3. Root class is overlimit.
1045
1046                    Reset 2d and 3d conditions and retry.
1047
1048                    Note, that NS and cbq-2.0 are buggy, peeking
1049                    an arbitrary class is appropriate for ancestor-only
1050                    sharing, but not for toplevel algorithm.
1051
1052                    Our version is better, but slower, because it requires
1053                    two passes, but it is unavoidable with top-level sharing.
1054                 */
1055
1056                 if (q->toplevel == TC_CBQ_MAXLEVEL &&
1057                     PSCHED_IS_PASTPERFECT(q->link.undertime))
1058                         break;
1059
1060                 q->toplevel = TC_CBQ_MAXLEVEL;
1061                 PSCHED_SET_PASTPERFECT(q->link.undertime);
1062         }
1063
1064         /* No packets in scheduler or nobody wants to give them to us :-(
1065            Sigh... start watchdog timer in the last case. */
1066
1067         if (sch->q.qlen) {
1068                 sch->qstats.overlimits++;
1069                 if (q->wd_expires) {
1070                         long delay = PSCHED_US2JIFFIE(q->wd_expires);
1071                         if (delay <= 0)
1072                                 delay = 1;
1073                         mod_timer(&q->wd_timer, jiffies + delay);
1074                         sch->flags |= TCQ_F_THROTTLED;
1075                 }
1076         }
1077         return NULL;
1078 }
1079
1080 /* CBQ class maintanance routines */
1081
1082 static void cbq_adjust_levels(struct cbq_class *this)
1083 {
1084         if (this == NULL)
1085                 return;
1086
1087         do {
1088                 int level = 0;
1089                 struct cbq_class *cl;
1090
1091                 if ((cl = this->children) != NULL) {
1092                         do {
1093                                 if (cl->level > level)
1094                                         level = cl->level;
1095                         } while ((cl = cl->sibling) != this->children);
1096                 }
1097                 this->level = level+1;
1098         } while ((this = this->tparent) != NULL);
1099 }
1100
1101 static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
1102 {
1103         struct cbq_class *cl;
1104         unsigned h;
1105
1106         if (q->quanta[prio] == 0)
1107                 return;
1108
1109         for (h=0; h<16; h++) {
1110                 for (cl = q->classes[h]; cl; cl = cl->next) {
1111                         /* BUGGGG... Beware! This expression suffer of
1112                            arithmetic overflows!
1113                          */
1114                         if (cl->priority == prio) {
1115                                 cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
1116                                         q->quanta[prio];
1117                         }
1118                         if (cl->quantum <= 0 || cl->quantum>32*cl->qdisc->dev->mtu) {
1119                                 printk(KERN_WARNING "CBQ: class %08x has bad quantum==%ld, repaired.\n", cl->classid, cl->quantum);
1120                                 cl->quantum = cl->qdisc->dev->mtu/2 + 1;
1121                         }
1122                 }
1123         }
1124 }
1125
1126 static void cbq_sync_defmap(struct cbq_class *cl)
1127 {
1128         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1129         struct cbq_class *split = cl->split;
1130         unsigned h;
1131         int i;
1132
1133         if (split == NULL)
1134                 return;
1135
1136         for (i=0; i<=TC_PRIO_MAX; i++) {
1137                 if (split->defaults[i] == cl && !(cl->defmap&(1<<i)))
1138                         split->defaults[i] = NULL;
1139         }
1140
1141         for (i=0; i<=TC_PRIO_MAX; i++) {
1142                 int level = split->level;
1143
1144                 if (split->defaults[i])
1145                         continue;
1146
1147                 for (h=0; h<16; h++) {
1148                         struct cbq_class *c;
1149
1150                         for (c = q->classes[h]; c; c = c->next) {
1151                                 if (c->split == split && c->level < level &&
1152                                     c->defmap&(1<<i)) {
1153                                         split->defaults[i] = c;
1154                                         level = c->level;
1155                                 }
1156                         }
1157                 }
1158         }
1159 }
1160
1161 static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
1162 {
1163         struct cbq_class *split = NULL;
1164
1165         if (splitid == 0) {
1166                 if ((split = cl->split) == NULL)
1167                         return;
1168                 splitid = split->classid;
1169         }
1170
1171         if (split == NULL || split->classid != splitid) {
1172                 for (split = cl->tparent; split; split = split->tparent)
1173                         if (split->classid == splitid)
1174                                 break;
1175         }
1176
1177         if (split == NULL)
1178                 return;
1179
1180         if (cl->split != split) {
1181                 cl->defmap = 0;
1182                 cbq_sync_defmap(cl);
1183                 cl->split = split;
1184                 cl->defmap = def&mask;
1185         } else
1186                 cl->defmap = (cl->defmap&~mask)|(def&mask);
1187
1188         cbq_sync_defmap(cl);
1189 }
1190
1191 static void cbq_unlink_class(struct cbq_class *this)
1192 {
1193         struct cbq_class *cl, **clp;
1194         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1195
1196         for (clp = &q->classes[cbq_hash(this->classid)]; (cl = *clp) != NULL; clp = &cl->next) {
1197                 if (cl == this) {
1198                         *clp = cl->next;
1199                         cl->next = NULL;
1200                         break;
1201                 }
1202         }
1203
1204         if (this->tparent) {
1205                 clp=&this->sibling;
1206                 cl = *clp;
1207                 do {
1208                         if (cl == this) {
1209                                 *clp = cl->sibling;
1210                                 break;
1211                         }
1212                         clp = &cl->sibling;
1213                 } while ((cl = *clp) != this->sibling);
1214
1215                 if (this->tparent->children == this) {
1216                         this->tparent->children = this->sibling;
1217                         if (this->sibling == this)
1218                                 this->tparent->children = NULL;
1219                 }
1220         } else {
1221                 BUG_TRAP(this->sibling == this);
1222         }
1223 }
1224
1225 static void cbq_link_class(struct cbq_class *this)
1226 {
1227         struct cbq_sched_data *q = qdisc_priv(this->qdisc);
1228         unsigned h = cbq_hash(this->classid);
1229         struct cbq_class *parent = this->tparent;
1230
1231         this->sibling = this;
1232         this->next = q->classes[h];
1233         q->classes[h] = this;
1234
1235         if (parent == NULL)
1236                 return;
1237
1238         if (parent->children == NULL) {
1239                 parent->children = this;
1240         } else {
1241                 this->sibling = parent->children->sibling;
1242                 parent->children->sibling = this;
1243         }
1244 }
1245
1246 static unsigned int cbq_drop(struct Qdisc* sch)
1247 {
1248         struct cbq_sched_data *q = qdisc_priv(sch);
1249         struct cbq_class *cl, *cl_head;
1250         int prio;
1251         unsigned int len;
1252
1253         for (prio = TC_CBQ_MAXPRIO; prio >= 0; prio--) {
1254                 if ((cl_head = q->active[prio]) == NULL)
1255                         continue;
1256
1257                 cl = cl_head;
1258                 do {
1259                         if (cl->q->ops->drop && (len = cl->q->ops->drop(cl->q))) {
1260                                 sch->q.qlen--;
1261                                 return len;
1262                         }
1263                 } while ((cl = cl->next_alive) != cl_head);
1264         }
1265         return 0;
1266 }
1267
1268 static void
1269 cbq_reset(struct Qdisc* sch)
1270 {
1271         struct cbq_sched_data *q = qdisc_priv(sch);
1272         struct cbq_class *cl;
1273         int prio;
1274         unsigned h;
1275
1276         q->activemask = 0;
1277         q->pmask = 0;
1278         q->tx_class = NULL;
1279         q->tx_borrowed = NULL;
1280         del_timer(&q->wd_timer);
1281         del_timer(&q->delay_timer);
1282         q->toplevel = TC_CBQ_MAXLEVEL;
1283         PSCHED_GET_TIME(q->now);
1284         q->now_rt = q->now;
1285
1286         for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
1287                 q->active[prio] = NULL;
1288
1289         for (h = 0; h < 16; h++) {
1290                 for (cl = q->classes[h]; cl; cl = cl->next) {
1291                         qdisc_reset(cl->q);
1292
1293                         cl->next_alive = NULL;
1294                         PSCHED_SET_PASTPERFECT(cl->undertime);
1295                         cl->avgidle = cl->maxidle;
1296                         cl->deficit = cl->quantum;
1297                         cl->cpriority = cl->priority;
1298                 }
1299         }
1300         sch->q.qlen = 0;
1301 }
1302
1303
1304 static int cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
1305 {
1306         if (lss->change&TCF_CBQ_LSS_FLAGS) {
1307                 cl->share = (lss->flags&TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
1308                 cl->borrow = (lss->flags&TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
1309         }
1310         if (lss->change&TCF_CBQ_LSS_EWMA)
1311                 cl->ewma_log = lss->ewma_log;
1312         if (lss->change&TCF_CBQ_LSS_AVPKT)
1313                 cl->avpkt = lss->avpkt;
1314         if (lss->change&TCF_CBQ_LSS_MINIDLE)
1315                 cl->minidle = -(long)lss->minidle;
1316         if (lss->change&TCF_CBQ_LSS_MAXIDLE) {
1317                 cl->maxidle = lss->maxidle;
1318                 cl->avgidle = lss->maxidle;
1319         }
1320         if (lss->change&TCF_CBQ_LSS_OFFTIME)
1321                 cl->offtime = lss->offtime;
1322         return 0;
1323 }
1324
1325 static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
1326 {
1327         q->nclasses[cl->priority]--;
1328         q->quanta[cl->priority] -= cl->weight;
1329         cbq_normalize_quanta(q, cl->priority);
1330 }
1331
1332 static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
1333 {
1334         q->nclasses[cl->priority]++;
1335         q->quanta[cl->priority] += cl->weight;
1336         cbq_normalize_quanta(q, cl->priority);
1337 }
1338
1339 static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
1340 {
1341         struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
1342
1343         if (wrr->allot)
1344                 cl->allot = wrr->allot;
1345         if (wrr->weight)
1346                 cl->weight = wrr->weight;
1347         if (wrr->priority) {
1348                 cl->priority = wrr->priority-1;
1349                 cl->cpriority = cl->priority;
1350                 if (cl->priority >= cl->priority2)
1351                         cl->priority2 = TC_CBQ_MAXPRIO-1;
1352         }
1353
1354         cbq_addprio(q, cl);
1355         return 0;
1356 }
1357
1358 static int cbq_set_overlimit(struct cbq_class *cl, struct tc_cbq_ovl *ovl)
1359 {
1360         switch (ovl->strategy) {
1361         case TC_CBQ_OVL_CLASSIC:
1362                 cl->overlimit = cbq_ovl_classic;
1363                 break;
1364         case TC_CBQ_OVL_DELAY:
1365                 cl->overlimit = cbq_ovl_delay;
1366                 break;
1367         case TC_CBQ_OVL_LOWPRIO:
1368                 if (ovl->priority2-1 >= TC_CBQ_MAXPRIO ||
1369                     ovl->priority2-1 <= cl->priority)
1370                         return -EINVAL;
1371                 cl->priority2 = ovl->priority2-1;
1372                 cl->overlimit = cbq_ovl_lowprio;
1373                 break;
1374         case TC_CBQ_OVL_DROP:
1375                 cl->overlimit = cbq_ovl_drop;
1376                 break;
1377         case TC_CBQ_OVL_RCLASSIC:
1378                 cl->overlimit = cbq_ovl_rclassic;
1379                 break;
1380         default:
1381                 return -EINVAL;
1382         }
1383         cl->penalty = (ovl->penalty*HZ)/1000;
1384         return 0;
1385 }
1386
1387 #ifdef CONFIG_NET_CLS_POLICE
1388 static int cbq_set_police(struct cbq_class *cl, struct tc_cbq_police *p)
1389 {
1390         cl->police = p->police;
1391
1392         if (cl->q->handle) {
1393                 if (p->police == TC_POLICE_RECLASSIFY)
1394                         cl->q->reshape_fail = cbq_reshape_fail;
1395                 else
1396                         cl->q->reshape_fail = NULL;
1397         }
1398         return 0;
1399 }
1400 #endif
1401
1402 static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
1403 {
1404         cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
1405         return 0;
1406 }
1407
1408 static int cbq_init(struct Qdisc *sch, struct rtattr *opt)
1409 {
1410         struct cbq_sched_data *q = qdisc_priv(sch);
1411         struct rtattr *tb[TCA_CBQ_MAX];
1412         struct tc_ratespec *r;
1413
1414         if (rtattr_parse_nested(tb, TCA_CBQ_MAX, opt) < 0 ||
1415             tb[TCA_CBQ_RTAB-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1416             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1417                 return -EINVAL;
1418
1419         if (tb[TCA_CBQ_LSSOPT-1] &&
1420             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1421                 return -EINVAL;
1422
1423         r = RTA_DATA(tb[TCA_CBQ_RATE-1]);
1424
1425         if ((q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB-1])) == NULL)
1426                 return -EINVAL;
1427
1428         q->link.refcnt = 1;
1429         q->link.sibling = &q->link;
1430         q->link.classid = sch->handle;
1431         q->link.qdisc = sch;
1432         if (!(q->link.q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1433                                             sch->handle)))
1434                 q->link.q = &noop_qdisc;
1435
1436         q->link.priority = TC_CBQ_MAXPRIO-1;
1437         q->link.priority2 = TC_CBQ_MAXPRIO-1;
1438         q->link.cpriority = TC_CBQ_MAXPRIO-1;
1439         q->link.ovl_strategy = TC_CBQ_OVL_CLASSIC;
1440         q->link.overlimit = cbq_ovl_classic;
1441         q->link.allot = psched_mtu(sch->dev);
1442         q->link.quantum = q->link.allot;
1443         q->link.weight = q->link.R_tab->rate.rate;
1444
1445         q->link.ewma_log = TC_CBQ_DEF_EWMA;
1446         q->link.avpkt = q->link.allot/2;
1447         q->link.minidle = -0x7FFFFFFF;
1448         q->link.stats_lock = &sch->dev->queue_lock;
1449
1450         init_timer(&q->wd_timer);
1451         q->wd_timer.data = (unsigned long)sch;
1452         q->wd_timer.function = cbq_watchdog;
1453         init_timer(&q->delay_timer);
1454         q->delay_timer.data = (unsigned long)sch;
1455         q->delay_timer.function = cbq_undelay;
1456         q->toplevel = TC_CBQ_MAXLEVEL;
1457         PSCHED_GET_TIME(q->now);
1458         q->now_rt = q->now;
1459
1460         cbq_link_class(&q->link);
1461
1462         if (tb[TCA_CBQ_LSSOPT-1])
1463                 cbq_set_lss(&q->link, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1464
1465         cbq_addprio(q, &q->link);
1466         return 0;
1467 }
1468
1469 static __inline__ int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
1470 {
1471         unsigned char    *b = skb->tail;
1472
1473         RTA_PUT(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate);
1474         return skb->len;
1475
1476 rtattr_failure:
1477         skb_trim(skb, b - skb->data);
1478         return -1;
1479 }
1480
1481 static __inline__ int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
1482 {
1483         unsigned char    *b = skb->tail;
1484         struct tc_cbq_lssopt opt;
1485
1486         opt.flags = 0;
1487         if (cl->borrow == NULL)
1488                 opt.flags |= TCF_CBQ_LSS_BOUNDED;
1489         if (cl->share == NULL)
1490                 opt.flags |= TCF_CBQ_LSS_ISOLATED;
1491         opt.ewma_log = cl->ewma_log;
1492         opt.level = cl->level;
1493         opt.avpkt = cl->avpkt;
1494         opt.maxidle = cl->maxidle;
1495         opt.minidle = (u32)(-cl->minidle);
1496         opt.offtime = cl->offtime;
1497         opt.change = ~0;
1498         RTA_PUT(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt);
1499         return skb->len;
1500
1501 rtattr_failure:
1502         skb_trim(skb, b - skb->data);
1503         return -1;
1504 }
1505
1506 static __inline__ int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
1507 {
1508         unsigned char    *b = skb->tail;
1509         struct tc_cbq_wrropt opt;
1510
1511         opt.flags = 0;
1512         opt.allot = cl->allot;
1513         opt.priority = cl->priority+1;
1514         opt.cpriority = cl->cpriority+1;
1515         opt.weight = cl->weight;
1516         RTA_PUT(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt);
1517         return skb->len;
1518
1519 rtattr_failure:
1520         skb_trim(skb, b - skb->data);
1521         return -1;
1522 }
1523
1524 static __inline__ int cbq_dump_ovl(struct sk_buff *skb, struct cbq_class *cl)
1525 {
1526         unsigned char    *b = skb->tail;
1527         struct tc_cbq_ovl opt;
1528
1529         opt.strategy = cl->ovl_strategy;
1530         opt.priority2 = cl->priority2+1;
1531         opt.pad = 0;
1532         opt.penalty = (cl->penalty*1000)/HZ;
1533         RTA_PUT(skb, TCA_CBQ_OVL_STRATEGY, sizeof(opt), &opt);
1534         return skb->len;
1535
1536 rtattr_failure:
1537         skb_trim(skb, b - skb->data);
1538         return -1;
1539 }
1540
1541 static __inline__ int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
1542 {
1543         unsigned char    *b = skb->tail;
1544         struct tc_cbq_fopt opt;
1545
1546         if (cl->split || cl->defmap) {
1547                 opt.split = cl->split ? cl->split->classid : 0;
1548                 opt.defmap = cl->defmap;
1549                 opt.defchange = ~0;
1550                 RTA_PUT(skb, TCA_CBQ_FOPT, sizeof(opt), &opt);
1551         }
1552         return skb->len;
1553
1554 rtattr_failure:
1555         skb_trim(skb, b - skb->data);
1556         return -1;
1557 }
1558
1559 #ifdef CONFIG_NET_CLS_POLICE
1560 static __inline__ int cbq_dump_police(struct sk_buff *skb, struct cbq_class *cl)
1561 {
1562         unsigned char    *b = skb->tail;
1563         struct tc_cbq_police opt;
1564
1565         if (cl->police) {
1566                 opt.police = cl->police;
1567                 opt.__res1 = 0;
1568                 opt.__res2 = 0;
1569                 RTA_PUT(skb, TCA_CBQ_POLICE, sizeof(opt), &opt);
1570         }
1571         return skb->len;
1572
1573 rtattr_failure:
1574         skb_trim(skb, b - skb->data);
1575         return -1;
1576 }
1577 #endif
1578
1579 static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
1580 {
1581         if (cbq_dump_lss(skb, cl) < 0 ||
1582             cbq_dump_rate(skb, cl) < 0 ||
1583             cbq_dump_wrr(skb, cl) < 0 ||
1584             cbq_dump_ovl(skb, cl) < 0 ||
1585 #ifdef CONFIG_NET_CLS_POLICE
1586             cbq_dump_police(skb, cl) < 0 ||
1587 #endif
1588             cbq_dump_fopt(skb, cl) < 0)
1589                 return -1;
1590         return 0;
1591 }
1592
1593 static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
1594 {
1595         struct cbq_sched_data *q = qdisc_priv(sch);
1596         unsigned char    *b = skb->tail;
1597         struct rtattr *rta;
1598
1599         rta = (struct rtattr*)b;
1600         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1601         if (cbq_dump_attr(skb, &q->link) < 0)
1602                 goto rtattr_failure;
1603         rta->rta_len = skb->tail - b;
1604         return skb->len;
1605
1606 rtattr_failure:
1607         skb_trim(skb, b - skb->data);
1608         return -1;
1609 }
1610
1611 static int
1612 cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
1613 {
1614         struct cbq_sched_data *q = qdisc_priv(sch);
1615
1616         q->link.xstats.avgidle = q->link.avgidle;
1617         return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
1618 }
1619
1620 static int
1621 cbq_dump_class(struct Qdisc *sch, unsigned long arg,
1622                struct sk_buff *skb, struct tcmsg *tcm)
1623 {
1624         struct cbq_class *cl = (struct cbq_class*)arg;
1625         unsigned char    *b = skb->tail;
1626         struct rtattr *rta;
1627
1628         if (cl->tparent)
1629                 tcm->tcm_parent = cl->tparent->classid;
1630         else
1631                 tcm->tcm_parent = TC_H_ROOT;
1632         tcm->tcm_handle = cl->classid;
1633         tcm->tcm_info = cl->q->handle;
1634
1635         rta = (struct rtattr*)b;
1636         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
1637         if (cbq_dump_attr(skb, cl) < 0)
1638                 goto rtattr_failure;
1639         rta->rta_len = skb->tail - b;
1640         return skb->len;
1641
1642 rtattr_failure:
1643         skb_trim(skb, b - skb->data);
1644         return -1;
1645 }
1646
1647 static int
1648 cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
1649         struct gnet_dump *d)
1650 {
1651         struct cbq_sched_data *q = qdisc_priv(sch);
1652         struct cbq_class *cl = (struct cbq_class*)arg;
1653
1654         cl->qstats.qlen = cl->q->q.qlen;
1655         cl->xstats.avgidle = cl->avgidle;
1656         cl->xstats.undertime = 0;
1657
1658         if (!PSCHED_IS_PASTPERFECT(cl->undertime))
1659                 cl->xstats.undertime = PSCHED_TDIFF(cl->undertime, q->now);
1660
1661         if (gnet_stats_copy_basic(d, &cl->bstats) < 0 ||
1662 #ifdef CONFIG_NET_ESTIMATOR
1663             gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
1664 #endif
1665             gnet_stats_copy_queue(d, &cl->qstats) < 0)
1666                 return -1;
1667
1668         return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
1669 }
1670
1671 static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
1672                      struct Qdisc **old)
1673 {
1674         struct cbq_class *cl = (struct cbq_class*)arg;
1675
1676         if (cl) {
1677                 if (new == NULL) {
1678                         if ((new = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops,
1679                                                      cl->classid)) == NULL)
1680                                 return -ENOBUFS;
1681                 } else {
1682 #ifdef CONFIG_NET_CLS_POLICE
1683                         if (cl->police == TC_POLICE_RECLASSIFY)
1684                                 new->reshape_fail = cbq_reshape_fail;
1685 #endif
1686                 }
1687                 sch_tree_lock(sch);
1688                 *old = cl->q;
1689                 cl->q = new;
1690                 qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
1691                 qdisc_reset(*old);
1692                 sch_tree_unlock(sch);
1693
1694                 return 0;
1695         }
1696         return -ENOENT;
1697 }
1698
1699 static struct Qdisc *
1700 cbq_leaf(struct Qdisc *sch, unsigned long arg)
1701 {
1702         struct cbq_class *cl = (struct cbq_class*)arg;
1703
1704         return cl ? cl->q : NULL;
1705 }
1706
1707 static unsigned long cbq_get(struct Qdisc *sch, u32 classid)
1708 {
1709         struct cbq_sched_data *q = qdisc_priv(sch);
1710         struct cbq_class *cl = cbq_class_lookup(q, classid);
1711
1712         if (cl) {
1713                 cl->refcnt++;
1714                 return (unsigned long)cl;
1715         }
1716         return 0;
1717 }
1718
1719 static void cbq_destroy_filters(struct cbq_class *cl)
1720 {
1721         struct tcf_proto *tp;
1722
1723         while ((tp = cl->filter_list) != NULL) {
1724                 cl->filter_list = tp->next;
1725                 tcf_destroy(tp);
1726         }
1727 }
1728
1729 static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
1730 {
1731         struct cbq_sched_data *q = qdisc_priv(sch);
1732
1733         BUG_TRAP(!cl->filters);
1734
1735         cbq_destroy_filters(cl);
1736         qdisc_destroy(cl->q);
1737         qdisc_put_rtab(cl->R_tab);
1738 #ifdef CONFIG_NET_ESTIMATOR
1739         gen_kill_estimator(&cl->bstats, &cl->rate_est);
1740 #endif
1741         if (cl != &q->link)
1742                 kfree(cl);
1743 }
1744
1745 static void
1746 cbq_destroy(struct Qdisc* sch)
1747 {
1748         struct cbq_sched_data *q = qdisc_priv(sch);
1749         struct cbq_class *cl;
1750         unsigned h;
1751
1752 #ifdef CONFIG_NET_CLS_POLICE
1753         q->rx_class = NULL;
1754 #endif
1755         /*
1756          * Filters must be destroyed first because we don't destroy the
1757          * classes from root to leafs which means that filters can still
1758          * be bound to classes which have been destroyed already. --TGR '04
1759          */
1760         for (h = 0; h < 16; h++)
1761                 for (cl = q->classes[h]; cl; cl = cl->next)
1762                         cbq_destroy_filters(cl);
1763
1764         for (h = 0; h < 16; h++) {
1765                 struct cbq_class *next;
1766
1767                 for (cl = q->classes[h]; cl; cl = next) {
1768                         next = cl->next;
1769                         cbq_destroy_class(sch, cl);
1770                 }
1771         }
1772 }
1773
1774 static void cbq_put(struct Qdisc *sch, unsigned long arg)
1775 {
1776         struct cbq_class *cl = (struct cbq_class*)arg;
1777
1778         if (--cl->refcnt == 0) {
1779 #ifdef CONFIG_NET_CLS_POLICE
1780                 struct cbq_sched_data *q = qdisc_priv(sch);
1781
1782                 spin_lock_bh(&sch->dev->queue_lock);
1783                 if (q->rx_class == cl)
1784                         q->rx_class = NULL;
1785                 spin_unlock_bh(&sch->dev->queue_lock);
1786 #endif
1787
1788                 cbq_destroy_class(sch, cl);
1789         }
1790 }
1791
1792 static int
1793 cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct rtattr **tca,
1794                  unsigned long *arg)
1795 {
1796         int err;
1797         struct cbq_sched_data *q = qdisc_priv(sch);
1798         struct cbq_class *cl = (struct cbq_class*)*arg;
1799         struct rtattr *opt = tca[TCA_OPTIONS-1];
1800         struct rtattr *tb[TCA_CBQ_MAX];
1801         struct cbq_class *parent;
1802         struct qdisc_rate_table *rtab = NULL;
1803
1804         if (opt==NULL || rtattr_parse_nested(tb, TCA_CBQ_MAX, opt))
1805                 return -EINVAL;
1806
1807         if (tb[TCA_CBQ_OVL_STRATEGY-1] &&
1808             RTA_PAYLOAD(tb[TCA_CBQ_OVL_STRATEGY-1]) < sizeof(struct tc_cbq_ovl))
1809                 return -EINVAL;
1810
1811         if (tb[TCA_CBQ_FOPT-1] &&
1812             RTA_PAYLOAD(tb[TCA_CBQ_FOPT-1]) < sizeof(struct tc_cbq_fopt))
1813                 return -EINVAL;
1814
1815         if (tb[TCA_CBQ_RATE-1] &&
1816             RTA_PAYLOAD(tb[TCA_CBQ_RATE-1]) < sizeof(struct tc_ratespec))
1817                         return -EINVAL;
1818
1819         if (tb[TCA_CBQ_LSSOPT-1] &&
1820             RTA_PAYLOAD(tb[TCA_CBQ_LSSOPT-1]) < sizeof(struct tc_cbq_lssopt))
1821                         return -EINVAL;
1822
1823         if (tb[TCA_CBQ_WRROPT-1] &&
1824             RTA_PAYLOAD(tb[TCA_CBQ_WRROPT-1]) < sizeof(struct tc_cbq_wrropt))
1825                         return -EINVAL;
1826
1827 #ifdef CONFIG_NET_CLS_POLICE
1828         if (tb[TCA_CBQ_POLICE-1] &&
1829             RTA_PAYLOAD(tb[TCA_CBQ_POLICE-1]) < sizeof(struct tc_cbq_police))
1830                         return -EINVAL;
1831 #endif
1832
1833         if (cl) {
1834                 /* Check parent */
1835                 if (parentid) {
1836                         if (cl->tparent && cl->tparent->classid != parentid)
1837                                 return -EINVAL;
1838                         if (!cl->tparent && parentid != TC_H_ROOT)
1839                                 return -EINVAL;
1840                 }
1841
1842                 if (tb[TCA_CBQ_RATE-1]) {
1843                         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1844                         if (rtab == NULL)
1845                                 return -EINVAL;
1846                 }
1847
1848                 /* Change class parameters */
1849                 sch_tree_lock(sch);
1850
1851                 if (cl->next_alive != NULL)
1852                         cbq_deactivate_class(cl);
1853
1854                 if (rtab) {
1855                         rtab = xchg(&cl->R_tab, rtab);
1856                         qdisc_put_rtab(rtab);
1857                 }
1858
1859                 if (tb[TCA_CBQ_LSSOPT-1])
1860                         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1861
1862                 if (tb[TCA_CBQ_WRROPT-1]) {
1863                         cbq_rmprio(q, cl);
1864                         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1865                 }
1866
1867                 if (tb[TCA_CBQ_OVL_STRATEGY-1])
1868                         cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1869
1870 #ifdef CONFIG_NET_CLS_POLICE
1871                 if (tb[TCA_CBQ_POLICE-1])
1872                         cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1873 #endif
1874
1875                 if (tb[TCA_CBQ_FOPT-1])
1876                         cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1877
1878                 if (cl->q->q.qlen)
1879                         cbq_activate_class(cl);
1880
1881                 sch_tree_unlock(sch);
1882
1883 #ifdef CONFIG_NET_ESTIMATOR
1884                 if (tca[TCA_RATE-1])
1885                         gen_replace_estimator(&cl->bstats, &cl->rate_est,
1886                                 cl->stats_lock, tca[TCA_RATE-1]);
1887 #endif
1888                 return 0;
1889         }
1890
1891         if (parentid == TC_H_ROOT)
1892                 return -EINVAL;
1893
1894         if (tb[TCA_CBQ_WRROPT-1] == NULL || tb[TCA_CBQ_RATE-1] == NULL ||
1895             tb[TCA_CBQ_LSSOPT-1] == NULL)
1896                 return -EINVAL;
1897
1898         rtab = qdisc_get_rtab(RTA_DATA(tb[TCA_CBQ_RATE-1]), tb[TCA_CBQ_RTAB-1]);
1899         if (rtab == NULL)
1900                 return -EINVAL;
1901
1902         if (classid) {
1903                 err = -EINVAL;
1904                 if (TC_H_MAJ(classid^sch->handle) || cbq_class_lookup(q, classid))
1905                         goto failure;
1906         } else {
1907                 int i;
1908                 classid = TC_H_MAKE(sch->handle,0x8000);
1909
1910                 for (i=0; i<0x8000; i++) {
1911                         if (++q->hgenerator >= 0x8000)
1912                                 q->hgenerator = 1;
1913                         if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
1914                                 break;
1915                 }
1916                 err = -ENOSR;
1917                 if (i >= 0x8000)
1918                         goto failure;
1919                 classid = classid|q->hgenerator;
1920         }
1921
1922         parent = &q->link;
1923         if (parentid) {
1924                 parent = cbq_class_lookup(q, parentid);
1925                 err = -EINVAL;
1926                 if (parent == NULL)
1927                         goto failure;
1928         }
1929
1930         err = -ENOBUFS;
1931         cl = kzalloc(sizeof(*cl), GFP_KERNEL);
1932         if (cl == NULL)
1933                 goto failure;
1934         cl->R_tab = rtab;
1935         rtab = NULL;
1936         cl->refcnt = 1;
1937         if (!(cl->q = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops, classid)))
1938                 cl->q = &noop_qdisc;
1939         cl->classid = classid;
1940         cl->tparent = parent;
1941         cl->qdisc = sch;
1942         cl->allot = parent->allot;
1943         cl->quantum = cl->allot;
1944         cl->weight = cl->R_tab->rate.rate;
1945         cl->stats_lock = &sch->dev->queue_lock;
1946
1947         sch_tree_lock(sch);
1948         cbq_link_class(cl);
1949         cl->borrow = cl->tparent;
1950         if (cl->tparent != &q->link)
1951                 cl->share = cl->tparent;
1952         cbq_adjust_levels(parent);
1953         cl->minidle = -0x7FFFFFFF;
1954         cbq_set_lss(cl, RTA_DATA(tb[TCA_CBQ_LSSOPT-1]));
1955         cbq_set_wrr(cl, RTA_DATA(tb[TCA_CBQ_WRROPT-1]));
1956         if (cl->ewma_log==0)
1957                 cl->ewma_log = q->link.ewma_log;
1958         if (cl->maxidle==0)
1959                 cl->maxidle = q->link.maxidle;
1960         if (cl->avpkt==0)
1961                 cl->avpkt = q->link.avpkt;
1962         cl->overlimit = cbq_ovl_classic;
1963         if (tb[TCA_CBQ_OVL_STRATEGY-1])
1964                 cbq_set_overlimit(cl, RTA_DATA(tb[TCA_CBQ_OVL_STRATEGY-1]));
1965 #ifdef CONFIG_NET_CLS_POLICE
1966         if (tb[TCA_CBQ_POLICE-1])
1967                 cbq_set_police(cl, RTA_DATA(tb[TCA_CBQ_POLICE-1]));
1968 #endif
1969         if (tb[TCA_CBQ_FOPT-1])
1970                 cbq_set_fopt(cl, RTA_DATA(tb[TCA_CBQ_FOPT-1]));
1971         sch_tree_unlock(sch);
1972
1973 #ifdef CONFIG_NET_ESTIMATOR
1974         if (tca[TCA_RATE-1])
1975                 gen_new_estimator(&cl->bstats, &cl->rate_est,
1976                         cl->stats_lock, tca[TCA_RATE-1]);
1977 #endif
1978
1979         *arg = (unsigned long)cl;
1980         return 0;
1981
1982 failure:
1983         qdisc_put_rtab(rtab);
1984         return err;
1985 }
1986
1987 static int cbq_delete(struct Qdisc *sch, unsigned long arg)
1988 {
1989         struct cbq_sched_data *q = qdisc_priv(sch);
1990         struct cbq_class *cl = (struct cbq_class*)arg;
1991
1992         if (cl->filters || cl->children || cl == &q->link)
1993                 return -EBUSY;
1994
1995         sch_tree_lock(sch);
1996
1997         if (cl->next_alive)
1998                 cbq_deactivate_class(cl);
1999
2000         if (q->tx_borrowed == cl)
2001                 q->tx_borrowed = q->tx_class;
2002         if (q->tx_class == cl) {
2003                 q->tx_class = NULL;
2004                 q->tx_borrowed = NULL;
2005         }
2006 #ifdef CONFIG_NET_CLS_POLICE
2007         if (q->rx_class == cl)
2008                 q->rx_class = NULL;
2009 #endif
2010
2011         cbq_unlink_class(cl);
2012         cbq_adjust_levels(cl->tparent);
2013         cl->defmap = 0;
2014         cbq_sync_defmap(cl);
2015
2016         cbq_rmprio(q, cl);
2017         sch_tree_unlock(sch);
2018
2019         if (--cl->refcnt == 0)
2020                 cbq_destroy_class(sch, cl);
2021
2022         return 0;
2023 }
2024
2025 static struct tcf_proto **cbq_find_tcf(struct Qdisc *sch, unsigned long arg)
2026 {
2027         struct cbq_sched_data *q = qdisc_priv(sch);
2028         struct cbq_class *cl = (struct cbq_class *)arg;
2029
2030         if (cl == NULL)
2031                 cl = &q->link;
2032
2033         return &cl->filter_list;
2034 }
2035
2036 static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
2037                                      u32 classid)
2038 {
2039         struct cbq_sched_data *q = qdisc_priv(sch);
2040         struct cbq_class *p = (struct cbq_class*)parent;
2041         struct cbq_class *cl = cbq_class_lookup(q, classid);
2042
2043         if (cl) {
2044                 if (p && p->level <= cl->level)
2045                         return 0;
2046                 cl->filters++;
2047                 return (unsigned long)cl;
2048         }
2049         return 0;
2050 }
2051
2052 static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
2053 {
2054         struct cbq_class *cl = (struct cbq_class*)arg;
2055
2056         cl->filters--;
2057 }
2058
2059 static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
2060 {
2061         struct cbq_sched_data *q = qdisc_priv(sch);
2062         unsigned h;
2063
2064         if (arg->stop)
2065                 return;
2066
2067         for (h = 0; h < 16; h++) {
2068                 struct cbq_class *cl;
2069
2070                 for (cl = q->classes[h]; cl; cl = cl->next) {
2071                         if (arg->count < arg->skip) {
2072                                 arg->count++;
2073                                 continue;
2074                         }
2075                         if (arg->fn(sch, (unsigned long)cl, arg) < 0) {
2076                                 arg->stop = 1;
2077                                 return;
2078                         }
2079                         arg->count++;
2080                 }
2081         }
2082 }
2083
2084 static struct Qdisc_class_ops cbq_class_ops = {
2085         .graft          =       cbq_graft,
2086         .leaf           =       cbq_leaf,
2087         .get            =       cbq_get,
2088         .put            =       cbq_put,
2089         .change         =       cbq_change_class,
2090         .delete         =       cbq_delete,
2091         .walk           =       cbq_walk,
2092         .tcf_chain      =       cbq_find_tcf,
2093         .bind_tcf       =       cbq_bind_filter,
2094         .unbind_tcf     =       cbq_unbind_filter,
2095         .dump           =       cbq_dump_class,
2096         .dump_stats     =       cbq_dump_class_stats,
2097 };
2098
2099 static struct Qdisc_ops cbq_qdisc_ops = {
2100         .next           =       NULL,
2101         .cl_ops         =       &cbq_class_ops,
2102         .id             =       "cbq",
2103         .priv_size      =       sizeof(struct cbq_sched_data),
2104         .enqueue        =       cbq_enqueue,
2105         .dequeue        =       cbq_dequeue,
2106         .requeue        =       cbq_requeue,
2107         .drop           =       cbq_drop,
2108         .init           =       cbq_init,
2109         .reset          =       cbq_reset,
2110         .destroy        =       cbq_destroy,
2111         .change         =       NULL,
2112         .dump           =       cbq_dump,
2113         .dump_stats     =       cbq_dump_stats,
2114         .owner          =       THIS_MODULE,
2115 };
2116
2117 static int __init cbq_module_init(void)
2118 {
2119         return register_qdisc(&cbq_qdisc_ops);
2120 }
2121 static void __exit cbq_module_exit(void)
2122 {
2123         unregister_qdisc(&cbq_qdisc_ops);
2124 }
2125 module_init(cbq_module_init)
2126 module_exit(cbq_module_exit)
2127 MODULE_LICENSE("GPL");