2 * net/sched/sch_api.c Packet scheduler API.
4 * This program is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU General Public License
6 * as published by the Free Software Foundation; either version
7 * 2 of the License, or (at your option) any later version.
9 * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
13 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
14 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
15 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
18 #include <linux/module.h>
19 #include <linux/types.h>
20 #include <linux/kernel.h>
21 #include <linux/string.h>
23 #include <linux/socket.h>
24 #include <linux/sockios.h>
26 #include <linux/errno.h>
27 #include <linux/interrupt.h>
28 #include <linux/netdevice.h>
29 #include <linux/skbuff.h>
30 #include <linux/init.h>
31 #include <linux/proc_fs.h>
32 #include <linux/seq_file.h>
33 #include <linux/kmod.h>
34 #include <linux/list.h>
35 #include <linux/bitops.h>
36 #include <linux/hrtimer.h>
38 #include <net/netlink.h>
40 #include <net/pkt_sched.h>
42 #include <asm/processor.h>
43 #include <asm/uaccess.h>
44 #include <asm/system.h>
/* Forward declarations for the rtnetlink notification helpers defined
 * later in this file. */
46 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
47 struct Qdisc *old, struct Qdisc *new);
48 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
49 struct Qdisc *q, unsigned long cl, int event);
56 This file consists of two interrelated parts:
58 1. queueing disciplines manager frontend.
59 2. traffic classes manager frontend.
61 Generally, queueing discipline ("qdisc") is a black box,
62 which is able to enqueue packets and to dequeue them (when
63 device is ready to send something) in order and at times
64 determined by algorithm hidden in it.
66 qdisc's are divided into two categories:
67 - "queues", which have no internal structure visible from outside.
68 - "schedulers", which split all the packets to "traffic classes",
69 using "packet classifiers" (look at cls_api.c)
71 In turn, classes may have child qdiscs (as a rule, queues)
72 attached to them etc. etc. etc.
74 The goal of the routines in this file is to translate
75 information supplied by user in the form of handles
76 to more intelligible for kernel form, to make some sanity
77 checks and part of work, which is common to all qdiscs
78 and to provide rtnetlink notifications.
80 All real intelligent work is done inside qdisc modules.
84 Every discipline has two major routines: enqueue and dequeue.
88 dequeue usually returns a skb to send. It is allowed to return NULL,
89 but it does not mean that queue is empty, it just means that
90 discipline does not want to send anything this time.
91 Queue is really empty if q->q.qlen == 0.
92 For complicated disciplines with multiple queues q->q is not
93 real packet queue, but however q->q.qlen must be valid.
97 enqueue returns 0, if packet was enqueued successfully.
98 If packet (this one or another one) was dropped, it returns
100 NET_XMIT_DROP - this packet dropped
101 Expected action: do not backoff, but wait until queue will clear.
102 NET_XMIT_CN - probably this packet enqueued, but another one dropped.
103 Expected action: backoff or ignore
104 NET_XMIT_POLICED - dropped by police.
105 Expected action: backoff or error to real-time apps.
111 requeues once dequeued packet. It is used for non-standard or
112 just buggy devices, which can defer output even if dev->tbusy=0.
116 returns qdisc to initial state: purge all buffers, clear all
117 timers, counters (except for statistics) etc.
121 initializes newly created qdisc.
125 destroys resources allocated by init and during lifetime of qdisc.
129 changes qdisc parameters.
132 /* Protects list of registered TC modules. It is pure SMP lock. */
133 static DEFINE_RWLOCK(qdisc_mod_lock);
136 /************************************************
137 * Queueing disciplines manipulation. *
138 ************************************************/
141 /* The list of all installed queueing disciplines. */
/* Singly linked list of Qdisc_ops, chained through ->next and guarded
 * by qdisc_mod_lock above. */
143 static struct Qdisc_ops *qdisc_base;
145 /* Register/unregister queueing discipline */
/* Register a queueing discipline type.
 * Scans qdisc_base for a duplicate id, then fills in no-op defaults
 * for any of enqueue/requeue/dequeue the ops left NULL.  Serialized
 * by qdisc_mod_lock.
 * NOTE(review): the duplicate-id error path, the list append and the
 * return statement are not visible in this excerpt. */
147 int register_qdisc(struct Qdisc_ops *qops)
149 struct Qdisc_ops *q, **qp;
152 write_lock(&qdisc_mod_lock);
/* Look for an already-registered qdisc with the same id string. */
153 for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
154 if (!strcmp(qops->id, q->id))
/* Default any missing handlers to the no-op implementations. */
157 if (qops->enqueue == NULL)
158 qops->enqueue = noop_qdisc_ops.enqueue;
159 if (qops->requeue == NULL)
160 qops->requeue = noop_qdisc_ops.requeue;
161 if (qops->dequeue == NULL)
162 qops->dequeue = noop_qdisc_ops.dequeue;
168 write_unlock(&qdisc_mod_lock);
/* Unregister a queueing discipline type by unlinking qops from
 * qdisc_base, under qdisc_mod_lock.
 * NOTE(review): the unlink and return logic inside the loop is not
 * visible in this excerpt. */
172 int unregister_qdisc(struct Qdisc_ops *qops)
174 struct Qdisc_ops *q, **qp;
177 write_lock(&qdisc_mod_lock);
178 for (qp = &qdisc_base; (q=*qp)!=NULL; qp = &q->next)
186 write_unlock(&qdisc_mod_lock);
190 /* We know handle. Find qdisc among all qdisc's attached to device
191 (root qdisc, all its children, children of children etc.)
/* Linear scan of dev->qdisc_list for a matching 32-bit handle.
 * Caller is expected to hold qdisc_tree_lock (see qdisc_lookup). */
194 static struct Qdisc *__qdisc_lookup(struct net_device *dev, u32 handle)
198 list_for_each_entry(q, &dev->qdisc_list, list) {
199 if (q->handle == handle)
/* Locking wrapper around __qdisc_lookup: holds qdisc_tree_lock for
 * the duration of the list walk. */
205 struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
209 read_lock(&qdisc_tree_lock);
210 q = __qdisc_lookup(dev, handle);
211 read_unlock(&qdisc_tree_lock);
/* Resolve classid within parent qdisc p to the leaf qdisc attached to
 * that class, using the class ops get/leaf pair.
 * NOTE(review): NULL checks and the matching cops->put release are
 * not visible in this excerpt. */
215 static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
219 struct Qdisc_class_ops *cops = p->ops->cl_ops;
223 cl = cops->get(p, classid);
227 leaf = cops->leaf(p, cl);
232 /* Find queueing discipline by name */
/* Look up a registered Qdisc_ops by its id (rtattr string) under
 * qdisc_mod_lock, taking a module reference via try_module_get on a
 * match; returns NULL when not found or the reference fails. */
234 static struct Qdisc_ops *qdisc_lookup_ops(struct rtattr *kind)
236 struct Qdisc_ops *q = NULL;
239 read_lock(&qdisc_mod_lock);
240 for (q = qdisc_base; q; q = q->next) {
241 if (rtattr_strcmp(kind, q->id) == 0) {
242 if (!try_module_get(q->owner))
247 read_unlock(&qdisc_mod_lock);
/* Global cache of rate tables, shared between qdiscs that use
 * identical tc_ratespec parameters. */
252 static struct qdisc_rate_table *qdisc_rtab_list;
/* Find or create a rate table for r.  An entry with an identical
 * tc_ratespec is reused (the refcount bump is not visible in this
 * excerpt); otherwise a new table is allocated and its 1024-byte data
 * area copied from the netlink attribute tab. */
254 struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct rtattr *tab)
256 struct qdisc_rate_table *rtab;
258 for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
259 if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
/* Sanity check: the table payload must be exactly 1024 bytes. */
265 if (tab == NULL || r->rate == 0 || r->cell_log == 0 || RTA_PAYLOAD(tab) != 1024)
268 rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
272 memcpy(rtab->data, RTA_DATA(tab), 1024);
/* Push the new table onto the head of the global list. */
273 rtab->next = qdisc_rtab_list;
274 qdisc_rtab_list = rtab;
/* Drop a reference on a rate table; a NULL tab is a no-op, and the
 * table survives while other references remain.  When the count hits
 * zero the list walk below unlinks it (the unlink/free body is not
 * visible in this excerpt). */
279 void qdisc_put_rtab(struct qdisc_rate_table *tab)
281 struct qdisc_rate_table *rtab, **rtabp;
283 if (!tab || --tab->refcnt)
286 for (rtabp = &qdisc_rtab_list; (rtab=*rtabp) != NULL; rtabp = &rtab->next) {
/* hrtimer callback: clear the throttled flag on the owning qdisc and,
 * if the device queue lock is free, kick the device (the restart call
 * inside the locked section is not visible in this excerpt). */
295 static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
297 struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
299 struct net_device *dev = wd->qdisc->dev;
301 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
/* trylock only: this runs in timer context, never spin here. */
303 if (spin_trylock(&dev->queue_lock)) {
305 spin_unlock(&dev->queue_lock);
/* One-shot timer: rearmed explicitly via qdisc_watchdog_schedule. */
309 return HRTIMER_NORESTART;
/* Initialize a qdisc watchdog: an absolute-time monotonic hrtimer
 * that fires qdisc_watchdog() above. */
312 void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
314 hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
315 wd->timer.function = qdisc_watchdog;
318 EXPORT_SYMBOL(qdisc_watchdog_init);
/* Arm the watchdog to fire at the absolute psched time 'expires',
 * marking the qdisc throttled until the timer callback clears it. */
320 void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
324 wd->qdisc->flags |= TCQ_F_THROTTLED;
/* Convert psched microseconds to a ktime value for the hrtimer. */
326 time = ktime_set(0, 0);
327 time = ktime_add_ns(time, PSCHED_US2NS(expires));
328 hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
330 EXPORT_SYMBOL(qdisc_watchdog_schedule);
/* Cancel any pending watchdog timer and clear the throttled flag. */
332 void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
334 hrtimer_cancel(&wd->timer);
335 wd->qdisc->flags &= ~TCQ_F_THROTTLED;
338 EXPORT_SYMBOL(qdisc_watchdog_cancel);
340 /* Allocate an unique handle from space managed by kernel */
/* Hand out an unused major handle in the 0x8000xxxx autoassign range,
 * stepping the major number by 1 (0x10000) per probe and wrapping
 * before TC_H_ROOT.  Gives up and returns 0 once the probe counter i
 * is exhausted (its initialization is not visible in this excerpt). */
342 static u32 qdisc_alloc_handle(struct net_device *dev)
345 static u32 autohandle = TC_H_MAKE(0x80000000U, 0);
348 autohandle += TC_H_MAKE(0x10000U, 0);
349 if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
350 autohandle = TC_H_MAKE(0x80000000U, 0);
351 } while (qdisc_lookup(dev, autohandle) && --i > 0);
353 return i>0 ? autohandle : 0;
356 /* Attach toplevel qdisc to device dev */
/* Swap in a new root (or ingress) qdisc on dev and return the old
 * one.  The swap is done under qdisc_lock_tree; an old qdisc whose
 * refcount is <= 1 is pruned first.  The deactivate/activate calls
 * implied by the IFF_UP checks are not visible in this excerpt. */
358 static struct Qdisc *
359 dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
361 struct Qdisc *oqdisc;
363 if (dev->flags & IFF_UP)
366 qdisc_lock_tree(dev);
/* Ingress qdiscs hang off dev->qdisc_ingress rather than the normal
 * egress dev->qdisc_sleeping slot. */
367 if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
368 oqdisc = dev->qdisc_ingress;
369 /* Prune old scheduler */
370 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
373 dev->qdisc_ingress = NULL;
375 dev->qdisc_ingress = qdisc;
380 oqdisc = dev->qdisc_sleeping;
382 /* Prune old scheduler */
383 if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
386 /* ... and graft new one */
/* dev->qdisc stays noop until the device is (re)activated. */
389 dev->qdisc_sleeping = qdisc;
390 dev->qdisc = &noop_qdisc;
393 qdisc_unlock_tree(dev);
395 if (dev->flags & IFF_UP)
/* Propagate a queue-length decrease of n packets up the qdisc tree,
 * invoking each ancestor's qlen_notify hook (so e.g. a now-empty
 * class can be deactivated).  Ancestors are found by looking up the
 * major part of sch->parent via __qdisc_lookup.
 * NOTE(review): the matching cops->put and the qlen adjustment itself
 * are not visible in this excerpt. */
401 void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
403 struct Qdisc_class_ops *cops;
409 while ((parentid = sch->parent)) {
410 sch = __qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
411 cops = sch->ops->cl_ops;
412 if (cops->qlen_notify) {
413 cl = cops->get(sch, parentid);
414 cops->qlen_notify(sch, cl);
420 EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
422 /* Graft qdisc "new" to class "classid" of qdisc "parent" or
425 Old qdisc is not destroyed but returned in *old.
/* Two cases: parent == NULL grafts at the device root (or at ingress,
 * keyed off the old qdisc's TCQ_F_INGRESS flag) via dev_graft_qdisc;
 * otherwise the parent's class ops graft hook does the work and the
 * class reference is dropped again with cops->put. */
428 static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
430 struct Qdisc *new, struct Qdisc **old)
433 struct Qdisc *q = *old;
436 if (parent == NULL) {
437 if (q && q->flags&TCQ_F_INGRESS) {
438 *old = dev_graft_qdisc(dev, q);
440 *old = dev_graft_qdisc(dev, new);
443 struct Qdisc_class_ops *cops = parent->ops->cl_ops;
448 unsigned long cl = cops->get(parent, classid);
450 err = cops->graft(parent, cl, new, old);
/* Record the child's parent classid — presumably guarded by a
 * success check not visible in this excerpt. */
452 new->parent = classid;
453 cops->put(parent, cl);
461 Allocate and initialize new qdisc.
463 Parameters are passed via opt.
/* Create a qdisc of the kind named in tca[TCA_KIND-1] with the given
 * handle (TC_H_INGRESS selects the ingress pseudo-handle; 0 asks for
 * an auto-allocated one), run its init hook, optionally attach a rate
 * estimator, and link it into dev->qdisc_list.  Errors are reported
 * through *errp (the assignment is not visible in this excerpt). */
466 static struct Qdisc *
467 qdisc_create(struct net_device *dev, u32 handle, struct rtattr **tca, int *errp)
470 struct rtattr *kind = tca[TCA_KIND-1];
472 struct Qdisc_ops *ops;
474 ops = qdisc_lookup_ops(kind);
/* Unknown kind: try to autoload the module sch_<name>. */
476 if (ops == NULL && kind != NULL) {
478 if (rtattr_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
479 /* We dropped the RTNL semaphore in order to
480 * perform the module load. So, even if we
481 * succeeded in loading the module we have to
482 * tell the caller to replay the request. We
483 * indicate this using -EAGAIN.
484 * We replay the request because the device may
485 * go away in the mean time.
488 request_module("sch_%s", name);
490 ops = qdisc_lookup_ops(kind);
492 /* We will try again qdisc_lookup_ops,
493 * so don't keep a reference.
495 module_put(ops->owner);
507 sch = qdisc_alloc(dev, ops);
/* Resolve the handle: ingress pseudo-handle, autoassign, or caller
 * supplied. */
513 if (handle == TC_H_INGRESS) {
514 sch->flags |= TCQ_F_INGRESS;
515 handle = TC_H_MAKE(TC_H_INGRESS, 0);
516 } else if (handle == 0) {
517 handle = qdisc_alloc_handle(dev);
523 sch->handle = handle;
/* init is optional; a qdisc without one counts as initialized. */
525 if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS-1])) == 0) {
526 #ifdef CONFIG_NET_ESTIMATOR
527 if (tca[TCA_RATE-1]) {
528 err = gen_new_estimator(&sch->bstats, &sch->rate_est,
533 * Any broken qdiscs that would require
534 * a ops->reset() here? The qdisc was never
535 * in action so it shouldn't be necessary.
543 qdisc_lock_tree(dev);
544 list_add_tail(&sch->list, &dev->qdisc_list);
545 qdisc_unlock_tree(dev);
/* Error path: undo the allocation done by qdisc_alloc and drop the
 * module reference taken by qdisc_lookup_ops. */
551 kfree((char *) sch - sch->padded);
553 module_put(ops->owner);
/* Apply changed parameters to an existing qdisc: forward TCA_OPTIONS
 * to the qdisc's change hook (erroring out if the qdisc has none),
 * then replace the rate estimator when TCA_RATE was supplied. */
559 static int qdisc_change(struct Qdisc *sch, struct rtattr **tca)
561 if (tca[TCA_OPTIONS-1]) {
564 if (sch->ops->change == NULL)
566 err = sch->ops->change(sch, tca[TCA_OPTIONS-1]);
570 #ifdef CONFIG_NET_ESTIMATOR
572 gen_replace_estimator(&sch->bstats, &sch->rate_est,
573 sch->stats_lock, tca[TCA_RATE-1]);
/* Walker state for the loop-detection walk below; embeds the generic
 * qdisc_walker as its first member so check_loop_fn can downcast the
 * walker pointer back to it. */
578 struct check_loop_arg
580 struct qdisc_walker w;
585 static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);
/* Detect whether grafting would create a cycle: walk all classes of q
 * and recurse into their leaf qdiscs (check_loop_fn), stopping the
 * walk if the candidate parent p is reachable.  Returns -ELOOP when a
 * cycle was found, 0 otherwise. */
587 static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
589 struct check_loop_arg arg;
/* Classless qdiscs have no children, so no loop is possible. */
591 if (q->ops->cl_ops == NULL)
594 arg.w.stop = arg.w.skip = arg.w.count = 0;
595 arg.w.fn = check_loop_fn;
598 q->ops->cl_ops->walk(q, &arg.w);
599 return arg.w.stop ? -ELOOP : 0;
/* Per-class callback for check_loop: fetch the class's leaf qdisc and
 * either flag a loop (leaf equals the target parent, or the recursion
 * exceeds depth 7) or recurse one level deeper. */
603 check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
606 struct Qdisc_class_ops *cops = q->ops->cl_ops;
607 struct check_loop_arg *arg = (struct check_loop_arg *)w;
609 leaf = cops->leaf(q, cl);
611 if (leaf == arg->p || arg->depth > 7)
613 return check_loop(leaf, arg->p, arg->depth + 1);
/* Handle RTM_DELQDISC / RTM_GETQDISC: locate the qdisc named by
 * tcm_parent/tcm_handle, then either detach it (DEL, by grafting NULL
 * in its place) or just echo it back via qdisc_notify (GET). */
622 static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
624 struct tcmsg *tcm = NLMSG_DATA(n);
625 struct rtattr **tca = arg;
626 struct net_device *dev;
627 u32 clid = tcm->tcm_parent;
628 struct Qdisc *q = NULL;
629 struct Qdisc *p = NULL;
632 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
/* Resolve the target: child of an explicit parent, the ingress
 * qdisc, or the device root when clid == TC_H_ROOT. */
636 if (clid != TC_H_ROOT) {
637 if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
638 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
640 q = qdisc_leaf(p, clid);
641 } else { /* ingress */
642 q = dev->qdisc_ingress;
645 q = dev->qdisc_sleeping;
/* If the caller gave a handle it must agree with what we found. */
650 if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
653 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
657 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
660 if (n->nlmsg_type == RTM_DELQDISC) {
665 if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
/* Notify, then tear down under the device queue lock (the destroy
 * call itself is not visible in this excerpt). */
668 qdisc_notify(skb, n, clid, q, NULL);
669 spin_lock_bh(&dev->queue_lock);
671 spin_unlock_bh(&dev->queue_lock);
674 qdisc_notify(skb, n, clid, NULL, q);
/* Handle RTM_NEWQDISC: create, replace or change a qdisc according to
 * the NLM_F_CREATE/REPLACE/EXCL flags.  May return -EAGAIN after a
 * module autoload so the caller replays the whole request. */
683 static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
687 struct net_device *dev;
693 /* Reinit, just in case something touches this. */
696 clid = tcm->tcm_parent;
699 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
/* Locate the existing child qdisc at the requested attachment point
 * (explicit parent, ingress, or device root). */
703 if (clid != TC_H_ROOT) {
704 if (clid != TC_H_INGRESS) {
705 if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
707 q = qdisc_leaf(p, clid);
708 } else { /*ingress */
709 q = dev->qdisc_ingress;
712 q = dev->qdisc_sleeping;
715 /* It may be default qdisc, ignore it */
716 if (q && q->handle == 0)
719 if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
720 if (tcm->tcm_handle) {
721 if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
723 if (TC_H_MIN(tcm->tcm_handle))
725 if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
727 if (n->nlmsg_flags&NLM_F_EXCL)
729 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
/* Re-attaching an existing qdisc must not create a cycle. */
732 (p && check_loop(q, p, 0)))
734 atomic_inc(&q->refcnt);
740 /* This magic test requires explanation.
742 * We know, that some child q is already
743 * attached to this parent and have choice:
744 * either to change it or to create/graft new one.
746 * 1. We are allowed to create/graft only
747 * if CREATE and REPLACE flags are set.
749 * 2. If EXCL is set, requestor wanted to say,
750 * that qdisc tcm_handle is not expected
751 * to exist, so that we choose create/graft too.
753 * 3. The last case is when no flags are set.
754 * Alas, it is sort of hole in API, we
755 * cannot decide what to do unambiguously.
756 * For now we select create/graft, if
757 * user gave KIND, which does not match existing.
759 if ((n->nlmsg_flags&NLM_F_CREATE) &&
760 (n->nlmsg_flags&NLM_F_REPLACE) &&
761 ((n->nlmsg_flags&NLM_F_EXCL) ||
763 rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))))
768 if (!tcm->tcm_handle)
770 q = qdisc_lookup(dev, tcm->tcm_handle);
773 /* Change qdisc parameters */
776 if (n->nlmsg_flags&NLM_F_EXCL)
778 if (tca[TCA_KIND-1] && rtattr_strcmp(tca[TCA_KIND-1], q->ops->id))
780 err = qdisc_change(q, tca);
782 qdisc_notify(skb, n, clid, NULL, q);
/* Create path: build a brand-new qdisc and graft it in. */
786 if (!(n->nlmsg_flags&NLM_F_CREATE))
788 if (clid == TC_H_INGRESS)
789 q = qdisc_create(dev, tcm->tcm_parent, tca, &err);
791 q = qdisc_create(dev, tcm->tcm_handle, tca, &err);
800 struct Qdisc *old_q = NULL;
801 err = qdisc_graft(dev, p, clid, q, &old_q);
804 spin_lock_bh(&dev->queue_lock);
806 spin_unlock_bh(&dev->queue_lock);
810 qdisc_notify(skb, n, clid, old_q, q);
/* The displaced qdisc is destroyed under the queue lock. */
812 spin_lock_bh(&dev->queue_lock);
813 qdisc_destroy(old_q);
814 spin_unlock_bh(&dev->queue_lock);
/* Serialize one qdisc into a netlink message: tcmsg header, TCA_KIND,
 * the qdisc's own dump, then statistics via the gnet_stats compat
 * helpers.  Failure labels (NLMSG/RTA goto targets) are not visible
 * in this excerpt. */
820 static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
821 u32 pid, u32 seq, u16 flags, int event)
824 struct nlmsghdr *nlh;
825 unsigned char *b = skb_tail_pointer(skb);
828 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
829 tcm = NLMSG_DATA(nlh);
830 tcm->tcm_family = AF_UNSPEC;
833 tcm->tcm_ifindex = q->dev->ifindex;
834 tcm->tcm_parent = clid;
835 tcm->tcm_handle = q->handle;
/* tcm_info carries the qdisc refcount in dumps. */
836 tcm->tcm_info = atomic_read(&q->refcnt);
837 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
838 if (q->ops->dump && q->ops->dump(q, skb) < 0)
/* Refresh the queue-length stat from the live queue before copy. */
840 q->qstats.qlen = q->q.qlen;
842 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
843 TCA_XSTATS, q->stats_lock, &d) < 0)
846 if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
849 if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
850 #ifdef CONFIG_NET_ESTIMATOR
851 gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
853 gnet_stats_copy_queue(&d, &q->qstats) < 0)
856 if (gnet_stats_finish_copy(&d) < 0)
/* Patch the final message length now that the payload is known. */
859 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
/* Build and send an rtnetlink notification about a qdisc change:
 * RTM_DELQDISC for the old qdisc (when present) and RTM_NEWQDISC for
 * the new one, broadcast on the RTNLGRP_TC group. */
868 static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
869 u32 clid, struct Qdisc *old, struct Qdisc *new)
872 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
874 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
878 if (old && old->handle) {
879 if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
883 if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
888 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
/* Netlink dump callback: iterate every device and every qdisc on its
 * qdisc_list, resuming from the indices a previous partial dump saved
 * in cb->args. */
895 static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
899 struct net_device *dev;
903 s_q_idx = q_idx = cb->args[1];
904 read_lock(&dev_base_lock);
905 for (dev=dev_base, idx=0; dev; dev = dev->next, idx++) {
910 read_lock(&qdisc_tree_lock);
912 list_for_each_entry(q, &dev->qdisc_list, list) {
/* Skip entries already emitted in an earlier batch. */
913 if (q_idx < s_q_idx) {
917 if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
918 cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) {
919 read_unlock(&qdisc_tree_lock);
924 read_unlock(&qdisc_tree_lock);
928 read_unlock(&dev_base_lock);
938 /************************************************
939 * Traffic classes manipulation. *
940 ************************************************/
/* Handle RTM_{NEW,DEL,GET}TCLASS: decode the (parent, handle) pair
 * into a qdisc + class, then delete, get or change/create the class
 * through the qdisc's class ops, emitting notifications as needed. */
944 static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
946 struct tcmsg *tcm = NLMSG_DATA(n);
947 struct rtattr **tca = arg;
948 struct net_device *dev;
949 struct Qdisc *q = NULL;
950 struct Qdisc_class_ops *cops;
951 unsigned long cl = 0;
952 unsigned long new_cl;
953 u32 pid = tcm->tcm_parent;
954 u32 clid = tcm->tcm_handle;
955 u32 qid = TC_H_MAJ(clid);
958 if ((dev = __dev_get_by_index(tcm->tcm_ifindex)) == NULL)
962 parent == TC_H_UNSPEC - unspecified parent.
963 parent == TC_H_ROOT - class is root, which has no parent.
964 parent == X:0 - parent is root class.
965 parent == X:Y - parent is a node in hierarchy.
966 parent == 0:Y - parent is X:Y, where X:0 is qdisc.
968 handle == 0:0 - generate handle from kernel pool.
969 handle == 0:Y - class is X:Y, where X:0 is qdisc.
970 handle == X:Y - clear.
971 handle == X:0 - root class.
974 /* Step 1. Determine qdisc handle X:0 */
976 if (pid != TC_H_ROOT) {
977 u32 qid1 = TC_H_MAJ(pid);
980 /* If both majors are known, they must be identical. */
986 qid = dev->qdisc_sleeping->handle;
988 /* Now qid is genuine qdisc handle consistent
989 both with parent and child.
991 TC_H_MAJ(pid) still may be unspecified, complete it now.
994 pid = TC_H_MAKE(qid, pid);
997 qid = dev->qdisc_sleeping->handle;
1000 /* OK. Locate qdisc */
1001 if ((q = qdisc_lookup(dev, qid)) == NULL)
1004 /* And check that it supports classes */
1005 cops = q->ops->cl_ops;
1009 /* Now try to get class */
1011 if (pid == TC_H_ROOT)
1014 clid = TC_H_MAKE(qid, clid);
1017 cl = cops->get(q, clid);
/* Class not found: only RTM_NEWTCLASS with NLM_F_CREATE may proceed
 * (to create it). */
1021 if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
1024 switch (n->nlmsg_type) {
1027 if (n->nlmsg_flags&NLM_F_EXCL)
1031 err = cops->delete(q, cl);
1033 tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
1036 err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
/* Create or change via the class ops change hook; new_cl receives
 * the (possibly new) class reference. */
1045 err = cops->change(q, clid, pid, tca, &new_cl);
1047 tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);
/* Serialize one traffic class into a netlink message, mirroring
 * tc_fill_qdisc but delegating the payload and statistics to the
 * class ops dump/dump_stats hooks. */
1057 static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
1059 u32 pid, u32 seq, u16 flags, int event)
1062 struct nlmsghdr *nlh;
1063 unsigned char *b = skb_tail_pointer(skb);
1065 struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;
1067 nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
1068 tcm = NLMSG_DATA(nlh);
1069 tcm->tcm_family = AF_UNSPEC;
1070 tcm->tcm_ifindex = q->dev->ifindex;
1071 tcm->tcm_parent = q->handle;
1072 tcm->tcm_handle = q->handle;
1074 RTA_PUT(skb, TCA_KIND, IFNAMSIZ, q->ops->id);
1075 if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
1076 goto rtattr_failure;
1078 if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
1079 TCA_XSTATS, q->stats_lock, &d) < 0)
1080 goto rtattr_failure;
1082 if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
1083 goto rtattr_failure;
1085 if (gnet_stats_finish_copy(&d) < 0)
1086 goto rtattr_failure;
/* Patch the final message length now that the payload is known. */
1088 nlh->nlmsg_len = skb_tail_pointer(skb) - b;
/* Send an rtnetlink notification (event = RTM_NEWTCLASS or
 * RTM_DELTCLASS) for a single class on the RTNLGRP_TC group. */
1097 static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
1098 struct Qdisc *q, unsigned long cl, int event)
1100 struct sk_buff *skb;
1101 u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;
1103 skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
1107 if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
1112 return rtnetlink_send(skb, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
/* Walker state for dumping classes: the generic walker plus the dump
 * skb and netlink callback that the per-class callback feeds. */
1115 struct qdisc_dump_args
1117 struct qdisc_walker w;
1118 struct sk_buff *skb;
1119 struct netlink_callback *cb;
/* Per-class walk callback: emit one RTM_NEWTCLASS entry into the dump
 * skb for class cl of qdisc q. */
1122 static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
1124 struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;
1126 return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
1127 a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
/* Netlink dump callback for traffic classes: walk every classful
 * qdisc on the target device (optionally filtered by tcm_parent) and
 * dump its classes via qdisc_class_dump, resuming from cb->args. */
1130 static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
1134 struct net_device *dev;
1136 struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
1137 struct qdisc_dump_args arg;
1139 if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
1141 if ((dev = dev_get_by_index(tcm->tcm_ifindex)) == NULL)
1147 read_lock(&qdisc_tree_lock);
1148 list_for_each_entry(q, &dev->qdisc_list, list) {
/* Skip qdiscs already dumped, classless ones, and those not matching
 * the requested parent filter. */
1149 if (t < s_t || !q->ops->cl_ops ||
1151 TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
/* New qdisc in this batch: clear the per-qdisc resume state. */
1156 memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]))
1157 arg.w.fn = qdisc_class_dump;
1161 arg.w.skip = cb->args[1];
1163 q->ops->cl_ops->walk(q, &arg.w);
1164 cb->args[1] = arg.w.count;
1169 read_unlock(&qdisc_tree_lock);
1177 /* Main classifier routine: scans classifier chain attached
1178 to this qdisc, (optionally) tests for protocol and asks
1179 specific classifiers.
/* Walk the tcf_proto chain, calling each classifier whose protocol
 * matches (or is ETH_P_ALL) until one returns a verdict >= 0.  With
 * CONFIG_NET_CLS_ACT, a TC_ACT_RECLASSIFY verdict restarts from the
 * head of the chain, bounded by MAX_REC_LOOP to catch buggy rules. */
1181 int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
1182 struct tcf_result *res)
1185 __be16 protocol = skb->protocol;
1186 #ifdef CONFIG_NET_CLS_ACT
1187 struct tcf_proto *otp = tp;
1190 protocol = skb->protocol;
1192 for ( ; tp; tp = tp->next) {
1193 if ((tp->protocol == protocol ||
1194 tp->protocol == htons(ETH_P_ALL)) &&
1195 (err = tp->classify(skb, tp, res)) >= 0) {
1196 #ifdef CONFIG_NET_CLS_ACT
1197 if ( TC_ACT_RECLASSIFY == err) {
1198 __u32 verd = (__u32) G_TC_VERD(skb->tc_verd);
/* Too many reclassify round-trips: warn and bail out. */
1201 if (MAX_REC_LOOP < verd++) {
1202 printk("rule prio %d protocol %02x reclassify is buggy packet dropped\n",
1203 tp->prio&0xffff, ntohs(tp->protocol));
1206 skb->tc_verd = SET_TC_VERD(skb->tc_verd,verd);
1210 skb->tc_verd = SET_TC_VERD(skb->tc_verd,0);
1223 #ifdef CONFIG_PROC_FS
/* /proc/net/psched: report the clock-resolution parameters that the
 * userspace tc tool reads to convert between time units. */
1224 static int psched_show(struct seq_file *seq, void *v)
1226 seq_printf(seq, "%08x %08x %08x %08x\n",
1227 (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
1229 (u32)NSEC_PER_SEC/(u32)ktime_to_ns(KTIME_MONOTONIC_RES));
/* Open handler wiring psched_show into the single_open seq_file API. */
1234 static int psched_open(struct inode *inode, struct file *file)
1236 return single_open(file, psched_show, PDE(inode)->data);
/* File operations for /proc/net/psched.
 * NOTE(review): the .read handler line is missing from this excerpt;
 * presumably seq_read — confirm against the full file. */
1239 static const struct file_operations psched_fops = {
1240 .owner = THIS_MODULE,
1241 .open = psched_open,
1243 .llseek = seq_lseek,
1244 .release = single_release,
/* Subsystem init: register the built-in fifo qdiscs, create the
 * /proc/net/psched file, and hook the qdisc/class rtnetlink message
 * types to their handlers. */
1248 static int __init pktsched_init(void)
1250 register_qdisc(&pfifo_qdisc_ops);
1251 register_qdisc(&bfifo_qdisc_ops);
1252 proc_net_fops_create("psched", 0, &psched_fops);
1254 rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
1255 rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
1256 rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
1257 rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
1258 rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
1259 rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);
1264 subsys_initcall(pktsched_init);
/* Symbols used by out-of-file qdisc modules and classifiers. */
1266 EXPORT_SYMBOL(qdisc_get_rtab);
1267 EXPORT_SYMBOL(qdisc_put_rtab);
1268 EXPORT_SYMBOL(register_qdisc);
1269 EXPORT_SYMBOL(unregister_qdisc);
1270 EXPORT_SYMBOL(tc_classify);