/*
 * net/sched/sch_api.c	Packet scheduler API.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Fixes:
 *
 * Rani Assaf <rani@magic.metawire.com> :980802: JIFFIES and CPU clock sources are repaired.
 * Eduardo J. Blanco <ejbs@netlabs.com.uy> :990222: kmod support
 * Jamal Hadi Salim <hadi@nortelnetworks.com>: 990601: ingress support
 */
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/skbuff.h>
#include <linux/init.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <linux/kmod.h>
#include <linux/list.h>
#include <linux/hrtimer.h>

#include <net/net_namespace.h>
#include <net/netlink.h>
#include <net/pkt_sched.h>
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n, u32 clid,
			struct Qdisc *old, struct Qdisc *new);
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event);
/*

   Short review.
   -------------

   This file consists of two interrelated parts:

   1. The queueing disciplines manager frontend.
   2. The traffic classes manager frontend.

   Generally, a queueing discipline ("qdisc") is a black box which is
   able to enqueue packets and to dequeue them (when the device is
   ready to send something), in an order and at times determined by
   the algorithm hidden inside it.

   qdiscs are divided into two categories:
   - "queues", which have no internal structure visible from outside.
   - "schedulers", which split all packets into "traffic classes",
     using "packet classifiers" (see cls_api.c).

   In turn, classes may have child qdiscs (as a rule, queues)
   attached to them, and so on recursively.

   The goal of the routines in this file is to translate the
   information supplied by the user in the form of handles into a
   form more intelligible to the kernel, to make some sanity checks
   and do the part of the work common to all qdiscs, and to provide
   rtnetlink notifications.

   All the real intelligent work is done inside the qdisc modules.

   Every discipline has two major routines: enqueue and dequeue.

   ---dequeue

   dequeue usually returns a skb to send. It is allowed to return
   NULL, but that does not mean the queue is empty; it only means
   that the discipline does not want to send anything at this time.
   The queue is really empty iff q->q.qlen == 0. For complicated
   disciplines with multiple queues, q->q is not the real packet
   queue, but q->q.qlen must still be valid.

   ---enqueue

   enqueue returns 0 if the packet was enqueued successfully.
   If a packet (this one or another one) was dropped, it returns
   one of:

   NET_XMIT_DROP	- this packet was dropped.
			  Expected action: do not back off, but wait
			  until the queue clears.
   NET_XMIT_CN		- this packet was probably enqueued, but
			  another one was dropped.
			  Expected action: back off or ignore.
   NET_XMIT_POLICED	- dropped by police.
			  Expected action: back off or report an error
			  to real-time apps.

   ---requeue

   requeues a packet that was dequeued once. It is used for
   non-standard or just buggy devices, which can defer output even
   if dev->tbusy == 0.

   ---reset

   returns the qdisc to its initial state: purges all buffers and
   clears all timers and counters (except for statistics).

   ---init

   initializes a newly created qdisc.

   ---destroy

   destroys resources allocated by init and during the lifetime of
   the qdisc.

   ---change

   changes qdisc parameters.
 */
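/*
 * Editorial illustration (not part of the original file): a minimal
 * "queue"-type enqueue/dequeue pair honoring the contract above,
 * loosely modeled on sch_fifo.c from this kernel generation. The
 * names are hypothetical and the block is compiled out on purpose.
 */
#if 0
static int example_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{
	if (skb_queue_len(&sch->q) < sch->dev->tx_queue_len) {
		__skb_queue_tail(&sch->q, skb);	/* keeps q->q.qlen valid */
		sch->bstats.bytes += skb->len;
		sch->bstats.packets++;
		return NET_XMIT_SUCCESS;
	}
	sch->qstats.drops++;
	kfree_skb(skb);
	return NET_XMIT_DROP;		/* this packet was dropped */
}

static struct sk_buff *example_dequeue(struct Qdisc *sch)
{
	/* NULL here does not necessarily mean "empty"; q->q.qlen does. */
	return __skb_dequeue(&sch->q);
}
#endif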
/* Protects the list of registered TC modules. It is a pure SMP lock. */
static DEFINE_RWLOCK(qdisc_mod_lock);


/************************************************
 *	Queueing disciplines manipulation.	*
 ************************************************/


/* The list of all installed queueing disciplines. */

static struct Qdisc_ops *qdisc_base;
/* Register/unregister queueing discipline */

int register_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int rc = -EEXIST;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (!strcmp(qops->id, q->id))
			goto out;

	if (qops->enqueue == NULL)
		qops->enqueue = noop_qdisc_ops.enqueue;
	if (qops->requeue == NULL)
		qops->requeue = noop_qdisc_ops.requeue;
	if (qops->dequeue == NULL)
		qops->dequeue = noop_qdisc_ops.dequeue;

	qops->next = NULL;
	*qp = qops;
	rc = 0;
out:
	write_unlock(&qdisc_mod_lock);
	return rc;
}
EXPORT_SYMBOL(register_qdisc);
int unregister_qdisc(struct Qdisc_ops *qops)
{
	struct Qdisc_ops *q, **qp;
	int err = -ENOENT;

	write_lock(&qdisc_mod_lock);
	for (qp = &qdisc_base; (q = *qp) != NULL; qp = &q->next)
		if (q == qops)
			break;
	if (q) {
		*qp = q->next;
		q->next = NULL;
		err = 0;
	}
	write_unlock(&qdisc_mod_lock);
	return err;
}
EXPORT_SYMBOL(unregister_qdisc);
/* We know the handle. Find the qdisc among all qdiscs attached to the
   device (the root qdisc, all its children, children of children, etc.)
 */

struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle)
{
	struct Qdisc *q;

	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (q->handle == handle)
			return q;
	}
	return NULL;
}
static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid)
{
	unsigned long cl;
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = p->ops->cl_ops;

	if (cops == NULL)
		return NULL;
	cl = cops->get(p, classid);

	if (cl == 0)
		return NULL;
	leaf = cops->leaf(p, cl);
	cops->put(p, cl);
	return leaf;
}
/* Find queueing discipline by name */

static struct Qdisc_ops *qdisc_lookup_ops(struct nlattr *kind)
{
	struct Qdisc_ops *q = NULL;

	if (kind) {
		read_lock(&qdisc_mod_lock);
		for (q = qdisc_base; q; q = q->next) {
			if (nla_strcmp(kind, q->id) == 0) {
				if (!try_module_get(q->owner))
					q = NULL;
				break;
			}
		}
		read_unlock(&qdisc_mod_lock);
	}
	return q;
}
static struct qdisc_rate_table *qdisc_rtab_list;

struct qdisc_rate_table *qdisc_get_rtab(struct tc_ratespec *r, struct nlattr *tab)
{
	struct qdisc_rate_table *rtab;

	for (rtab = qdisc_rtab_list; rtab; rtab = rtab->next) {
		if (memcmp(&rtab->rate, r, sizeof(struct tc_ratespec)) == 0) {
			rtab->refcnt++;
			return rtab;
		}
	}

	if (tab == NULL || r->rate == 0 || r->cell_log == 0 ||
	    nla_len(tab) != TC_RTAB_SIZE)
		return NULL;

	rtab = kmalloc(sizeof(*rtab), GFP_KERNEL);
	if (rtab) {
		rtab->rate = *r;
		rtab->refcnt = 1;
		memcpy(rtab->data, nla_data(tab), 1024);
		rtab->next = qdisc_rtab_list;
		qdisc_rtab_list = rtab;
	}
	return rtab;
}
EXPORT_SYMBOL(qdisc_get_rtab);
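/*
 * Editorial illustration (hypothetical caller, in the style of
 * sch_tbf.c): a shaping qdisc obtains a shared rate table in its
 * init/change path and drops the reference on teardown:
 *
 *	struct qdisc_rate_table *rtab;
 *
 *	rtab = qdisc_get_rtab(&qopt->rate, tb[TCA_TBF_RTAB]);
 *	if (rtab == NULL)
 *		goto done;
 *	...
 *	qdisc_put_rtab(rtab);
 *
 * Identical tc_ratespec parameters from different qdiscs share a
 * single refcounted table.
 */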
void qdisc_put_rtab(struct qdisc_rate_table *tab)
{
	struct qdisc_rate_table *rtab, **rtabp;

	if (!tab || --tab->refcnt)
		return;

	for (rtabp = &qdisc_rtab_list; (rtab = *rtabp) != NULL; rtabp = &rtab->next) {
		if (rtab == tab) {
			*rtabp = rtab->next;
			kfree(rtab);
			return;
		}
	}
}
EXPORT_SYMBOL(qdisc_put_rtab);
static enum hrtimer_restart qdisc_watchdog(struct hrtimer *timer)
{
	struct qdisc_watchdog *wd = container_of(timer, struct qdisc_watchdog,
						 timer);
	struct net_device *dev = wd->qdisc->dev;

	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
	smp_wmb();
	netif_schedule(dev);

	return HRTIMER_NORESTART;
}

void qdisc_watchdog_init(struct qdisc_watchdog *wd, struct Qdisc *qdisc)
{
	hrtimer_init(&wd->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS);
	wd->timer.function = qdisc_watchdog;
	wd->qdisc = qdisc;
}
EXPORT_SYMBOL(qdisc_watchdog_init);
void qdisc_watchdog_schedule(struct qdisc_watchdog *wd, psched_time_t expires)
{
	ktime_t time;

	wd->qdisc->flags |= TCQ_F_THROTTLED;
	time = ktime_set(0, 0);
	time = ktime_add_ns(time, PSCHED_US2NS(expires));
	hrtimer_start(&wd->timer, time, HRTIMER_MODE_ABS);
}
EXPORT_SYMBOL(qdisc_watchdog_schedule);

void qdisc_watchdog_cancel(struct qdisc_watchdog *wd)
{
	hrtimer_cancel(&wd->timer);
	wd->qdisc->flags &= ~TCQ_F_THROTTLED;
}
EXPORT_SYMBOL(qdisc_watchdog_cancel);
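/*
 * Editorial illustration (hypothetical qdisc, following the pattern of
 * sch_tbf.c): a throttling qdisc embeds a struct qdisc_watchdog,
 * initializes it in ->init, arms it from ->dequeue when the head
 * packet may not be sent yet, and cancels it in ->reset/->destroy:
 *
 *	qdisc_watchdog_init(&q->watchdog, sch);
 *	...
 *	qdisc_watchdog_schedule(&q->watchdog, next_send_time);
 *	...
 *	qdisc_watchdog_cancel(&q->watchdog);
 *
 * While armed, TCQ_F_THROTTLED is set; the timer clears it and
 * reschedules the device.
 */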
/* Allocate a unique handle from the space managed by the kernel */

static u32 qdisc_alloc_handle(struct net_device *dev)
{
	int i = 0x10000;
	static u32 autohandle = TC_H_MAKE(0x80000000U, 0);

	do {
		autohandle += TC_H_MAKE(0x10000U, 0);
		if (autohandle == TC_H_MAKE(TC_H_ROOT, 0))
			autohandle = TC_H_MAKE(0x80000000U, 0);
	} while (qdisc_lookup(dev, autohandle) && --i > 0);

	return i > 0 ? autohandle : 0;
}
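/*
 * Worked example (editorial): a handle is a 32-bit value with a 16-bit
 * major and a 16-bit minor part, composed by TC_H_MAKE(). The allocator
 * above starts at major 0x8000 and steps the major by one per probe:
 *
 *	TC_H_MAKE(0x80000000U, 0)  == handle 8000:0
 *	+ TC_H_MAKE(0x10000U, 0)   -> 8001:0, 8002:0, ...
 *
 * wrapping back to 8000:0 when the major would reach 0xFFFF, which is
 * TC_H_MAJ(TC_H_ROOT) and must stay reserved.
 */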
/* Attach toplevel qdisc to device dev */

static struct Qdisc *
dev_graft_qdisc(struct net_device *dev, struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc;

	if (dev->flags & IFF_UP)
		dev_deactivate(dev);

	qdisc_lock_tree(dev);
	if (qdisc && qdisc->flags&TCQ_F_INGRESS) {
		oqdisc = dev->qdisc_ingress;
		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) {
			/* delete */
			qdisc_reset(oqdisc);
			dev->qdisc_ingress = NULL;
		} else {  /* new */
			dev->qdisc_ingress = qdisc;
		}

	} else {

		oqdisc = dev->qdisc_sleeping;

		/* Prune old scheduler */
		if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1)
			qdisc_reset(oqdisc);

		/* ... and graft new one */
		if (qdisc == NULL)
			qdisc = &noop_qdisc;
		dev->qdisc_sleeping = qdisc;
		dev->qdisc = &noop_qdisc;
	}

	qdisc_unlock_tree(dev);

	if (dev->flags & IFF_UP)
		dev_activate(dev);

	return oqdisc;
}
void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n)
{
	const struct Qdisc_class_ops *cops;
	unsigned long cl;
	u32 parentid;

	if (n == 0)
		return;
	while ((parentid = sch->parent)) {
		if (TC_H_MAJ(parentid) == TC_H_MAJ(TC_H_INGRESS))
			return;

		sch = qdisc_lookup(sch->dev, TC_H_MAJ(parentid));
		if (sch == NULL) {
			WARN_ON(parentid != TC_H_ROOT);
			return;
		}
		cops = sch->ops->cl_ops;
		if (cops->qlen_notify) {
			cl = cops->get(sch, parentid);
			cops->qlen_notify(sch, cl);
			cops->put(sch, cl);
		}
		sch->q.qlen -= n;
	}
}
EXPORT_SYMBOL(qdisc_tree_decrease_qlen);
/* Graft qdisc "new" to class "classid" of qdisc "parent" or
   to device "dev".

   The old qdisc is not destroyed but returned in *old.
 */

static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
		       u32 classid,
		       struct Qdisc *new, struct Qdisc **old)
{
	int err = 0;
	struct Qdisc *q = *old;

	if (parent == NULL) {
		if (q && q->flags&TCQ_F_INGRESS) {
			*old = dev_graft_qdisc(dev, q);
		} else {
			*old = dev_graft_qdisc(dev, new);
		}
	} else {
		const struct Qdisc_class_ops *cops = parent->ops->cl_ops;

		err = -EINVAL;

		if (cops) {
			unsigned long cl = cops->get(parent, classid);
			if (cl) {
				err = cops->graft(parent, cl, new, old);
				cops->put(parent, cl);
			}
		}
	}
	return err;
}
/*
   Allocate and initialize a new qdisc.

   Parameters are passed via opt.
 */

static struct Qdisc *
qdisc_create(struct net_device *dev, u32 parent, u32 handle,
	     struct nlattr **tca, int *errp)
{
	int err;
	struct nlattr *kind = tca[TCA_KIND];
	struct Qdisc *sch;
	struct Qdisc_ops *ops;

	ops = qdisc_lookup_ops(kind);
#ifdef CONFIG_KMOD
	if (ops == NULL && kind != NULL) {
		char name[IFNAMSIZ];
		if (nla_strlcpy(name, kind, IFNAMSIZ) < IFNAMSIZ) {
			/* We dropped the RTNL semaphore in order to
			 * perform the module load. So, even if we
			 * succeeded in loading the module we have to
			 * tell the caller to replay the request. We
			 * indicate this using -EAGAIN.
			 * We replay the request because the device may
			 * go away in the mean time.
			 */
			rtnl_unlock();
			request_module("sch_%s", name);
			rtnl_lock();
			ops = qdisc_lookup_ops(kind);
			if (ops != NULL) {
				/* We will try again qdisc_lookup_ops,
				 * so don't keep a reference.
				 */
				module_put(ops->owner);
				err = -EAGAIN;
				goto err_out;
			}
		}
	}
#endif

	err = -ENOENT;
	if (ops == NULL)
		goto err_out;

	sch = qdisc_alloc(dev, ops);
	if (IS_ERR(sch)) {
		err = PTR_ERR(sch);
		goto err_out2;
	}

	sch->parent = parent;

	if (handle == TC_H_INGRESS) {
		sch->flags |= TCQ_F_INGRESS;
		sch->stats_lock = &dev->ingress_lock;
		handle = TC_H_MAKE(TC_H_INGRESS, 0);
	} else {
		sch->stats_lock = &dev->queue_lock;
		if (handle == 0) {
			handle = qdisc_alloc_handle(dev);
			err = -ENOMEM;
			if (handle == 0)
				goto err_out3;
		}
	}

	sch->handle = handle;

	if (!ops->init || (err = ops->init(sch, tca[TCA_OPTIONS])) == 0) {
		if (tca[TCA_RATE]) {
			err = gen_new_estimator(&sch->bstats, &sch->rate_est,
						sch->stats_lock,
						tca[TCA_RATE]);
			if (err) {
				/*
				 * Any broken qdiscs that would require
				 * a ops->reset() here? The qdisc was never
				 * in action so it shouldn't be necessary.
				 */
				if (ops->destroy)
					ops->destroy(sch);
				goto err_out3;
			}
		}
		qdisc_lock_tree(dev);
		list_add_tail(&sch->list, &dev->qdisc_list);
		qdisc_unlock_tree(dev);

		return sch;
	}
err_out3:
	dev_put(dev);
	kfree((char *) sch - sch->padded);
err_out2:
	module_put(ops->owner);
err_out:
	*errp = err;
	return NULL;
}
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca)
{
	if (tca[TCA_OPTIONS]) {
		int err;

		if (sch->ops->change == NULL)
			return -EINVAL;
		err = sch->ops->change(sch, tca[TCA_OPTIONS]);
		if (err)
			return err;
	}
	if (tca[TCA_RATE])
		gen_replace_estimator(&sch->bstats, &sch->rate_est,
				      sch->stats_lock, tca[TCA_RATE]);
	return 0;
}
struct check_loop_arg
{
	struct qdisc_walker	w;
	struct Qdisc		*p;
	int			depth;
};

static int check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w);

static int check_loop(struct Qdisc *q, struct Qdisc *p, int depth)
{
	struct check_loop_arg	arg;

	if (q->ops->cl_ops == NULL)
		return 0;

	arg.w.stop = arg.w.skip = arg.w.count = 0;
	arg.w.fn = check_loop_fn;
	arg.depth = depth;
	arg.p = p;
	q->ops->cl_ops->walk(q, &arg.w);
	return arg.w.stop ? -ELOOP : 0;
}

static int
check_loop_fn(struct Qdisc *q, unsigned long cl, struct qdisc_walker *w)
{
	struct Qdisc *leaf;
	const struct Qdisc_class_ops *cops = q->ops->cl_ops;
	struct check_loop_arg *arg = (struct check_loop_arg *)w;

	leaf = cops->leaf(q, cl);
	if (leaf) {
		if (leaf == arg->p || arg->depth > 7)
			return -ELOOP;
		return check_loop(leaf, arg->p, arg->depth + 1);
	}
	return 0;
}
/*
 * Delete/get qdisc.
 */

static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid = tcm->tcm_parent;
	struct Qdisc *q = NULL;
	struct Qdisc *p = NULL;
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (TC_H_MAJ(clid) != TC_H_MAJ(TC_H_INGRESS)) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}
		if (!q)
			return -ENOENT;

		if (tcm->tcm_handle && q->handle != tcm->tcm_handle)
			return -EINVAL;
	} else {
		if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
			return -ENOENT;
	}

	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;

	if (n->nlmsg_type == RTM_DELQDISC) {
		if (!clid)
			return -EINVAL;
		if (q->handle == 0)
			return -ENOENT;
		if ((err = qdisc_graft(dev, p, clid, NULL, &q)) != 0)
			return err;
		if (q) {
			qdisc_notify(skb, n, clid, q, NULL);
			qdisc_lock_tree(dev);
			qdisc_destroy(q);
			qdisc_unlock_tree(dev);
		}
	} else {
		qdisc_notify(skb, n, clid, NULL, q);
	}
	return 0;
}
/*
   Create/change qdisc.
 */

static int tc_modify_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm;
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	u32 clid;
	struct Qdisc *q, *p;
	int err;

	if (net != &init_net)
		return -EINVAL;

replay:
	/* Reinit, just in case something touches this. */
	tcm = NLMSG_DATA(n);
	clid = tcm->tcm_parent;
	q = p = NULL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;

	if (clid) {
		if (clid != TC_H_ROOT) {
			if (clid != TC_H_INGRESS) {
				if ((p = qdisc_lookup(dev, TC_H_MAJ(clid))) == NULL)
					return -ENOENT;
				q = qdisc_leaf(p, clid);
			} else { /* ingress */
				q = dev->qdisc_ingress;
			}
		} else {
			q = dev->qdisc_sleeping;
		}

		/* It may be the default qdisc; ignore it. */
		if (q && q->handle == 0)
			q = NULL;

		if (!q || !tcm->tcm_handle || q->handle != tcm->tcm_handle) {
			if (tcm->tcm_handle) {
				if (q && !(n->nlmsg_flags&NLM_F_REPLACE))
					return -EEXIST;
				if (TC_H_MIN(tcm->tcm_handle))
					return -EINVAL;
				if ((q = qdisc_lookup(dev, tcm->tcm_handle)) == NULL)
					goto create_n_graft;
				if (n->nlmsg_flags&NLM_F_EXCL)
					return -EEXIST;
				if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
					return -EINVAL;
				if (q == p ||
				    (p && check_loop(q, p, 0)))
					return -ELOOP;
				atomic_inc(&q->refcnt);
				goto graft;
			} else {
				if (q == NULL)
					goto create_n_graft;

				/* This magic test requires explanation.
				 *
				 *   We know that some child q is already
				 *   attached to this parent and have a
				 *   choice: either to change it or to
				 *   create/graft a new one.
				 *
				 *   1. We are allowed to create/graft only
				 *   if both the CREATE and REPLACE flags
				 *   are set.
				 *
				 *   2. If EXCL is set, the requestor meant
				 *   that the qdisc tcm_handle is not
				 *   expected to exist, so we choose
				 *   create/graft too.
				 *
				 *   3. The last case is when no flags are
				 *   set. Alas, this is sort of a hole in
				 *   the API; we cannot decide what to do
				 *   unambiguously. For now we select
				 *   create/graft if the user gave a KIND
				 *   which does not match the existing one.
				 */
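				/* Editorial illustration (assumed tc(8)
				 * flag usage), for this case where a
				 * child q is already attached:
				 *   CREATE|REPLACE|EXCL       -> create/graft
				 *   CREATE|REPLACE, new KIND  -> create/graft
				 *   CREATE|REPLACE, same KIND -> fall through
				 *				  and change q
				 */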
				if ((n->nlmsg_flags&NLM_F_CREATE) &&
				    (n->nlmsg_flags&NLM_F_REPLACE) &&
				    ((n->nlmsg_flags&NLM_F_EXCL) ||
				     (tca[TCA_KIND] &&
				      nla_strcmp(tca[TCA_KIND], q->ops->id))))
					goto create_n_graft;
			}
		}
	} else {
		if (!tcm->tcm_handle)
			return -EINVAL;
		q = qdisc_lookup(dev, tcm->tcm_handle);
	}

	/* Change qdisc parameters */
	if (q == NULL)
		return -ENOENT;
	if (n->nlmsg_flags&NLM_F_EXCL)
		return -EEXIST;
	if (tca[TCA_KIND] && nla_strcmp(tca[TCA_KIND], q->ops->id))
		return -EINVAL;
	err = qdisc_change(q, tca);
	if (err == 0)
		qdisc_notify(skb, n, clid, NULL, q);
	return err;

create_n_graft:
	if (!(n->nlmsg_flags&NLM_F_CREATE))
		return -ENOENT;
	if (clid == TC_H_INGRESS)
		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_parent,
				 tca, &err);
	else
		q = qdisc_create(dev, tcm->tcm_parent, tcm->tcm_handle,
				 tca, &err);
	if (q == NULL) {
		if (err == -EAGAIN)
			goto replay;
		return err;
	}

graft:
	if (1) {
		struct Qdisc *old_q = NULL;
		err = qdisc_graft(dev, p, clid, q, &old_q);
		if (err) {
			if (q) {
				qdisc_lock_tree(dev);
				qdisc_destroy(q);
				qdisc_unlock_tree(dev);
			}
			return err;
		}
		qdisc_notify(skb, n, clid, old_q, q);
		if (old_q) {
			qdisc_lock_tree(dev);
			qdisc_destroy(old_q);
			qdisc_unlock_tree(dev);
		}
	}
	return 0;
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
			 u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm__pad1 = 0;
	tcm->tcm__pad2 = 0;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = clid;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = atomic_read(&q->refcnt);
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (q->ops->dump && q->ops->dump(q, skb) < 0)
		goto nla_put_failure;
	q->qstats.qlen = q->q.qlen;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, q->stats_lock, &d) < 0)
		goto nla_put_failure;

	if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_copy_basic(&d, &q->bstats) < 0 ||
	    gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 ||
	    gnet_stats_copy_queue(&d, &q->qstats) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int qdisc_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			u32 clid, struct Qdisc *old, struct Qdisc *new)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (old && old->handle) {
		if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0)
			goto err_out;
	}
	if (new) {
		if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
			goto err_out;
	}

	if (skb->len)
		return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);

err_out:
	kfree_skb(skb);
	return -EINVAL;
}
static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int idx, q_idx;
	int s_idx, s_q_idx;
	struct net_device *dev;
	struct Qdisc *q;

	if (net != &init_net)
		return 0;

	s_idx = cb->args[0];
	s_q_idx = q_idx = cb->args[1];
	read_lock(&dev_base_lock);
	idx = 0;
	for_each_netdev(&init_net, dev) {
		if (idx < s_idx)
			goto cont;
		if (idx > s_idx)
			s_q_idx = 0;
		q_idx = 0;
		list_for_each_entry(q, &dev->qdisc_list, list) {
			if (q_idx < s_q_idx) {
				q_idx++;
				continue;
			}
			if (tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid,
					  cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0)
				goto done;
			q_idx++;
		}
cont:
		idx++;
	}

done:
	read_unlock(&dev_base_lock);

	cb->args[0] = idx;
	cb->args[1] = q_idx;

	return skb->len;
}
/************************************************
 *	Traffic classes manipulation.		*
 ************************************************/
static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg)
{
	struct net *net = sock_net(skb->sk);
	struct tcmsg *tcm = NLMSG_DATA(n);
	struct nlattr *tca[TCA_MAX + 1];
	struct net_device *dev;
	struct Qdisc *q = NULL;
	const struct Qdisc_class_ops *cops;
	unsigned long cl = 0;
	unsigned long new_cl;
	u32 pid = tcm->tcm_parent;
	u32 clid = tcm->tcm_handle;
	u32 qid = TC_H_MAJ(clid);
	int err;

	if (net != &init_net)
		return -EINVAL;

	if ((dev = __dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return -ENODEV;

	err = nlmsg_parse(n, sizeof(*tcm), tca, TCA_MAX, NULL);
	if (err < 0)
		return err;
	/*
	   parent == TC_H_UNSPEC - unspecified parent.
	   parent == TC_H_ROOT   - class is root, which has no parent.
	   parent == X:0	 - parent is root class.
	   parent == X:Y	 - parent is a node in hierarchy.
	   parent == 0:Y	 - parent is X:Y, where X:0 is qdisc.

	   handle == 0:0	 - generate handle from kernel pool.
	   handle == 0:Y	 - class is X:Y, where X:0 is qdisc.
	   handle == X:Y	 - clear.
	   handle == X:0	 - root class.
	 */
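	/* Worked example (editorial): "tc class add dev eth0 parent 1:1
	 * classid 1:10 ..." arrives with tcm_parent == 0x00010001 and
	 * tcm_handle == 0x0001000a, so qid = TC_H_MAJ(tcm_handle) =
	 * 0x00010000, i.e. qdisc 1:0. A request giving parent 1:1 but
	 * handle 0:10 is completed below to the same class 1:10.
	 */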
	/* Step 1. Determine qdisc handle X:0 */

	if (pid != TC_H_ROOT) {
		u32 qid1 = TC_H_MAJ(pid);

		if (qid && qid1) {
			/* If both majors are known, they must be identical. */
			if (qid != qid1)
				return -EINVAL;
		} else if (qid1) {
			qid = qid1;
		} else if (qid == 0)
			qid = dev->qdisc_sleeping->handle;

		/* Now qid is a genuine qdisc handle consistent with
		   both parent and child.

		   TC_H_MAJ(pid) may still be unspecified; complete it now.
		 */
		if (pid)
			pid = TC_H_MAKE(qid, pid);
	} else {
		if (qid == 0)
			qid = dev->qdisc_sleeping->handle;
	}

	/* OK. Locate the qdisc. */
	if ((q = qdisc_lookup(dev, qid)) == NULL)
		return -ENOENT;

	/* And check that it supports classes. */
	cops = q->ops->cl_ops;
	if (cops == NULL)
		return -EINVAL;

	/* Now try to get the class. */
	if (clid == 0) {
		if (pid == TC_H_ROOT)
			clid = qid;
	} else
		clid = TC_H_MAKE(qid, clid);

	if (clid)
		cl = cops->get(q, clid);

	if (cl == 0) {
		err = -ENOENT;
		if (n->nlmsg_type != RTM_NEWTCLASS || !(n->nlmsg_flags&NLM_F_CREATE))
			goto out;
	} else {
		switch (n->nlmsg_type) {
		case RTM_NEWTCLASS:
			err = -EEXIST;
			if (n->nlmsg_flags&NLM_F_EXCL)
				goto out;
			break;
		case RTM_DELTCLASS:
			err = cops->delete(q, cl);
			if (err == 0)
				tclass_notify(skb, n, q, cl, RTM_DELTCLASS);
			goto out;
		case RTM_GETTCLASS:
			err = tclass_notify(skb, n, q, cl, RTM_NEWTCLASS);
			goto out;
		default:
			err = -EINVAL;
			goto out;
		}
	}

	new_cl = cl;
	err = -EOPNOTSUPP;
	if (cops->change)
		err = cops->change(q, clid, pid, tca, &new_cl);
	if (err == 0)
		tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS);

out:
	if (cl)
		cops->put(q, cl);

	return err;
}
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
			  unsigned long cl,
			  u32 pid, u32 seq, u16 flags, int event)
{
	struct tcmsg *tcm;
	struct nlmsghdr  *nlh;
	unsigned char *b = skb_tail_pointer(skb);
	struct gnet_dump d;
	const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops;

	nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*tcm), flags);
	tcm = NLMSG_DATA(nlh);
	tcm->tcm_family = AF_UNSPEC;
	tcm->tcm_ifindex = q->dev->ifindex;
	tcm->tcm_parent = q->handle;
	tcm->tcm_handle = q->handle;
	tcm->tcm_info = 0;
	NLA_PUT_STRING(skb, TCA_KIND, q->ops->id);
	if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0)
		goto nla_put_failure;

	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
					 TCA_XSTATS, q->stats_lock, &d) < 0)
		goto nla_put_failure;

	if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0)
		goto nla_put_failure;

	if (gnet_stats_finish_copy(&d) < 0)
		goto nla_put_failure;

	nlh->nlmsg_len = skb_tail_pointer(skb) - b;
	return skb->len;

nlmsg_failure:
nla_put_failure:
	nlmsg_trim(skb, b);
	return -1;
}
static int tclass_notify(struct sk_buff *oskb, struct nlmsghdr *n,
			 struct Qdisc *q, unsigned long cl, int event)
{
	struct sk_buff *skb;
	u32 pid = oskb ? NETLINK_CB(oskb).pid : 0;

	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
	if (!skb)
		return -ENOBUFS;

	if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) {
		kfree_skb(skb);
		return -EINVAL;
	}

	return rtnetlink_send(skb, &init_net, pid, RTNLGRP_TC, n->nlmsg_flags&NLM_F_ECHO);
}
struct qdisc_dump_args
{
	struct qdisc_walker w;
	struct sk_buff *skb;
	struct netlink_callback *cb;
};

static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walker *arg)
{
	struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg;

	return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid,
			      a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS);
}
static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
{
	struct net *net = sock_net(skb->sk);
	int t;
	int s_t;
	struct net_device *dev;
	struct Qdisc *q;
	struct tcmsg *tcm = (struct tcmsg*)NLMSG_DATA(cb->nlh);
	struct qdisc_dump_args arg;

	if (net != &init_net)
		return 0;

	if (cb->nlh->nlmsg_len < NLMSG_LENGTH(sizeof(*tcm)))
		return 0;
	if ((dev = dev_get_by_index(&init_net, tcm->tcm_ifindex)) == NULL)
		return 0;

	s_t = cb->args[0];
	t = 0;

	list_for_each_entry(q, &dev->qdisc_list, list) {
		if (t < s_t || !q->ops->cl_ops ||
		    (tcm->tcm_parent &&
		     TC_H_MAJ(tcm->tcm_parent) != q->handle)) {
			t++;
			continue;
		}
		if (t > s_t)
			memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0]));
		arg.w.fn = qdisc_class_dump;
		arg.skb = skb;
		arg.cb = cb;
		arg.w.stop  = 0;
		arg.w.skip = cb->args[1];
		arg.w.count = 0;
		q->ops->cl_ops->walk(q, &arg.w);
		cb->args[1] = arg.w.count;
		if (arg.w.stop)
			break;
		t++;
	}

	cb->args[0] = t;

	dev_put(dev);
	return skb->len;
}
/* Main classifier routine: scans the classifier chain attached
   to this qdisc, (optionally) tests for protocol, and asks
   specific classifiers.
 */
int tc_classify_compat(struct sk_buff *skb, struct tcf_proto *tp,
		       struct tcf_result *res)
{
	__be16 protocol = skb->protocol;
	int err = 0;

	for (; tp; tp = tp->next) {
		if ((tp->protocol == protocol ||
		     tp->protocol == htons(ETH_P_ALL)) &&
		    (err = tp->classify(skb, tp, res)) >= 0) {
#ifdef CONFIG_NET_CLS_ACT
			if (err != TC_ACT_RECLASSIFY && skb->tc_verd)
				skb->tc_verd = SET_TC_VERD(skb->tc_verd, 0);
#endif
			return err;
		}
	}
	return -1;
}
EXPORT_SYMBOL(tc_classify_compat);
int tc_classify(struct sk_buff *skb, struct tcf_proto *tp,
		struct tcf_result *res)
{
	int err = 0;
	__be16 protocol;
#ifdef CONFIG_NET_CLS_ACT
	struct tcf_proto *otp = tp;
reclassify:
#endif
	protocol = skb->protocol;

	err = tc_classify_compat(skb, tp, res);
#ifdef CONFIG_NET_CLS_ACT
	if (err == TC_ACT_RECLASSIFY) {
		u32 verd = G_TC_VERD(skb->tc_verd);
		tp = otp;

		if (verd++ >= MAX_REC_LOOP) {
			printk("rule prio %u protocol %02x reclassify loop, "
			       "packet dropped\n",
			       tp->prio&0xffff, ntohs(tp->protocol));
			return TC_ACT_SHOT;
		}
		skb->tc_verd = SET_TC_VERD(skb->tc_verd, verd);
		goto reclassify;
	}
#endif
	return err;
}
EXPORT_SYMBOL(tc_classify);
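/*
 * Editorial illustration (hypothetical caller, in the style of
 * sch_prio.c): a classful qdisc typically runs its filter chain from
 * ->enqueue and maps the verdict onto one of its classes:
 *
 *	struct tcf_result res;
 *	int err = tc_classify(skb, q->filter_list, &res);
 *
 *	#ifdef CONFIG_NET_CLS_ACT
 *	if (err == TC_ACT_SHOT || err == TC_ACT_STOLEN ||
 *	    err == TC_ACT_QUEUED)
 *		... drop the skb or treat it as consumed ...
 *	#endif
 *	if (err >= 0)
 *		... select the subqueue/class from res.classid ...
 */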
void tcf_destroy(struct tcf_proto *tp)
{
	tp->ops->destroy(tp);
	module_put(tp->ops->owner);
	kfree(tp);
}

void tcf_destroy_chain(struct tcf_proto *fl)
{
	struct tcf_proto *tp;

	while ((tp = fl) != NULL) {
		fl = tp->next;
		tcf_destroy(tp);
	}
}
EXPORT_SYMBOL(tcf_destroy_chain);
#ifdef CONFIG_PROC_FS
static int psched_show(struct seq_file *seq, void *v)
{
	struct timespec ts;

	hrtimer_get_res(CLOCK_MONOTONIC, &ts);
	seq_printf(seq, "%08x %08x %08x %08x\n",
		   (u32)NSEC_PER_USEC, (u32)PSCHED_US2NS(1),
		   1000000,
		   (u32)NSEC_PER_SEC/(u32)ktime_to_ns(timespec_to_ktime(ts)));

	return 0;
}

static int psched_open(struct inode *inode, struct file *file)
{
	return single_open(file, psched_show, PDE(inode)->data);
}

static const struct file_operations psched_fops = {
	.owner = THIS_MODULE,
	.open = psched_open,
	.read  = seq_read,
	.llseek = seq_lseek,
	.release = single_release,
};
#endif
static int __init pktsched_init(void)
{
	register_qdisc(&pfifo_qdisc_ops);
	register_qdisc(&bfifo_qdisc_ops);
	proc_net_fops_create(&init_net, "psched", 0, &psched_fops);

	rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc);
	rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL);
	rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass);

	return 0;
}

subsys_initcall(pktsched_init);