Merge branch 'oprofile-for-tip' of git://git.kernel.org/pub/scm/linux/kernel/git...
[linux-2.6] / net / sched / sch_red.c
1 /*
2  * net/sched/sch_red.c  Random Early Detection queue.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914: computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/inet_ecn.h>
23 #include <net/red.h>
24
25
26 /*      Parameters, settable by user:
27         -----------------------------
28
29         limit           - bytes (must be > qth_max + burst)
30
31         Hard limit on queue length, should be chosen >qth_max
32         to allow packet bursts. This parameter does not
33         affect the algorithm's behaviour and can be chosen
34         arbitrarily high (well, less than ram size)
35         Really, this limit will never be reached
36         if RED works correctly.
37  */
38
39 struct red_sched_data
40 {
41         u32                     limit;          /* HARD maximal queue length */
42         unsigned char           flags;
43         struct red_parms        parms;
44         struct red_stats        stats;
45         struct Qdisc            *qdisc;
46 };
47
48 static inline int red_use_ecn(struct red_sched_data *q)
49 {
50         return q->flags & TC_RED_ECN;
51 }
52
53 static inline int red_use_harddrop(struct red_sched_data *q)
54 {
55         return q->flags & TC_RED_HARDDROP;
56 }
57
58 static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
59 {
60         struct red_sched_data *q = qdisc_priv(sch);
61         struct Qdisc *child = q->qdisc;
62         int ret;
63
64         q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
65
66         if (red_is_idling(&q->parms))
67                 red_end_of_idle_period(&q->parms);
68
69         switch (red_action(&q->parms, q->parms.qavg)) {
70                 case RED_DONT_MARK:
71                         break;
72
73                 case RED_PROB_MARK:
74                         sch->qstats.overlimits++;
75                         if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76                                 q->stats.prob_drop++;
77                                 goto congestion_drop;
78                         }
79
80                         q->stats.prob_mark++;
81                         break;
82
83                 case RED_HARD_MARK:
84                         sch->qstats.overlimits++;
85                         if (red_use_harddrop(q) || !red_use_ecn(q) ||
86                             !INET_ECN_set_ce(skb)) {
87                                 q->stats.forced_drop++;
88                                 goto congestion_drop;
89                         }
90
91                         q->stats.forced_mark++;
92                         break;
93         }
94
95         ret = qdisc_enqueue(skb, child);
96         if (likely(ret == NET_XMIT_SUCCESS)) {
97                 sch->bstats.bytes += qdisc_pkt_len(skb);
98                 sch->bstats.packets++;
99                 sch->q.qlen++;
100         } else if (net_xmit_drop_count(ret)) {
101                 q->stats.pdrop++;
102                 sch->qstats.drops++;
103         }
104         return ret;
105
106 congestion_drop:
107         qdisc_drop(skb, sch);
108         return NET_XMIT_CN;
109 }
110
111 static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
112 {
113         struct red_sched_data *q = qdisc_priv(sch);
114         struct Qdisc *child = q->qdisc;
115         int ret;
116
117         if (red_is_idling(&q->parms))
118                 red_end_of_idle_period(&q->parms);
119
120         ret = child->ops->requeue(skb, child);
121         if (likely(ret == NET_XMIT_SUCCESS)) {
122                 sch->qstats.requeues++;
123                 sch->q.qlen++;
124         }
125         return ret;
126 }
127
128 static struct sk_buff * red_dequeue(struct Qdisc* sch)
129 {
130         struct sk_buff *skb;
131         struct red_sched_data *q = qdisc_priv(sch);
132         struct Qdisc *child = q->qdisc;
133
134         skb = child->dequeue(child);
135         if (skb)
136                 sch->q.qlen--;
137         else if (!red_is_idling(&q->parms))
138                 red_start_of_idle_period(&q->parms);
139
140         return skb;
141 }
142
143 static unsigned int red_drop(struct Qdisc* sch)
144 {
145         struct red_sched_data *q = qdisc_priv(sch);
146         struct Qdisc *child = q->qdisc;
147         unsigned int len;
148
149         if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
150                 q->stats.other++;
151                 sch->qstats.drops++;
152                 sch->q.qlen--;
153                 return len;
154         }
155
156         if (!red_is_idling(&q->parms))
157                 red_start_of_idle_period(&q->parms);
158
159         return 0;
160 }
161
162 static void red_reset(struct Qdisc* sch)
163 {
164         struct red_sched_data *q = qdisc_priv(sch);
165
166         qdisc_reset(q->qdisc);
167         sch->q.qlen = 0;
168         red_restart(&q->parms);
169 }
170
171 static void red_destroy(struct Qdisc *sch)
172 {
173         struct red_sched_data *q = qdisc_priv(sch);
174         qdisc_destroy(q->qdisc);
175 }
176
177 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
178         [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
179         [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
180 };
181
182 static int red_change(struct Qdisc *sch, struct nlattr *opt)
183 {
184         struct red_sched_data *q = qdisc_priv(sch);
185         struct nlattr *tb[TCA_RED_MAX + 1];
186         struct tc_red_qopt *ctl;
187         struct Qdisc *child = NULL;
188         int err;
189
190         if (opt == NULL)
191                 return -EINVAL;
192
193         err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
194         if (err < 0)
195                 return err;
196
197         if (tb[TCA_RED_PARMS] == NULL ||
198             tb[TCA_RED_STAB] == NULL)
199                 return -EINVAL;
200
201         ctl = nla_data(tb[TCA_RED_PARMS]);
202
203         if (ctl->limit > 0) {
204                 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
205                 if (IS_ERR(child))
206                         return PTR_ERR(child);
207         }
208
209         sch_tree_lock(sch);
210         q->flags = ctl->flags;
211         q->limit = ctl->limit;
212         if (child) {
213                 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
214                 qdisc_destroy(xchg(&q->qdisc, child));
215         }
216
217         red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
218                                  ctl->Plog, ctl->Scell_log,
219                                  nla_data(tb[TCA_RED_STAB]));
220
221         if (skb_queue_empty(&sch->q))
222                 red_end_of_idle_period(&q->parms);
223
224         sch_tree_unlock(sch);
225         return 0;
226 }
227
228 static int red_init(struct Qdisc* sch, struct nlattr *opt)
229 {
230         struct red_sched_data *q = qdisc_priv(sch);
231
232         q->qdisc = &noop_qdisc;
233         return red_change(sch, opt);
234 }
235
236 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
237 {
238         struct red_sched_data *q = qdisc_priv(sch);
239         struct nlattr *opts = NULL;
240         struct tc_red_qopt opt = {
241                 .limit          = q->limit,
242                 .flags          = q->flags,
243                 .qth_min        = q->parms.qth_min >> q->parms.Wlog,
244                 .qth_max        = q->parms.qth_max >> q->parms.Wlog,
245                 .Wlog           = q->parms.Wlog,
246                 .Plog           = q->parms.Plog,
247                 .Scell_log      = q->parms.Scell_log,
248         };
249
250         opts = nla_nest_start(skb, TCA_OPTIONS);
251         if (opts == NULL)
252                 goto nla_put_failure;
253         NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
254         return nla_nest_end(skb, opts);
255
256 nla_put_failure:
257         nla_nest_cancel(skb, opts);
258         return -EMSGSIZE;
259 }
260
261 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
262 {
263         struct red_sched_data *q = qdisc_priv(sch);
264         struct tc_red_xstats st = {
265                 .early  = q->stats.prob_drop + q->stats.forced_drop,
266                 .pdrop  = q->stats.pdrop,
267                 .other  = q->stats.other,
268                 .marked = q->stats.prob_mark + q->stats.forced_mark,
269         };
270
271         return gnet_stats_copy_app(d, &st, sizeof(st));
272 }
273
274 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
275                           struct sk_buff *skb, struct tcmsg *tcm)
276 {
277         struct red_sched_data *q = qdisc_priv(sch);
278
279         if (cl != 1)
280                 return -ENOENT;
281         tcm->tcm_handle |= TC_H_MIN(1);
282         tcm->tcm_info = q->qdisc->handle;
283         return 0;
284 }
285
286 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
287                      struct Qdisc **old)
288 {
289         struct red_sched_data *q = qdisc_priv(sch);
290
291         if (new == NULL)
292                 new = &noop_qdisc;
293
294         sch_tree_lock(sch);
295         *old = xchg(&q->qdisc, new);
296         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
297         qdisc_reset(*old);
298         sch_tree_unlock(sch);
299         return 0;
300 }
301
302 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
303 {
304         struct red_sched_data *q = qdisc_priv(sch);
305         return q->qdisc;
306 }
307
308 static unsigned long red_get(struct Qdisc *sch, u32 classid)
309 {
310         return 1;
311 }
312
/* Counterpart of red_get(): intentionally does nothing. */
static void red_put(struct Qdisc *sch, unsigned long arg)
{
}
317
318 static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
319                             struct nlattr **tca, unsigned long *arg)
320 {
321         return -ENOSYS;
322 }
323
/* Deleting a class is not implemented: always returns -ENOSYS. */
static int red_delete(struct Qdisc *sch, unsigned long cl)
{
	const int err = -ENOSYS;

	return err;
}
328
329 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
330 {
331         if (!walker->stop) {
332                 if (walker->count >= walker->skip)
333                         if (walker->fn(sch, 1, walker) < 0) {
334                                 walker->stop = 1;
335                                 return;
336                         }
337                 walker->count++;
338         }
339 }
340
/* Filter chain lookup: always returns NULL. */
static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
{
	struct tcf_proto **chain = NULL;

	return chain;
}
345
346 static const struct Qdisc_class_ops red_class_ops = {
347         .graft          =       red_graft,
348         .leaf           =       red_leaf,
349         .get            =       red_get,
350         .put            =       red_put,
351         .change         =       red_change_class,
352         .delete         =       red_delete,
353         .walk           =       red_walk,
354         .tcf_chain      =       red_find_tcf,
355         .dump           =       red_dump_class,
356 };
357
358 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
359         .id             =       "red",
360         .priv_size      =       sizeof(struct red_sched_data),
361         .cl_ops         =       &red_class_ops,
362         .enqueue        =       red_enqueue,
363         .dequeue        =       red_dequeue,
364         .requeue        =       red_requeue,
365         .drop           =       red_drop,
366         .init           =       red_init,
367         .reset          =       red_reset,
368         .destroy        =       red_destroy,
369         .change         =       red_change,
370         .dump           =       red_dump,
371         .dump_stats     =       red_dump_stats,
372         .owner          =       THIS_MODULE,
373 };
374
375 static int __init red_module_init(void)
376 {
377         return register_qdisc(&red_qdisc_ops);
378 }
379
380 static void __exit red_module_exit(void)
381 {
382         unregister_qdisc(&red_qdisc_ops);
383 }
384
385 module_init(red_module_init)
386 module_exit(red_module_exit)
387
388 MODULE_LICENSE("GPL");