/* linux-2.6: net/sched/sch_red.c */
/*
 * net/sched/sch_red.c	Random Early Detection queue.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *
 * Changes:
 * J Hadi Salim 980914:	computation fixes
 * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
 * J Hadi Salim 980816:	ECN support
 */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/inet_ecn.h>
23 #include <net/red.h>
24
25
/*	Parameters, settable by user:
	-----------------------------

	limit		- bytes (must be > qth_max + burst)

	Hard limit on queue length, should be chosen >qth_max
	to allow packet bursts. This parameter does not
	affect the algorithms behaviour and can be chosen
	arbitrarily high (well, less than ram size)
	Really, this limit will never be reached
	if RED works correctly.
 */
38
/* Per-qdisc private state for the RED queueing discipline. */
struct red_sched_data
{
	u32			limit;		/* HARD maximal queue length */
	unsigned char		flags;		/* TC_RED_ECN / TC_RED_HARDDROP bits from userspace */
	struct red_parms	parms;		/* RED algorithm parameters and EWMA state (net/red.h) */
	struct red_stats	stats;		/* drop/mark counters reported via dump_stats */
	struct Qdisc		*qdisc;		/* child qdisc that actually holds the packets */
};
47
48 static inline int red_use_ecn(struct red_sched_data *q)
49 {
50         return q->flags & TC_RED_ECN;
51 }
52
53 static inline int red_use_harddrop(struct red_sched_data *q)
54 {
55         return q->flags & TC_RED_HARDDROP;
56 }
57
58 static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
59 {
60         struct red_sched_data *q = qdisc_priv(sch);
61         struct Qdisc *child = q->qdisc;
62         int ret;
63
64         q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
65
66         if (red_is_idling(&q->parms))
67                 red_end_of_idle_period(&q->parms);
68
69         switch (red_action(&q->parms, q->parms.qavg)) {
70                 case RED_DONT_MARK:
71                         break;
72
73                 case RED_PROB_MARK:
74                         sch->qstats.overlimits++;
75                         if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76                                 q->stats.prob_drop++;
77                                 goto congestion_drop;
78                         }
79
80                         q->stats.prob_mark++;
81                         break;
82
83                 case RED_HARD_MARK:
84                         sch->qstats.overlimits++;
85                         if (red_use_harddrop(q) || !red_use_ecn(q) ||
86                             !INET_ECN_set_ce(skb)) {
87                                 q->stats.forced_drop++;
88                                 goto congestion_drop;
89                         }
90
91                         q->stats.forced_mark++;
92                         break;
93         }
94
95         ret = child->enqueue(skb, child);
96         if (likely(ret == NET_XMIT_SUCCESS)) {
97                 sch->bstats.bytes += skb->len;
98                 sch->bstats.packets++;
99                 sch->q.qlen++;
100         } else {
101                 q->stats.pdrop++;
102                 sch->qstats.drops++;
103         }
104         return ret;
105
106 congestion_drop:
107         qdisc_drop(skb, sch);
108         return NET_XMIT_CN;
109 }
110
111 static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
112 {
113         struct red_sched_data *q = qdisc_priv(sch);
114         struct Qdisc *child = q->qdisc;
115         int ret;
116
117         if (red_is_idling(&q->parms))
118                 red_end_of_idle_period(&q->parms);
119
120         ret = child->ops->requeue(skb, child);
121         if (likely(ret == NET_XMIT_SUCCESS)) {
122                 sch->qstats.requeues++;
123                 sch->q.qlen++;
124         }
125         return ret;
126 }
127
128 static struct sk_buff * red_dequeue(struct Qdisc* sch)
129 {
130         struct sk_buff *skb;
131         struct red_sched_data *q = qdisc_priv(sch);
132         struct Qdisc *child = q->qdisc;
133
134         skb = child->dequeue(child);
135         if (skb)
136                 sch->q.qlen--;
137         else if (!red_is_idling(&q->parms))
138                 red_start_of_idle_period(&q->parms);
139
140         return skb;
141 }
142
143 static unsigned int red_drop(struct Qdisc* sch)
144 {
145         struct red_sched_data *q = qdisc_priv(sch);
146         struct Qdisc *child = q->qdisc;
147         unsigned int len;
148
149         if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
150                 q->stats.other++;
151                 sch->qstats.drops++;
152                 sch->q.qlen--;
153                 return len;
154         }
155
156         if (!red_is_idling(&q->parms))
157                 red_start_of_idle_period(&q->parms);
158
159         return 0;
160 }
161
162 static void red_reset(struct Qdisc* sch)
163 {
164         struct red_sched_data *q = qdisc_priv(sch);
165
166         qdisc_reset(q->qdisc);
167         sch->q.qlen = 0;
168         red_restart(&q->parms);
169 }
170
171 static void red_destroy(struct Qdisc *sch)
172 {
173         struct red_sched_data *q = qdisc_priv(sch);
174         qdisc_destroy(q->qdisc);
175 }
176
177 static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
178 {
179         struct Qdisc *q;
180         struct nlattr *nla;
181         int ret;
182
183         q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
184                               TC_H_MAKE(sch->handle, 1));
185         if (q) {
186                 nla = kmalloc(nla_attr_size(sizeof(struct tc_fifo_qopt)),
187                               GFP_KERNEL);
188                 if (nla) {
189                         nla->nla_type = RTM_NEWQDISC;
190                         nla->nla_len = nla_attr_size(sizeof(struct tc_fifo_qopt));
191                         ((struct tc_fifo_qopt *)nla_data(nla))->limit = limit;
192
193                         ret = q->ops->change(q, nla);
194                         kfree(nla);
195
196                         if (ret == 0)
197                                 return q;
198                 }
199                 qdisc_destroy(q);
200         }
201         return NULL;
202 }
203
/* Netlink validation policy: exact-size parameter block plus the
 * 256-byte Scell lookup table. */
static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
};
208
/*
 * Apply RED configuration from userspace (both initial setup via
 * red_init() and later "tc qdisc change").  Expects TCA_RED_PARMS
 * and TCA_RED_STAB attributes; returns 0 or a negative errno.
 */
static int red_change(struct Qdisc *sch, struct nlattr *opt)
{
	struct red_sched_data *q = qdisc_priv(sch);
	struct nlattr *tb[TCA_RED_MAX + 1];
	struct tc_red_qopt *ctl;
	struct Qdisc *child = NULL;
	int err;

	if (opt == NULL)
		return -EINVAL;

	err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
	if (err < 0)
		return err;

	if (tb[TCA_RED_PARMS] == NULL ||
	    tb[TCA_RED_STAB] == NULL)
		return -EINVAL;

	ctl = nla_data(tb[TCA_RED_PARMS]);

	/* Allocate the replacement child before taking the tree lock,
	 * since red_create_dflt() sleeps (GFP_KERNEL). */
	if (ctl->limit > 0) {
		child = red_create_dflt(sch, ctl->limit);
		if (child == NULL)
			return -ENOMEM;
	}

	sch_tree_lock(sch);
	q->flags = ctl->flags;
	q->limit = ctl->limit;
	if (child) {
		/* Swap in the new child and drop the old one, telling the
		 * parent hierarchy about the packets that vanish with it. */
		qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
		qdisc_destroy(xchg(&q->qdisc, child));
	}

	red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
				 ctl->Plog, ctl->Scell_log,
				 nla_data(tb[TCA_RED_STAB]));

	/* NOTE(review): this tests sch->q, not the child's queue — packets
	 * live in q->qdisc here; verify this is the intended emptiness check. */
	if (skb_queue_empty(&sch->q))
		red_end_of_idle_period(&q->parms);

	sch_tree_unlock(sch);
	return 0;
}
254
255 static int red_init(struct Qdisc* sch, struct nlattr *opt)
256 {
257         struct red_sched_data *q = qdisc_priv(sch);
258
259         q->qdisc = &noop_qdisc;
260         return red_change(sch, opt);
261 }
262
263 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
264 {
265         struct red_sched_data *q = qdisc_priv(sch);
266         struct nlattr *opts = NULL;
267         struct tc_red_qopt opt = {
268                 .limit          = q->limit,
269                 .flags          = q->flags,
270                 .qth_min        = q->parms.qth_min >> q->parms.Wlog,
271                 .qth_max        = q->parms.qth_max >> q->parms.Wlog,
272                 .Wlog           = q->parms.Wlog,
273                 .Plog           = q->parms.Plog,
274                 .Scell_log      = q->parms.Scell_log,
275         };
276
277         opts = nla_nest_start(skb, TCA_OPTIONS);
278         if (opts == NULL)
279                 goto nla_put_failure;
280         NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
281         return nla_nest_end(skb, opts);
282
283 nla_put_failure:
284         return nla_nest_cancel(skb, opts);
285 }
286
287 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
288 {
289         struct red_sched_data *q = qdisc_priv(sch);
290         struct tc_red_xstats st = {
291                 .early  = q->stats.prob_drop + q->stats.forced_drop,
292                 .pdrop  = q->stats.pdrop,
293                 .other  = q->stats.other,
294                 .marked = q->stats.prob_mark + q->stats.forced_mark,
295         };
296
297         return gnet_stats_copy_app(d, &st, sizeof(st));
298 }
299
300 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
301                           struct sk_buff *skb, struct tcmsg *tcm)
302 {
303         struct red_sched_data *q = qdisc_priv(sch);
304
305         if (cl != 1)
306                 return -ENOENT;
307         tcm->tcm_handle |= TC_H_MIN(1);
308         tcm->tcm_info = q->qdisc->handle;
309         return 0;
310 }
311
312 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
313                      struct Qdisc **old)
314 {
315         struct red_sched_data *q = qdisc_priv(sch);
316
317         if (new == NULL)
318                 new = &noop_qdisc;
319
320         sch_tree_lock(sch);
321         *old = xchg(&q->qdisc, new);
322         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
323         qdisc_reset(*old);
324         sch_tree_unlock(sch);
325         return 0;
326 }
327
328 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
329 {
330         struct red_sched_data *q = qdisc_priv(sch);
331         return q->qdisc;
332 }
333
334 static unsigned long red_get(struct Qdisc *sch, u32 classid)
335 {
336         return 1;
337 }
338
/* Classes are not reference-counted here, so nothing to release. */
static void red_put(struct Qdisc *sch, unsigned long arg)
{
}
343
344 static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
345                             struct nlattr **tca, unsigned long *arg)
346 {
347         return -ENOSYS;
348 }
349
350 static int red_delete(struct Qdisc *sch, unsigned long cl)
351 {
352         return -ENOSYS;
353 }
354
355 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
356 {
357         if (!walker->stop) {
358                 if (walker->count >= walker->skip)
359                         if (walker->fn(sch, 1, walker) < 0) {
360                                 walker->stop = 1;
361                                 return;
362                         }
363                 walker->count++;
364         }
365 }
366
367 static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
368 {
369         return NULL;
370 }
371
/* Class operations: RED exposes its child as a single pseudo-class
 * (minor 1) so class/filter tooling keeps working. */
static const struct Qdisc_class_ops red_class_ops = {
	.graft		=	red_graft,
	.leaf		=	red_leaf,
	.get		=	red_get,
	.put		=	red_put,
	.change		=	red_change_class,
	.delete		=	red_delete,
	.walk		=	red_walk,
	.tcf_chain	=	red_find_tcf,
	.dump		=	red_dump_class,
};
383
/* Qdisc operations table registered with the packet scheduler core. */
static struct Qdisc_ops red_qdisc_ops __read_mostly = {
	.id		=	"red",
	.priv_size	=	sizeof(struct red_sched_data),
	.cl_ops		=	&red_class_ops,
	.enqueue	=	red_enqueue,
	.dequeue	=	red_dequeue,
	.requeue	=	red_requeue,
	.drop		=	red_drop,
	.init		=	red_init,
	.reset		=	red_reset,
	.destroy	=	red_destroy,
	.change		=	red_change,
	.dump		=	red_dump,
	.dump_stats	=	red_dump_stats,
	.owner		=	THIS_MODULE,
};
400
401 static int __init red_module_init(void)
402 {
403         return register_qdisc(&red_qdisc_ops);
404 }
405
406 static void __exit red_module_exit(void)
407 {
408         unregister_qdisc(&red_qdisc_ops);
409 }
410
/* Module registration hooks and license declaration. */
module_init(red_module_init)
module_exit(red_module_exit)

MODULE_LICENSE("GPL");