Merge git://git.kernel.org/pub/scm/linux/kernel/git/davem/net-next-2.6
[linux-2.6] / net / sched / sch_red.c
1 /*
2  * net/sched/sch_red.c  Random Early Detection queue.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914: computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/inet_ecn.h>
23 #include <net/red.h>
24
25
26 /*      Parameters, settable by user:
27         -----------------------------
28
29         limit           - bytes (must be > qth_max + burst)
30
31         Hard limit on queue length, should be chosen >qth_max
32         to allow packet bursts. This parameter does not
33         affect the algorithms behaviour and can be chosen
34         arbitrarily high (well, less than ram size)
35         Really, this limit will never be reached
36         if RED works correctly.
37  */
38
39 struct red_sched_data
40 {
41         u32                     limit;          /* HARD maximal queue length */
42         unsigned char           flags;
43         struct red_parms        parms;
44         struct red_stats        stats;
45         struct Qdisc            *qdisc;
46 };
47
48 static inline int red_use_ecn(struct red_sched_data *q)
49 {
50         return q->flags & TC_RED_ECN;
51 }
52
53 static inline int red_use_harddrop(struct red_sched_data *q)
54 {
55         return q->flags & TC_RED_HARDDROP;
56 }
57
58 static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
59 {
60         struct red_sched_data *q = qdisc_priv(sch);
61         struct Qdisc *child = q->qdisc;
62         int ret;
63
64         q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
65
66         if (red_is_idling(&q->parms))
67                 red_end_of_idle_period(&q->parms);
68
69         switch (red_action(&q->parms, q->parms.qavg)) {
70                 case RED_DONT_MARK:
71                         break;
72
73                 case RED_PROB_MARK:
74                         sch->qstats.overlimits++;
75                         if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76                                 q->stats.prob_drop++;
77                                 goto congestion_drop;
78                         }
79
80                         q->stats.prob_mark++;
81                         break;
82
83                 case RED_HARD_MARK:
84                         sch->qstats.overlimits++;
85                         if (red_use_harddrop(q) || !red_use_ecn(q) ||
86                             !INET_ECN_set_ce(skb)) {
87                                 q->stats.forced_drop++;
88                                 goto congestion_drop;
89                         }
90
91                         q->stats.forced_mark++;
92                         break;
93         }
94
95         ret = qdisc_enqueue(skb, child);
96         if (likely(ret == NET_XMIT_SUCCESS)) {
97                 sch->bstats.bytes += qdisc_pkt_len(skb);
98                 sch->bstats.packets++;
99                 sch->q.qlen++;
100         } else if (net_xmit_drop_count(ret)) {
101                 q->stats.pdrop++;
102                 sch->qstats.drops++;
103         }
104         return ret;
105
106 congestion_drop:
107         qdisc_drop(skb, sch);
108         return NET_XMIT_CN;
109 }
110
111 static struct sk_buff * red_dequeue(struct Qdisc* sch)
112 {
113         struct sk_buff *skb;
114         struct red_sched_data *q = qdisc_priv(sch);
115         struct Qdisc *child = q->qdisc;
116
117         skb = child->dequeue(child);
118         if (skb)
119                 sch->q.qlen--;
120         else if (!red_is_idling(&q->parms))
121                 red_start_of_idle_period(&q->parms);
122
123         return skb;
124 }
125
126 static struct sk_buff * red_peek(struct Qdisc* sch)
127 {
128         struct red_sched_data *q = qdisc_priv(sch);
129         struct Qdisc *child = q->qdisc;
130
131         return child->ops->peek(child);
132 }
133
134 static unsigned int red_drop(struct Qdisc* sch)
135 {
136         struct red_sched_data *q = qdisc_priv(sch);
137         struct Qdisc *child = q->qdisc;
138         unsigned int len;
139
140         if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
141                 q->stats.other++;
142                 sch->qstats.drops++;
143                 sch->q.qlen--;
144                 return len;
145         }
146
147         if (!red_is_idling(&q->parms))
148                 red_start_of_idle_period(&q->parms);
149
150         return 0;
151 }
152
153 static void red_reset(struct Qdisc* sch)
154 {
155         struct red_sched_data *q = qdisc_priv(sch);
156
157         qdisc_reset(q->qdisc);
158         sch->q.qlen = 0;
159         red_restart(&q->parms);
160 }
161
162 static void red_destroy(struct Qdisc *sch)
163 {
164         struct red_sched_data *q = qdisc_priv(sch);
165         qdisc_destroy(q->qdisc);
166 }
167
168 static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
169         [TCA_RED_PARMS] = { .len = sizeof(struct tc_red_qopt) },
170         [TCA_RED_STAB]  = { .len = RED_STAB_SIZE },
171 };
172
173 static int red_change(struct Qdisc *sch, struct nlattr *opt)
174 {
175         struct red_sched_data *q = qdisc_priv(sch);
176         struct nlattr *tb[TCA_RED_MAX + 1];
177         struct tc_red_qopt *ctl;
178         struct Qdisc *child = NULL;
179         int err;
180
181         if (opt == NULL)
182                 return -EINVAL;
183
184         err = nla_parse_nested(tb, TCA_RED_MAX, opt, red_policy);
185         if (err < 0)
186                 return err;
187
188         if (tb[TCA_RED_PARMS] == NULL ||
189             tb[TCA_RED_STAB] == NULL)
190                 return -EINVAL;
191
192         ctl = nla_data(tb[TCA_RED_PARMS]);
193
194         if (ctl->limit > 0) {
195                 child = fifo_create_dflt(sch, &bfifo_qdisc_ops, ctl->limit);
196                 if (IS_ERR(child))
197                         return PTR_ERR(child);
198         }
199
200         sch_tree_lock(sch);
201         q->flags = ctl->flags;
202         q->limit = ctl->limit;
203         if (child) {
204                 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
205                 qdisc_destroy(q->qdisc);
206                 q->qdisc = child;
207         }
208
209         red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
210                                  ctl->Plog, ctl->Scell_log,
211                                  nla_data(tb[TCA_RED_STAB]));
212
213         if (skb_queue_empty(&sch->q))
214                 red_end_of_idle_period(&q->parms);
215
216         sch_tree_unlock(sch);
217         return 0;
218 }
219
220 static int red_init(struct Qdisc* sch, struct nlattr *opt)
221 {
222         struct red_sched_data *q = qdisc_priv(sch);
223
224         q->qdisc = &noop_qdisc;
225         return red_change(sch, opt);
226 }
227
228 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
229 {
230         struct red_sched_data *q = qdisc_priv(sch);
231         struct nlattr *opts = NULL;
232         struct tc_red_qopt opt = {
233                 .limit          = q->limit,
234                 .flags          = q->flags,
235                 .qth_min        = q->parms.qth_min >> q->parms.Wlog,
236                 .qth_max        = q->parms.qth_max >> q->parms.Wlog,
237                 .Wlog           = q->parms.Wlog,
238                 .Plog           = q->parms.Plog,
239                 .Scell_log      = q->parms.Scell_log,
240         };
241
242         opts = nla_nest_start(skb, TCA_OPTIONS);
243         if (opts == NULL)
244                 goto nla_put_failure;
245         NLA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
246         return nla_nest_end(skb, opts);
247
248 nla_put_failure:
249         nla_nest_cancel(skb, opts);
250         return -EMSGSIZE;
251 }
252
253 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
254 {
255         struct red_sched_data *q = qdisc_priv(sch);
256         struct tc_red_xstats st = {
257                 .early  = q->stats.prob_drop + q->stats.forced_drop,
258                 .pdrop  = q->stats.pdrop,
259                 .other  = q->stats.other,
260                 .marked = q->stats.prob_mark + q->stats.forced_mark,
261         };
262
263         return gnet_stats_copy_app(d, &st, sizeof(st));
264 }
265
266 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
267                           struct sk_buff *skb, struct tcmsg *tcm)
268 {
269         struct red_sched_data *q = qdisc_priv(sch);
270
271         if (cl != 1)
272                 return -ENOENT;
273         tcm->tcm_handle |= TC_H_MIN(1);
274         tcm->tcm_info = q->qdisc->handle;
275         return 0;
276 }
277
278 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
279                      struct Qdisc **old)
280 {
281         struct red_sched_data *q = qdisc_priv(sch);
282
283         if (new == NULL)
284                 new = &noop_qdisc;
285
286         sch_tree_lock(sch);
287         *old = q->qdisc;
288         q->qdisc = new;
289         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
290         qdisc_reset(*old);
291         sch_tree_unlock(sch);
292         return 0;
293 }
294
295 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
296 {
297         struct red_sched_data *q = qdisc_priv(sch);
298         return q->qdisc;
299 }
300
301 static unsigned long red_get(struct Qdisc *sch, u32 classid)
302 {
303         return 1;
304 }
305
306 static void red_put(struct Qdisc *sch, unsigned long arg)
307 {
308         return;
309 }
310
311 static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
312                             struct nlattr **tca, unsigned long *arg)
313 {
314         return -ENOSYS;
315 }
316
317 static int red_delete(struct Qdisc *sch, unsigned long cl)
318 {
319         return -ENOSYS;
320 }
321
322 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
323 {
324         if (!walker->stop) {
325                 if (walker->count >= walker->skip)
326                         if (walker->fn(sch, 1, walker) < 0) {
327                                 walker->stop = 1;
328                                 return;
329                         }
330                 walker->count++;
331         }
332 }
333
334 static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
335 {
336         return NULL;
337 }
338
339 static const struct Qdisc_class_ops red_class_ops = {
340         .graft          =       red_graft,
341         .leaf           =       red_leaf,
342         .get            =       red_get,
343         .put            =       red_put,
344         .change         =       red_change_class,
345         .delete         =       red_delete,
346         .walk           =       red_walk,
347         .tcf_chain      =       red_find_tcf,
348         .dump           =       red_dump_class,
349 };
350
351 static struct Qdisc_ops red_qdisc_ops __read_mostly = {
352         .id             =       "red",
353         .priv_size      =       sizeof(struct red_sched_data),
354         .cl_ops         =       &red_class_ops,
355         .enqueue        =       red_enqueue,
356         .dequeue        =       red_dequeue,
357         .peek           =       red_peek,
358         .drop           =       red_drop,
359         .init           =       red_init,
360         .reset          =       red_reset,
361         .destroy        =       red_destroy,
362         .change         =       red_change,
363         .dump           =       red_dump,
364         .dump_stats     =       red_dump_stats,
365         .owner          =       THIS_MODULE,
366 };
367
368 static int __init red_module_init(void)
369 {
370         return register_qdisc(&red_qdisc_ops);
371 }
372
373 static void __exit red_module_exit(void)
374 {
375         unregister_qdisc(&red_qdisc_ops);
376 }
377
378 module_init(red_module_init)
379 module_exit(red_module_exit)
380
381 MODULE_LICENSE("GPL");