[TCP]: Keep state in Disorder also if only lost_out > 0
[linux-2.6] / net / sched / sch_red.c
1 /*
2  * net/sched/sch_red.c  Random Early Detection queue.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  *
11  * Changes:
12  * J Hadi Salim 980914: computation fixes
13  * Alexey Makarenko <makar@phoenix.kharkov.ua> 990814: qave on idle link was calculated incorrectly.
14  * J Hadi Salim 980816:  ECN support
15  */
16
17 #include <linux/module.h>
18 #include <linux/types.h>
19 #include <linux/kernel.h>
20 #include <linux/skbuff.h>
21 #include <net/pkt_sched.h>
22 #include <net/inet_ecn.h>
23 #include <net/red.h>
24
25
26 /*      Parameters, settable by user:
27         -----------------------------
28
29         limit           - bytes (must be > qth_max + burst)
30
31         Hard limit on queue length, should be chosen >qth_max
32         to allow packet bursts. This parameter does not
33         affect the algorithms behaviour and can be chosen
34         arbitrarily high (well, less than ram size)
35         Really, this limit will never be reached
36         if RED works correctly.
37  */
38
39 struct red_sched_data
40 {
41         u32                     limit;          /* HARD maximal queue length */
42         unsigned char           flags;
43         struct red_parms        parms;
44         struct red_stats        stats;
45         struct Qdisc            *qdisc;
46 };
47
48 static inline int red_use_ecn(struct red_sched_data *q)
49 {
50         return q->flags & TC_RED_ECN;
51 }
52
53 static inline int red_use_harddrop(struct red_sched_data *q)
54 {
55         return q->flags & TC_RED_HARDDROP;
56 }
57
58 static int red_enqueue(struct sk_buff *skb, struct Qdisc* sch)
59 {
60         struct red_sched_data *q = qdisc_priv(sch);
61         struct Qdisc *child = q->qdisc;
62         int ret;
63
64         q->parms.qavg = red_calc_qavg(&q->parms, child->qstats.backlog);
65
66         if (red_is_idling(&q->parms))
67                 red_end_of_idle_period(&q->parms);
68
69         switch (red_action(&q->parms, q->parms.qavg)) {
70                 case RED_DONT_MARK:
71                         break;
72
73                 case RED_PROB_MARK:
74                         sch->qstats.overlimits++;
75                         if (!red_use_ecn(q) || !INET_ECN_set_ce(skb)) {
76                                 q->stats.prob_drop++;
77                                 goto congestion_drop;
78                         }
79
80                         q->stats.prob_mark++;
81                         break;
82
83                 case RED_HARD_MARK:
84                         sch->qstats.overlimits++;
85                         if (red_use_harddrop(q) || !red_use_ecn(q) ||
86                             !INET_ECN_set_ce(skb)) {
87                                 q->stats.forced_drop++;
88                                 goto congestion_drop;
89                         }
90
91                         q->stats.forced_mark++;
92                         break;
93         }
94
95         ret = child->enqueue(skb, child);
96         if (likely(ret == NET_XMIT_SUCCESS)) {
97                 sch->bstats.bytes += skb->len;
98                 sch->bstats.packets++;
99                 sch->q.qlen++;
100         } else {
101                 q->stats.pdrop++;
102                 sch->qstats.drops++;
103         }
104         return ret;
105
106 congestion_drop:
107         qdisc_drop(skb, sch);
108         return NET_XMIT_CN;
109 }
110
111 static int red_requeue(struct sk_buff *skb, struct Qdisc* sch)
112 {
113         struct red_sched_data *q = qdisc_priv(sch);
114         struct Qdisc *child = q->qdisc;
115         int ret;
116
117         if (red_is_idling(&q->parms))
118                 red_end_of_idle_period(&q->parms);
119
120         ret = child->ops->requeue(skb, child);
121         if (likely(ret == NET_XMIT_SUCCESS)) {
122                 sch->qstats.requeues++;
123                 sch->q.qlen++;
124         }
125         return ret;
126 }
127
128 static struct sk_buff * red_dequeue(struct Qdisc* sch)
129 {
130         struct sk_buff *skb;
131         struct red_sched_data *q = qdisc_priv(sch);
132         struct Qdisc *child = q->qdisc;
133
134         skb = child->dequeue(child);
135         if (skb)
136                 sch->q.qlen--;
137         else if (!red_is_idling(&q->parms))
138                 red_start_of_idle_period(&q->parms);
139
140         return skb;
141 }
142
143 static unsigned int red_drop(struct Qdisc* sch)
144 {
145         struct red_sched_data *q = qdisc_priv(sch);
146         struct Qdisc *child = q->qdisc;
147         unsigned int len;
148
149         if (child->ops->drop && (len = child->ops->drop(child)) > 0) {
150                 q->stats.other++;
151                 sch->qstats.drops++;
152                 sch->q.qlen--;
153                 return len;
154         }
155
156         if (!red_is_idling(&q->parms))
157                 red_start_of_idle_period(&q->parms);
158
159         return 0;
160 }
161
162 static void red_reset(struct Qdisc* sch)
163 {
164         struct red_sched_data *q = qdisc_priv(sch);
165
166         qdisc_reset(q->qdisc);
167         sch->q.qlen = 0;
168         red_restart(&q->parms);
169 }
170
171 static void red_destroy(struct Qdisc *sch)
172 {
173         struct red_sched_data *q = qdisc_priv(sch);
174         qdisc_destroy(q->qdisc);
175 }
176
177 static struct Qdisc *red_create_dflt(struct Qdisc *sch, u32 limit)
178 {
179         struct Qdisc *q;
180         struct rtattr *rta;
181         int ret;
182
183         q = qdisc_create_dflt(sch->dev, &bfifo_qdisc_ops,
184                               TC_H_MAKE(sch->handle, 1));
185         if (q) {
186                 rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)),
187                               GFP_KERNEL);
188                 if (rta) {
189                         rta->rta_type = RTM_NEWQDISC;
190                         rta->rta_len = RTA_LENGTH(sizeof(struct tc_fifo_qopt));
191                         ((struct tc_fifo_qopt *)RTA_DATA(rta))->limit = limit;
192
193                         ret = q->ops->change(q, rta);
194                         kfree(rta);
195
196                         if (ret == 0)
197                                 return q;
198                 }
199                 qdisc_destroy(q);
200         }
201         return NULL;
202 }
203
204 static int red_change(struct Qdisc *sch, struct rtattr *opt)
205 {
206         struct red_sched_data *q = qdisc_priv(sch);
207         struct rtattr *tb[TCA_RED_MAX];
208         struct tc_red_qopt *ctl;
209         struct Qdisc *child = NULL;
210
211         if (opt == NULL || rtattr_parse_nested(tb, TCA_RED_MAX, opt))
212                 return -EINVAL;
213
214         if (tb[TCA_RED_PARMS-1] == NULL ||
215             RTA_PAYLOAD(tb[TCA_RED_PARMS-1]) < sizeof(*ctl) ||
216             tb[TCA_RED_STAB-1] == NULL ||
217             RTA_PAYLOAD(tb[TCA_RED_STAB-1]) < RED_STAB_SIZE)
218                 return -EINVAL;
219
220         ctl = RTA_DATA(tb[TCA_RED_PARMS-1]);
221
222         if (ctl->limit > 0) {
223                 child = red_create_dflt(sch, ctl->limit);
224                 if (child == NULL)
225                         return -ENOMEM;
226         }
227
228         sch_tree_lock(sch);
229         q->flags = ctl->flags;
230         q->limit = ctl->limit;
231         if (child) {
232                 qdisc_tree_decrease_qlen(q->qdisc, q->qdisc->q.qlen);
233                 qdisc_destroy(xchg(&q->qdisc, child));
234         }
235
236         red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
237                                  ctl->Plog, ctl->Scell_log,
238                                  RTA_DATA(tb[TCA_RED_STAB-1]));
239
240         if (skb_queue_empty(&sch->q))
241                 red_end_of_idle_period(&q->parms);
242
243         sch_tree_unlock(sch);
244         return 0;
245 }
246
247 static int red_init(struct Qdisc* sch, struct rtattr *opt)
248 {
249         struct red_sched_data *q = qdisc_priv(sch);
250
251         q->qdisc = &noop_qdisc;
252         return red_change(sch, opt);
253 }
254
255 static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
256 {
257         struct red_sched_data *q = qdisc_priv(sch);
258         struct rtattr *opts = NULL;
259         struct tc_red_qopt opt = {
260                 .limit          = q->limit,
261                 .flags          = q->flags,
262                 .qth_min        = q->parms.qth_min >> q->parms.Wlog,
263                 .qth_max        = q->parms.qth_max >> q->parms.Wlog,
264                 .Wlog           = q->parms.Wlog,
265                 .Plog           = q->parms.Plog,
266                 .Scell_log      = q->parms.Scell_log,
267         };
268
269         opts = RTA_NEST(skb, TCA_OPTIONS);
270         RTA_PUT(skb, TCA_RED_PARMS, sizeof(opt), &opt);
271         return RTA_NEST_END(skb, opts);
272
273 rtattr_failure:
274         return RTA_NEST_CANCEL(skb, opts);
275 }
276
277 static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
278 {
279         struct red_sched_data *q = qdisc_priv(sch);
280         struct tc_red_xstats st = {
281                 .early  = q->stats.prob_drop + q->stats.forced_drop,
282                 .pdrop  = q->stats.pdrop,
283                 .other  = q->stats.other,
284                 .marked = q->stats.prob_mark + q->stats.forced_mark,
285         };
286
287         return gnet_stats_copy_app(d, &st, sizeof(st));
288 }
289
290 static int red_dump_class(struct Qdisc *sch, unsigned long cl,
291                           struct sk_buff *skb, struct tcmsg *tcm)
292 {
293         struct red_sched_data *q = qdisc_priv(sch);
294
295         if (cl != 1)
296                 return -ENOENT;
297         tcm->tcm_handle |= TC_H_MIN(1);
298         tcm->tcm_info = q->qdisc->handle;
299         return 0;
300 }
301
302 static int red_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
303                      struct Qdisc **old)
304 {
305         struct red_sched_data *q = qdisc_priv(sch);
306
307         if (new == NULL)
308                 new = &noop_qdisc;
309
310         sch_tree_lock(sch);
311         *old = xchg(&q->qdisc, new);
312         qdisc_tree_decrease_qlen(*old, (*old)->q.qlen);
313         qdisc_reset(*old);
314         sch_tree_unlock(sch);
315         return 0;
316 }
317
318 static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg)
319 {
320         struct red_sched_data *q = qdisc_priv(sch);
321         return q->qdisc;
322 }
323
324 static unsigned long red_get(struct Qdisc *sch, u32 classid)
325 {
326         return 1;
327 }
328
329 static void red_put(struct Qdisc *sch, unsigned long arg)
330 {
331         return;
332 }
333
334 static int red_change_class(struct Qdisc *sch, u32 classid, u32 parentid,
335                             struct rtattr **tca, unsigned long *arg)
336 {
337         return -ENOSYS;
338 }
339
340 static int red_delete(struct Qdisc *sch, unsigned long cl)
341 {
342         return -ENOSYS;
343 }
344
345 static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker)
346 {
347         if (!walker->stop) {
348                 if (walker->count >= walker->skip)
349                         if (walker->fn(sch, 1, walker) < 0) {
350                                 walker->stop = 1;
351                                 return;
352                         }
353                 walker->count++;
354         }
355 }
356
357 static struct tcf_proto **red_find_tcf(struct Qdisc *sch, unsigned long cl)
358 {
359         return NULL;
360 }
361
362 static struct Qdisc_class_ops red_class_ops = {
363         .graft          =       red_graft,
364         .leaf           =       red_leaf,
365         .get            =       red_get,
366         .put            =       red_put,
367         .change         =       red_change_class,
368         .delete         =       red_delete,
369         .walk           =       red_walk,
370         .tcf_chain      =       red_find_tcf,
371         .dump           =       red_dump_class,
372 };
373
374 static struct Qdisc_ops red_qdisc_ops = {
375         .id             =       "red",
376         .priv_size      =       sizeof(struct red_sched_data),
377         .cl_ops         =       &red_class_ops,
378         .enqueue        =       red_enqueue,
379         .dequeue        =       red_dequeue,
380         .requeue        =       red_requeue,
381         .drop           =       red_drop,
382         .init           =       red_init,
383         .reset          =       red_reset,
384         .destroy        =       red_destroy,
385         .change         =       red_change,
386         .dump           =       red_dump,
387         .dump_stats     =       red_dump_stats,
388         .owner          =       THIS_MODULE,
389 };
390
391 static int __init red_module_init(void)
392 {
393         return register_qdisc(&red_qdisc_ops);
394 }
395
396 static void __exit red_module_exit(void)
397 {
398         unregister_qdisc(&red_qdisc_ops);
399 }
400
401 module_init(red_module_init)
402 module_exit(red_module_exit)
403
404 MODULE_LICENSE("GPL");