Merge branch 'topic/vmaster-update' into topic/docbook-fix
[linux-2.6] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/string.h>
15 #include <linux/errno.h>
16 #include <linux/if_arp.h>
17 #include <linux/netdevice.h>
18 #include <linux/init.h>
19 #include <linux/skbuff.h>
20 #include <linux/moduleparam.h>
21 #include <net/dst.h>
22 #include <net/neighbour.h>
23 #include <net/pkt_sched.h>
24
25 /*
26    How to setup it.
27    ----------------
28
29    After loading this module you will find a new device teqlN
30    and new qdisc with the same name. To join a slave to the equalizer
31    you should just set this qdisc on a device f.e.
32
33    # tc qdisc add dev eth0 root teql0
34    # tc qdisc add dev eth1 root teql0
35
36    That's all. Full PnP 8)
37
38    Applicability.
39    --------------
40
41    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
42       signal and generate EOI events. If you want to equalize virtual devices
43       like tunnels, use a normal eql device.
44    2. This device puts no limitations on physical slave characteristics
45       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
46       Certainly, large difference in link speeds will make the resulting
47       equalized link unusable, because of huge packet reordering.
48       I estimate an upper useful difference as ~10 times.
49    3. If the slave requires address resolution, only protocols using
50       neighbour cache (IPv4/IPv6) will work over the equalized link.
51       Other protocols are still allowed to use the slave device directly,
52       which will not break load balancing, though native slave
53       traffic will have the highest priority.  */
54
55 struct teql_master
56 {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         struct net_device_stats stats;
62 };
63
64 struct teql_sched_data
65 {
66         struct Qdisc *next;
67         struct teql_master *m;
68         struct neighbour *ncache;
69         struct sk_buff_head q;
70 };
71
/*
 * Ring successor of slave qdisc q.  Expands to an lvalue, so it can be
 * both read and assigned.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Link-type flags that the master device copies from its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
75
76 /* "teql*" qdisc routines */
77
78 static int
79 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80 {
81         struct net_device *dev = qdisc_dev(sch);
82         struct teql_sched_data *q = qdisc_priv(sch);
83
84         if (q->q.qlen < dev->tx_queue_len) {
85                 __skb_queue_tail(&q->q, skb);
86                 sch->bstats.bytes += qdisc_pkt_len(skb);
87                 sch->bstats.packets++;
88                 return 0;
89         }
90
91         kfree_skb(skb);
92         sch->qstats.drops++;
93         return NET_XMIT_DROP;
94 }
95
96 static struct sk_buff *
97 teql_dequeue(struct Qdisc* sch)
98 {
99         struct teql_sched_data *dat = qdisc_priv(sch);
100         struct netdev_queue *dat_queue;
101         struct sk_buff *skb;
102
103         skb = __skb_dequeue(&dat->q);
104         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
105         if (skb == NULL) {
106                 struct net_device *m = qdisc_dev(dat_queue->qdisc);
107                 if (m) {
108                         dat->m->slaves = sch;
109                         netif_wake_queue(m);
110                 }
111         }
112         sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
113         return skb;
114 }
115
116 static struct sk_buff *
117 teql_peek(struct Qdisc* sch)
118 {
119         /* teql is meant to be used as root qdisc */
120         return NULL;
121 }
122
123 static __inline__ void
124 teql_neigh_release(struct neighbour *n)
125 {
126         if (n)
127                 neigh_release(n);
128 }
129
130 static void
131 teql_reset(struct Qdisc* sch)
132 {
133         struct teql_sched_data *dat = qdisc_priv(sch);
134
135         skb_queue_purge(&dat->q);
136         sch->q.qlen = 0;
137         teql_neigh_release(xchg(&dat->ncache, NULL));
138 }
139
140 static void
141 teql_destroy(struct Qdisc* sch)
142 {
143         struct Qdisc *q, *prev;
144         struct teql_sched_data *dat = qdisc_priv(sch);
145         struct teql_master *master = dat->m;
146
147         if ((prev = master->slaves) != NULL) {
148                 do {
149                         q = NEXT_SLAVE(prev);
150                         if (q == sch) {
151                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
152                                 if (q == master->slaves) {
153                                         master->slaves = NEXT_SLAVE(q);
154                                         if (q == master->slaves) {
155                                                 struct netdev_queue *txq;
156                                                 spinlock_t *root_lock;
157
158                                                 txq = netdev_get_tx_queue(master->dev, 0);
159                                                 master->slaves = NULL;
160
161                                                 root_lock = qdisc_root_sleeping_lock(txq->qdisc);
162                                                 spin_lock_bh(root_lock);
163                                                 qdisc_reset(txq->qdisc);
164                                                 spin_unlock_bh(root_lock);
165                                         }
166                                 }
167                                 skb_queue_purge(&dat->q);
168                                 teql_neigh_release(xchg(&dat->ncache, NULL));
169                                 break;
170                         }
171
172                 } while ((prev = q) != master->slaves);
173         }
174 }
175
176 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
177 {
178         struct net_device *dev = qdisc_dev(sch);
179         struct teql_master *m = (struct teql_master*)sch->ops;
180         struct teql_sched_data *q = qdisc_priv(sch);
181
182         if (dev->hard_header_len > m->dev->hard_header_len)
183                 return -EINVAL;
184
185         if (m->dev == dev)
186                 return -ELOOP;
187
188         q->m = m;
189
190         skb_queue_head_init(&q->q);
191
192         if (m->slaves) {
193                 if (m->dev->flags & IFF_UP) {
194                         if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
195                             || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
196                             || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
197                             || dev->mtu < m->dev->mtu)
198                                 return -EINVAL;
199                 } else {
200                         if (!(dev->flags&IFF_POINTOPOINT))
201                                 m->dev->flags &= ~IFF_POINTOPOINT;
202                         if (!(dev->flags&IFF_BROADCAST))
203                                 m->dev->flags &= ~IFF_BROADCAST;
204                         if (!(dev->flags&IFF_MULTICAST))
205                                 m->dev->flags &= ~IFF_MULTICAST;
206                         if (dev->mtu < m->dev->mtu)
207                                 m->dev->mtu = dev->mtu;
208                 }
209                 q->next = NEXT_SLAVE(m->slaves);
210                 NEXT_SLAVE(m->slaves) = sch;
211         } else {
212                 q->next = sch;
213                 m->slaves = sch;
214                 m->dev->mtu = dev->mtu;
215                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
216         }
217         return 0;
218 }
219
220
221 static int
222 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
223 {
224         struct netdev_queue *dev_queue = netdev_get_tx_queue(dev, 0);
225         struct teql_sched_data *q = qdisc_priv(dev_queue->qdisc);
226         struct neighbour *mn = skb->dst->neighbour;
227         struct neighbour *n = q->ncache;
228
229         if (mn->tbl == NULL)
230                 return -EINVAL;
231         if (n && n->tbl == mn->tbl &&
232             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
233                 atomic_inc(&n->refcnt);
234         } else {
235                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
236                 if (IS_ERR(n))
237                         return PTR_ERR(n);
238         }
239         if (neigh_event_send(n, skb_res) == 0) {
240                 int err;
241
242                 read_lock(&n->lock);
243                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
244                                       n->ha, NULL, skb->len);
245                 read_unlock(&n->lock);
246
247                 if (err < 0) {
248                         neigh_release(n);
249                         return -EINVAL;
250                 }
251                 teql_neigh_release(xchg(&q->ncache, n));
252                 return 0;
253         }
254         neigh_release(n);
255         return (skb_res == NULL) ? -EAGAIN : 1;
256 }
257
258 static inline int teql_resolve(struct sk_buff *skb,
259                                struct sk_buff *skb_res, struct net_device *dev)
260 {
261         struct netdev_queue *txq = netdev_get_tx_queue(dev, 0);
262         if (txq->qdisc == &noop_qdisc)
263                 return -ENODEV;
264
265         if (dev->header_ops == NULL ||
266             skb->dst == NULL ||
267             skb->dst->neighbour == NULL)
268                 return 0;
269         return __teql_resolve(skb, skb_res, dev);
270 }
271
272 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
273 {
274         struct teql_master *master = netdev_priv(dev);
275         struct Qdisc *start, *q;
276         int busy;
277         int nores;
278         int subq = skb_get_queue_mapping(skb);
279         struct sk_buff *skb_res = NULL;
280
281         start = master->slaves;
282
283 restart:
284         nores = 0;
285         busy = 0;
286
287         if ((q = start) == NULL)
288                 goto drop;
289
290         do {
291                 struct net_device *slave = qdisc_dev(q);
292                 struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
293                 const struct net_device_ops *slave_ops = slave->netdev_ops;
294
295                 if (slave_txq->qdisc_sleeping != q)
296                         continue;
297                 if (__netif_subqueue_stopped(slave, subq) ||
298                     !netif_running(slave)) {
299                         busy = 1;
300                         continue;
301                 }
302
303                 switch (teql_resolve(skb, skb_res, slave)) {
304                 case 0:
305                         if (__netif_tx_trylock(slave_txq)) {
306                                 if (!netif_tx_queue_stopped(slave_txq) &&
307                                     !netif_tx_queue_frozen(slave_txq) &&
308                                     slave_ops->ndo_start_xmit(skb, slave) == 0) {
309                                         __netif_tx_unlock(slave_txq);
310                                         master->slaves = NEXT_SLAVE(q);
311                                         netif_wake_queue(dev);
312                                         master->stats.tx_packets++;
313                                         master->stats.tx_bytes +=
314                                                 qdisc_pkt_len(skb);
315                                         return 0;
316                                 }
317                                 __netif_tx_unlock(slave_txq);
318                         }
319                         if (netif_queue_stopped(dev))
320                                 busy = 1;
321                         break;
322                 case 1:
323                         master->slaves = NEXT_SLAVE(q);
324                         return 0;
325                 default:
326                         nores = 1;
327                         break;
328                 }
329                 __skb_pull(skb, skb_network_offset(skb));
330         } while ((q = NEXT_SLAVE(q)) != start);
331
332         if (nores && skb_res == NULL) {
333                 skb_res = skb;
334                 goto restart;
335         }
336
337         if (busy) {
338                 netif_stop_queue(dev);
339                 return 1;
340         }
341         master->stats.tx_errors++;
342
343 drop:
344         master->stats.tx_dropped++;
345         dev_kfree_skb(skb);
346         return 0;
347 }
348
349 static int teql_master_open(struct net_device *dev)
350 {
351         struct Qdisc * q;
352         struct teql_master *m = netdev_priv(dev);
353         int mtu = 0xFFFE;
354         unsigned flags = IFF_NOARP|IFF_MULTICAST;
355
356         if (m->slaves == NULL)
357                 return -EUNATCH;
358
359         flags = FMASK;
360
361         q = m->slaves;
362         do {
363                 struct net_device *slave = qdisc_dev(q);
364
365                 if (slave == NULL)
366                         return -EUNATCH;
367
368                 if (slave->mtu < mtu)
369                         mtu = slave->mtu;
370                 if (slave->hard_header_len > LL_MAX_HEADER)
371                         return -EINVAL;
372
373                 /* If all the slaves are BROADCAST, master is BROADCAST
374                    If all the slaves are PtP, master is PtP
375                    Otherwise, master is NBMA.
376                  */
377                 if (!(slave->flags&IFF_POINTOPOINT))
378                         flags &= ~IFF_POINTOPOINT;
379                 if (!(slave->flags&IFF_BROADCAST))
380                         flags &= ~IFF_BROADCAST;
381                 if (!(slave->flags&IFF_MULTICAST))
382                         flags &= ~IFF_MULTICAST;
383         } while ((q = NEXT_SLAVE(q)) != m->slaves);
384
385         m->dev->mtu = mtu;
386         m->dev->flags = (m->dev->flags&~FMASK) | flags;
387         netif_start_queue(m->dev);
388         return 0;
389 }
390
/* Take the master device down: just stop its transmit queue. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);

        return 0;
}
396
397 static struct net_device_stats *teql_master_stats(struct net_device *dev)
398 {
399         struct teql_master *m = netdev_priv(dev);
400         return &m->stats;
401 }
402
403 static int teql_master_mtu(struct net_device *dev, int new_mtu)
404 {
405         struct teql_master *m = netdev_priv(dev);
406         struct Qdisc *q;
407
408         if (new_mtu < 68)
409                 return -EINVAL;
410
411         q = m->slaves;
412         if (q) {
413                 do {
414                         if (new_mtu > qdisc_dev(q)->mtu)
415                                 return -EINVAL;
416                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
417         }
418
419         dev->mtu = new_mtu;
420         return 0;
421 }
422
423 static const struct net_device_ops teql_netdev_ops = {
424         .ndo_open       = teql_master_open,
425         .ndo_stop       = teql_master_close,
426         .ndo_start_xmit = teql_master_xmit,
427         .ndo_get_stats  = teql_master_stats,
428         .ndo_change_mtu = teql_master_mtu,
429 };
430
431 static __init void teql_master_setup(struct net_device *dev)
432 {
433         struct teql_master *master = netdev_priv(dev);
434         struct Qdisc_ops *ops = &master->qops;
435
436         master->dev     = dev;
437         ops->priv_size  = sizeof(struct teql_sched_data);
438
439         ops->enqueue    =       teql_enqueue;
440         ops->dequeue    =       teql_dequeue;
441         ops->peek       =       teql_peek;
442         ops->init       =       teql_qdisc_init;
443         ops->reset      =       teql_reset;
444         ops->destroy    =       teql_destroy;
445         ops->owner      =       THIS_MODULE;
446
447         dev->netdev_ops =       &teql_netdev_ops;
448         dev->type               = ARPHRD_VOID;
449         dev->mtu                = 1500;
450         dev->tx_queue_len       = 100;
451         dev->flags              = IFF_NOARP;
452         dev->hard_header_len    = LL_MAX_HEADER;
453 }
454
455 static LIST_HEAD(master_dev_list);
456 static int max_equalizers = 1;
457 module_param(max_equalizers, int, 0);
458 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
459
460 static int __init teql_init(void)
461 {
462         int i;
463         int err = -ENODEV;
464
465         for (i = 0; i < max_equalizers; i++) {
466                 struct net_device *dev;
467                 struct teql_master *master;
468
469                 dev = alloc_netdev(sizeof(struct teql_master),
470                                   "teql%d", teql_master_setup);
471                 if (!dev) {
472                         err = -ENOMEM;
473                         break;
474                 }
475
476                 if ((err = register_netdev(dev))) {
477                         free_netdev(dev);
478                         break;
479                 }
480
481                 master = netdev_priv(dev);
482
483                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
484                 err = register_qdisc(&master->qops);
485
486                 if (err) {
487                         unregister_netdev(dev);
488                         free_netdev(dev);
489                         break;
490                 }
491
492                 list_add_tail(&master->master_list, &master_dev_list);
493         }
494         return i ? 0 : err;
495 }
496
497 static void __exit teql_exit(void)
498 {
499         struct teql_master *master, *nxt;
500
501         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
502
503                 list_del(&master->master_list);
504
505                 unregister_qdisc(&master->qops);
506                 unregister_netdev(master->dev);
507                 free_netdev(master->dev);
508         }
509 }
510
511 module_init(teql_init);
512 module_exit(teql_exit);
513
514 MODULE_LICENSE("GPL");