[IPV4]: Unify access to the routing tables.
[linux-2.6] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/string.h>
15 #include <linux/errno.h>
16 #include <linux/if_arp.h>
17 #include <linux/netdevice.h>
18 #include <linux/init.h>
19 #include <linux/skbuff.h>
20 #include <linux/moduleparam.h>
21 #include <net/dst.h>
22 #include <net/neighbour.h>
23 #include <net/pkt_sched.h>
24
25 /*
26    How to setup it.
27    ----------------
28
29    After loading this module you will find a new device teqlN
30    and new qdisc with the same name. To join a slave to the equalizer
31    you should just set this qdisc on a device f.e.
32
33    # tc qdisc add dev eth0 root teql0
34    # tc qdisc add dev eth1 root teql0
35
36    That's all. Full PnP 8)
37
38    Applicability.
39    --------------
40
41    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
42       signal and generate EOI events. If you want to equalize virtual devices
43       like tunnels, use a normal eql device.
44    2. This device puts no limitations on physical slave characteristics
45       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
46       Certainly, large difference in link speeds will make the resulting
47       equalized link unusable, because of huge packet reordering.
48       I estimate an upper useful difference as ~10 times.
49    3. If the slave requires address resolution, only protocols using
50       neighbour cache (IPv4/IPv6) will work over the equalized link.
51       Other protocols are still allowed to use the slave device directly,
52       which will not break load balancing, though native slave
53       traffic will have the highest priority.  */
54
55 struct teql_master
56 {
57         struct Qdisc_ops qops;
58         struct net_device *dev;
59         struct Qdisc *slaves;
60         struct list_head master_list;
61         struct net_device_stats stats;
62 };
63
64 struct teql_sched_data
65 {
66         struct Qdisc *next;
67         struct teql_master *m;
68         struct neighbour *ncache;
69         struct sk_buff_head q;
70 };
71
/* Lvalue: successor of slave qdisc @q in the master's circular slave list. */
#define NEXT_SLAVE(q) \
	(((struct teql_sched_data *)qdisc_priv(q))->next)
73
/*
 * Link-type flags the master copies from / intersects over its slaves.
 *
 * The original definition OR-ed IFF_BROADCAST in twice; the duplicate is
 * dropped here, which leaves the value unchanged.  Note that IFF_MULTICAST
 * is NOT part of this mask.
 * NOTE(review): the repeated term may have been meant to be IFF_MULTICAST;
 * adding it would change behaviour, so confirm intent before doing so.
 */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
75
76 /* "teql*" qdisc routines */
77
78 static int
79 teql_enqueue(struct sk_buff *skb, struct Qdisc* sch)
80 {
81         struct net_device *dev = sch->dev;
82         struct teql_sched_data *q = qdisc_priv(sch);
83
84         if (q->q.qlen < dev->tx_queue_len) {
85                 __skb_queue_tail(&q->q, skb);
86                 sch->bstats.bytes += skb->len;
87                 sch->bstats.packets++;
88                 return 0;
89         }
90
91         kfree_skb(skb);
92         sch->qstats.drops++;
93         return NET_XMIT_DROP;
94 }
95
96 static int
97 teql_requeue(struct sk_buff *skb, struct Qdisc* sch)
98 {
99         struct teql_sched_data *q = qdisc_priv(sch);
100
101         __skb_queue_head(&q->q, skb);
102         sch->qstats.requeues++;
103         return 0;
104 }
105
106 static struct sk_buff *
107 teql_dequeue(struct Qdisc* sch)
108 {
109         struct teql_sched_data *dat = qdisc_priv(sch);
110         struct sk_buff *skb;
111
112         skb = __skb_dequeue(&dat->q);
113         if (skb == NULL) {
114                 struct net_device *m = dat->m->dev->qdisc->dev;
115                 if (m) {
116                         dat->m->slaves = sch;
117                         netif_wake_queue(m);
118                 }
119         }
120         sch->q.qlen = dat->q.qlen + dat->m->dev->qdisc->q.qlen;
121         return skb;
122 }
123
124 static __inline__ void
125 teql_neigh_release(struct neighbour *n)
126 {
127         if (n)
128                 neigh_release(n);
129 }
130
131 static void
132 teql_reset(struct Qdisc* sch)
133 {
134         struct teql_sched_data *dat = qdisc_priv(sch);
135
136         skb_queue_purge(&dat->q);
137         sch->q.qlen = 0;
138         teql_neigh_release(xchg(&dat->ncache, NULL));
139 }
140
141 static void
142 teql_destroy(struct Qdisc* sch)
143 {
144         struct Qdisc *q, *prev;
145         struct teql_sched_data *dat = qdisc_priv(sch);
146         struct teql_master *master = dat->m;
147
148         if ((prev = master->slaves) != NULL) {
149                 do {
150                         q = NEXT_SLAVE(prev);
151                         if (q == sch) {
152                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
153                                 if (q == master->slaves) {
154                                         master->slaves = NEXT_SLAVE(q);
155                                         if (q == master->slaves) {
156                                                 master->slaves = NULL;
157                                                 spin_lock_bh(&master->dev->queue_lock);
158                                                 qdisc_reset(master->dev->qdisc);
159                                                 spin_unlock_bh(&master->dev->queue_lock);
160                                         }
161                                 }
162                                 skb_queue_purge(&dat->q);
163                                 teql_neigh_release(xchg(&dat->ncache, NULL));
164                                 break;
165                         }
166
167                 } while ((prev = q) != master->slaves);
168         }
169 }
170
171 static int teql_qdisc_init(struct Qdisc *sch, struct rtattr *opt)
172 {
173         struct net_device *dev = sch->dev;
174         struct teql_master *m = (struct teql_master*)sch->ops;
175         struct teql_sched_data *q = qdisc_priv(sch);
176
177         if (dev->hard_header_len > m->dev->hard_header_len)
178                 return -EINVAL;
179
180         if (m->dev == dev)
181                 return -ELOOP;
182
183         q->m = m;
184
185         skb_queue_head_init(&q->q);
186
187         if (m->slaves) {
188                 if (m->dev->flags & IFF_UP) {
189                         if ((m->dev->flags&IFF_POINTOPOINT && !(dev->flags&IFF_POINTOPOINT))
190                             || (m->dev->flags&IFF_BROADCAST && !(dev->flags&IFF_BROADCAST))
191                             || (m->dev->flags&IFF_MULTICAST && !(dev->flags&IFF_MULTICAST))
192                             || dev->mtu < m->dev->mtu)
193                                 return -EINVAL;
194                 } else {
195                         if (!(dev->flags&IFF_POINTOPOINT))
196                                 m->dev->flags &= ~IFF_POINTOPOINT;
197                         if (!(dev->flags&IFF_BROADCAST))
198                                 m->dev->flags &= ~IFF_BROADCAST;
199                         if (!(dev->flags&IFF_MULTICAST))
200                                 m->dev->flags &= ~IFF_MULTICAST;
201                         if (dev->mtu < m->dev->mtu)
202                                 m->dev->mtu = dev->mtu;
203                 }
204                 q->next = NEXT_SLAVE(m->slaves);
205                 NEXT_SLAVE(m->slaves) = sch;
206         } else {
207                 q->next = sch;
208                 m->slaves = sch;
209                 m->dev->mtu = dev->mtu;
210                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
211         }
212         return 0;
213 }
214
215
216 static int
217 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device *dev)
218 {
219         struct teql_sched_data *q = qdisc_priv(dev->qdisc);
220         struct neighbour *mn = skb->dst->neighbour;
221         struct neighbour *n = q->ncache;
222
223         if (mn->tbl == NULL)
224                 return -EINVAL;
225         if (n && n->tbl == mn->tbl &&
226             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
227                 atomic_inc(&n->refcnt);
228         } else {
229                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
230                 if (IS_ERR(n))
231                         return PTR_ERR(n);
232         }
233         if (neigh_event_send(n, skb_res) == 0) {
234                 int err;
235
236                 read_lock(&n->lock);
237                 err = dev_hard_header(skb, dev, ntohs(skb->protocol),
238                                       n->ha, NULL, skb->len);
239                 read_unlock(&n->lock);
240
241                 if (err < 0) {
242                         neigh_release(n);
243                         return -EINVAL;
244                 }
245                 teql_neigh_release(xchg(&q->ncache, n));
246                 return 0;
247         }
248         neigh_release(n);
249         return (skb_res == NULL) ? -EAGAIN : 1;
250 }
251
252 static inline int teql_resolve(struct sk_buff *skb,
253                                struct sk_buff *skb_res, struct net_device *dev)
254 {
255         if (dev->qdisc == &noop_qdisc)
256                 return -ENODEV;
257
258         if (dev->header_ops == NULL ||
259             skb->dst == NULL ||
260             skb->dst->neighbour == NULL)
261                 return 0;
262         return __teql_resolve(skb, skb_res, dev);
263 }
264
265 static int teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
266 {
267         struct teql_master *master = netdev_priv(dev);
268         struct Qdisc *start, *q;
269         int busy;
270         int nores;
271         int len = skb->len;
272         int subq = skb_get_queue_mapping(skb);
273         struct sk_buff *skb_res = NULL;
274
275         start = master->slaves;
276
277 restart:
278         nores = 0;
279         busy = 0;
280
281         if ((q = start) == NULL)
282                 goto drop;
283
284         do {
285                 struct net_device *slave = q->dev;
286
287                 if (slave->qdisc_sleeping != q)
288                         continue;
289                 if (netif_queue_stopped(slave) ||
290                     __netif_subqueue_stopped(slave, subq) ||
291                     !netif_running(slave)) {
292                         busy = 1;
293                         continue;
294                 }
295
296                 switch (teql_resolve(skb, skb_res, slave)) {
297                 case 0:
298                         if (netif_tx_trylock(slave)) {
299                                 if (!netif_queue_stopped(slave) &&
300                                     !__netif_subqueue_stopped(slave, subq) &&
301                                     slave->hard_start_xmit(skb, slave) == 0) {
302                                         netif_tx_unlock(slave);
303                                         master->slaves = NEXT_SLAVE(q);
304                                         netif_wake_queue(dev);
305                                         master->stats.tx_packets++;
306                                         master->stats.tx_bytes += len;
307                                         return 0;
308                                 }
309                                 netif_tx_unlock(slave);
310                         }
311                         if (netif_queue_stopped(dev))
312                                 busy = 1;
313                         break;
314                 case 1:
315                         master->slaves = NEXT_SLAVE(q);
316                         return 0;
317                 default:
318                         nores = 1;
319                         break;
320                 }
321                 __skb_pull(skb, skb_network_offset(skb));
322         } while ((q = NEXT_SLAVE(q)) != start);
323
324         if (nores && skb_res == NULL) {
325                 skb_res = skb;
326                 goto restart;
327         }
328
329         if (busy) {
330                 netif_stop_queue(dev);
331                 return 1;
332         }
333         master->stats.tx_errors++;
334
335 drop:
336         master->stats.tx_dropped++;
337         dev_kfree_skb(skb);
338         return 0;
339 }
340
341 static int teql_master_open(struct net_device *dev)
342 {
343         struct Qdisc * q;
344         struct teql_master *m = netdev_priv(dev);
345         int mtu = 0xFFFE;
346         unsigned flags = IFF_NOARP|IFF_MULTICAST;
347
348         if (m->slaves == NULL)
349                 return -EUNATCH;
350
351         flags = FMASK;
352
353         q = m->slaves;
354         do {
355                 struct net_device *slave = q->dev;
356
357                 if (slave == NULL)
358                         return -EUNATCH;
359
360                 if (slave->mtu < mtu)
361                         mtu = slave->mtu;
362                 if (slave->hard_header_len > LL_MAX_HEADER)
363                         return -EINVAL;
364
365                 /* If all the slaves are BROADCAST, master is BROADCAST
366                    If all the slaves are PtP, master is PtP
367                    Otherwise, master is NBMA.
368                  */
369                 if (!(slave->flags&IFF_POINTOPOINT))
370                         flags &= ~IFF_POINTOPOINT;
371                 if (!(slave->flags&IFF_BROADCAST))
372                         flags &= ~IFF_BROADCAST;
373                 if (!(slave->flags&IFF_MULTICAST))
374                         flags &= ~IFF_MULTICAST;
375         } while ((q = NEXT_SLAVE(q)) != m->slaves);
376
377         m->dev->mtu = mtu;
378         m->dev->flags = (m->dev->flags&~FMASK) | flags;
379         netif_start_queue(m->dev);
380         return 0;
381 }
382
/* stop() of the master device: stop its transmit queue.  Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
	netif_stop_queue(dev);

	return 0;
}
388
389 static struct net_device_stats *teql_master_stats(struct net_device *dev)
390 {
391         struct teql_master *m = netdev_priv(dev);
392         return &m->stats;
393 }
394
395 static int teql_master_mtu(struct net_device *dev, int new_mtu)
396 {
397         struct teql_master *m = netdev_priv(dev);
398         struct Qdisc *q;
399
400         if (new_mtu < 68)
401                 return -EINVAL;
402
403         q = m->slaves;
404         if (q) {
405                 do {
406                         if (new_mtu > q->dev->mtu)
407                                 return -EINVAL;
408                 } while ((q=NEXT_SLAVE(q)) != m->slaves);
409         }
410
411         dev->mtu = new_mtu;
412         return 0;
413 }
414
415 static __init void teql_master_setup(struct net_device *dev)
416 {
417         struct teql_master *master = netdev_priv(dev);
418         struct Qdisc_ops *ops = &master->qops;
419
420         master->dev     = dev;
421         ops->priv_size  = sizeof(struct teql_sched_data);
422
423         ops->enqueue    =       teql_enqueue;
424         ops->dequeue    =       teql_dequeue;
425         ops->requeue    =       teql_requeue;
426         ops->init       =       teql_qdisc_init;
427         ops->reset      =       teql_reset;
428         ops->destroy    =       teql_destroy;
429         ops->owner      =       THIS_MODULE;
430
431         dev->open               = teql_master_open;
432         dev->hard_start_xmit    = teql_master_xmit;
433         dev->stop               = teql_master_close;
434         dev->get_stats          = teql_master_stats;
435         dev->change_mtu         = teql_master_mtu;
436         dev->type               = ARPHRD_VOID;
437         dev->mtu                = 1500;
438         dev->tx_queue_len       = 100;
439         dev->flags              = IFF_NOARP;
440         dev->hard_header_len    = LL_MAX_HEADER;
441 }
442
443 static LIST_HEAD(master_dev_list);
444 static int max_equalizers = 1;
445 module_param(max_equalizers, int, 0);
446 MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
447
448 static int __init teql_init(void)
449 {
450         int i;
451         int err = -ENODEV;
452
453         for (i = 0; i < max_equalizers; i++) {
454                 struct net_device *dev;
455                 struct teql_master *master;
456
457                 dev = alloc_netdev(sizeof(struct teql_master),
458                                   "teql%d", teql_master_setup);
459                 if (!dev) {
460                         err = -ENOMEM;
461                         break;
462                 }
463
464                 if ((err = register_netdev(dev))) {
465                         free_netdev(dev);
466                         break;
467                 }
468
469                 master = netdev_priv(dev);
470
471                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
472                 err = register_qdisc(&master->qops);
473
474                 if (err) {
475                         unregister_netdev(dev);
476                         free_netdev(dev);
477                         break;
478                 }
479
480                 list_add_tail(&master->master_list, &master_dev_list);
481         }
482         return i ? 0 : err;
483 }
484
485 static void __exit teql_exit(void)
486 {
487         struct teql_master *master, *nxt;
488
489         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
490
491                 list_del(&master->master_list);
492
493                 unregister_qdisc(&master->qops);
494                 unregister_netdev(master->dev);
495                 free_netdev(master->dev);
496         }
497 }
498
499 module_init(teql_init);
500 module_exit(teql_exit);
501
502 MODULE_LICENSE("GPL");