Merge branch 'for-rmk' of git://git.marvell.com/orion
[linux-2.6] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"'s seem to be redundant; RFC 2207 requires
41    only one "Generalized Port Identifier". So that for classic
42    ah, esp (and udp,tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as a result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68
/* Root object of one classifier instance; stored in tp->root by rsvp_init(). */
struct rsvp_head
{
	/* Bitmap with one bit per 8-bit tunnel ID (see tunnel_bts()). */
	u32			tmap[256/32];
	/* Stepped by 0x10000 in gen_handle() to form the upper 16 handle bits. */
	u32			hgenerator;
	/* Tunnel ID most recently tried by gen_tunnel(); 0 is skipped there. */
	u8			tgenerator;
	/* Session lists, indexed by hash_dst() (0..255). */
	struct rsvp_session	*ht[256];
};
76
/*
 * One "RSVP session": all filters sharing the same destination address,
 * protocol, destination port selector and tunnel ID.
 */
struct rsvp_session
{
	/* Next session in the same rsvp_head.ht[] bucket. */
	struct rsvp_session	*next;
	/* Destination address, RSVP_DST_LEN 32-bit words. */
	__be32			dst[RSVP_DST_LEN];
	/* (key, mask, offset) matched against the transport header in rsvp_classify(). */
	struct tc_rsvp_gpi	dpi;
	u8			protocol;
	/* Compared against the tunnel ID carried over a restart in rsvp_classify(). */
	u8			tunnelid;
	/* 16 (src,sport) hash slots, and one wildcard source slot */
	struct rsvp_filter	*ht[16+1];
};
87
88
/* One "RSVP flow": a source selector within a session plus its result. */
struct rsvp_filter
{
	/* Next filter in the same rsvp_session.ht[] slot. */
	struct rsvp_filter	*next;
	/* Source address; left zeroed for filters in the wildcard slot (16). */
	__be32			src[RSVP_DST_LEN];
	/* (key, mask, offset) matched against the transport header in rsvp_classify(). */
	struct tc_rsvp_gpi	spi;
	/*
	 * If non-zero, rsvp_classify() skips this many bytes past the
	 * transport header start and restarts the lookup, using
	 * res.classid as the tunnel ID.
	 */
	u8			tunnelhdr;

	/* Copied into the caller's tcf_result on a match. */
	struct tcf_result	res;
	/* Extensions run through tcf_exts_exec() on a match. */
	struct tcf_exts		exts;

	/*
	 * Bits 0-7: session bucket (hash_dst), bits 8-15: filter slot
	 * (hash_src or 16 for wildcard), upper bits from gen_handle().
	 */
	u32			handle;
	/* Owning session. */
	struct rsvp_session	*sess;
};
102
103 static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104 {
105         unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
106         h ^= h>>16;
107         h ^= h>>8;
108         return (h ^ protocol ^ tunnelid) & 0xFF;
109 }
110
111 static __inline__ unsigned hash_src(__be32 *src)
112 {
113         unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
114         h ^= h>>16;
115         h ^= h>>8;
116         h ^= h>>4;
117         return h & 0xF;
118 }
119
/*
 * Attribute types under which this classifier carries its police and
 * action extensions; passed to tcf_exts_validate() and tcf_exts_dump*().
 */
static struct tcf_ext_map rsvp_ext_map = {
	.police = TCA_RSVP_POLICE,
	.action = TCA_RSVP_ACT
};
124
/*
 * Run the extensions of the matched filter.
 *
 * Expects `skb`, `f` and `res` to be in scope and must be expanded
 * directly inside a loop over filters:
 *   r < 0  -> `continue` with the next filter of the enclosing loop,
 *   r > 0  -> return r from the enclosing function,
 *   r == 0 -> fall through to the code after the macro.
 */
#define RSVP_APPLY_RESULT()				\
{							\
	int r = tcf_exts_exec(skb, &f->exts, res);	\
	if (r < 0)					\
		continue;				\
	else if (r > 0)					\
		return r;				\
}
133
/*
 * Classify one packet.
 *
 * Looks up the session by (dst, protocol, dpi, tunnelid), then the filter
 * by (src, spi) in the hashed slot, then falls back to the session's
 * wildcard slot.  On a match *res is set from the filter and its
 * extensions are run.
 *
 * Returns 0 on a match of a non-tunnel filter, a positive value if
 * tcf_exts_exec() returned one, and -1 if nothing matched (or, for the
 * IPv4 build, if the packet is a fragment).
 *
 * NOTE(review): the 32-bit loads at xprt + {dpi,spi}.offset and the
 * header pointer computed for a tunnel restart are not checked against
 * the packet length anywhere in this function.
 */
static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
			 struct tcf_result *res)
{
	struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
	struct rsvp_session *s;
	struct rsvp_filter *f;
	unsigned h1, h2;
	__be32 *dst, *src;
	u8 protocol;
	u8 tunnelid = 0;	/* 0 on the first pass; set from a tunnel filter before restart */
	u8 *xprt;		/* start of the header following the network header */
#if RSVP_DST_LEN == 4
	struct ipv6hdr *nhptr = ipv6_hdr(skb);
#else
	struct iphdr *nhptr = ip_hdr(skb);
#endif

restart:

#if RSVP_DST_LEN == 4
	src = &nhptr->saddr.s6_addr32[0];
	dst = &nhptr->daddr.s6_addr32[0];
	protocol = nhptr->nexthdr;
	xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
#else
	src = &nhptr->saddr;
	dst = &nhptr->daddr;
	protocol = nhptr->protocol;
	xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
	/* Fragments (MF set or non-zero offset) are never classified. */
	if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
		return -1;
#endif

	h1 = hash_dst(dst, protocol, tunnelid);
	h2 = hash_src(src);

	for (s = sht[h1]; s; s = s->next) {
		/* Last address word first, the remaining words only in the 4-word build. */
		if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
		    protocol == s->protocol &&
		    !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
#if RSVP_DST_LEN == 4
		    && dst[0] == s->dst[0]
		    && dst[1] == s->dst[1]
		    && dst[2] == s->dst[2]
#endif
		    && tunnelid == s->tunnelid) {

			for (f = s->ht[h2]; f; f = f->next) {
				if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
				    !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
#if RSVP_DST_LEN == 4
				    && src[0] == f->src[0]
				    && src[1] == f->src[1]
				    && src[2] == f->src[2]
#endif
				    ) {
					*res = f->res;
					/* May `continue` to the next filter or return. */
					RSVP_APPLY_RESULT();

					/* Also entered by goto from the wildcard loop below. */
matched:
					if (f->tunnelhdr == 0)
						return 0;

					/*
					 * Tunnel filter: classid is the tunnel ID.  Point
					 * nhptr so that the next header parsed at restart
					 * begins tunnelhdr bytes past xprt.
					 */
					tunnelid = f->res.classid;
					nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
					goto restart;
				}
			}

			/* And wildcard bucket... */
			for (f = s->ht[16]; f; f = f->next) {
				*res = f->res;
				/* `continue` here advances within the wildcard slot. */
				RSVP_APPLY_RESULT();
				goto matched;
			}
			/* Only the first matching session is consulted. */
			return -1;
		}
	}
	return -1;
}
214
215 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
216 {
217         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
218         struct rsvp_session *s;
219         struct rsvp_filter *f;
220         unsigned h1 = handle&0xFF;
221         unsigned h2 = (handle>>8)&0xFF;
222
223         if (h2 > 16)
224                 return 0;
225
226         for (s = sht[h1]; s; s = s->next) {
227                 for (f = s->ht[h2]; f; f = f->next) {
228                         if (f->handle == handle)
229                                 return (unsigned long)f;
230                 }
231         }
232         return 0;
233 }
234
/* Counterpart of rsvp_get(); rsvp_get() takes no reference, so nothing to release. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
}
238
239 static int rsvp_init(struct tcf_proto *tp)
240 {
241         struct rsvp_head *data;
242
243         data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
244         if (data) {
245                 tp->root = data;
246                 return 0;
247         }
248         return -ENOBUFS;
249 }
250
251 static inline void
252 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
253 {
254         tcf_unbind_filter(tp, &f->res);
255         tcf_exts_destroy(tp, &f->exts);
256         kfree(f);
257 }
258
259 static void rsvp_destroy(struct tcf_proto *tp)
260 {
261         struct rsvp_head *data = xchg(&tp->root, NULL);
262         struct rsvp_session **sht;
263         int h1, h2;
264
265         if (data == NULL)
266                 return;
267
268         sht = data->ht;
269
270         for (h1=0; h1<256; h1++) {
271                 struct rsvp_session *s;
272
273                 while ((s = sht[h1]) != NULL) {
274                         sht[h1] = s->next;
275
276                         for (h2=0; h2<=16; h2++) {
277                                 struct rsvp_filter *f;
278
279                                 while ((f = s->ht[h2]) != NULL) {
280                                         s->ht[h2] = f->next;
281                                         rsvp_delete_filter(tp, f);
282                                 }
283                         }
284                         kfree(s);
285                 }
286         }
287         kfree(data);
288 }
289
290 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
291 {
292         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
293         unsigned h = f->handle;
294         struct rsvp_session **sp;
295         struct rsvp_session *s = f->sess;
296         int i;
297
298         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
299                 if (*fp == f) {
300                         tcf_tree_lock(tp);
301                         *fp = f->next;
302                         tcf_tree_unlock(tp);
303                         rsvp_delete_filter(tp, f);
304
305                         /* Strip tree */
306
307                         for (i=0; i<=16; i++)
308                                 if (s->ht[i])
309                                         return 0;
310
311                         /* OK, session has no flows */
312                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
313                              *sp; sp = &(*sp)->next) {
314                                 if (*sp == s) {
315                                         tcf_tree_lock(tp);
316                                         *sp = s->next;
317                                         tcf_tree_unlock(tp);
318
319                                         kfree(s);
320                                         return 0;
321                                 }
322                         }
323
324                         return 0;
325                 }
326         }
327         return 0;
328 }
329
330 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
331 {
332         struct rsvp_head *data = tp->root;
333         int i = 0xFFFF;
334
335         while (i-- > 0) {
336                 u32 h;
337                 if ((data->hgenerator += 0x10000) == 0)
338                         data->hgenerator = 0x10000;
339                 h = data->hgenerator|salt;
340                 if (rsvp_get(tp, h) == 0)
341                         return h;
342         }
343         return 0;
344 }
345
346 static int tunnel_bts(struct rsvp_head *data)
347 {
348         int n = data->tgenerator>>5;
349         u32 b = 1<<(data->tgenerator&0x1F);
350
351         if (data->tmap[n]&b)
352                 return 0;
353         data->tmap[n] |= b;
354         return 1;
355 }
356
357 static void tunnel_recycle(struct rsvp_head *data)
358 {
359         struct rsvp_session **sht = data->ht;
360         u32 tmap[256/32];
361         int h1, h2;
362
363         memset(tmap, 0, sizeof(tmap));
364
365         for (h1=0; h1<256; h1++) {
366                 struct rsvp_session *s;
367                 for (s = sht[h1]; s; s = s->next) {
368                         for (h2=0; h2<=16; h2++) {
369                                 struct rsvp_filter *f;
370
371                                 for (f = s->ht[h2]; f; f = f->next) {
372                                         if (f->tunnelhdr == 0)
373                                                 continue;
374                                         data->tgenerator = f->res.classid;
375                                         tunnel_bts(data);
376                                 }
377                         }
378                 }
379         }
380
381         memcpy(data->tmap, tmap, sizeof(tmap));
382 }
383
384 static u32 gen_tunnel(struct rsvp_head *data)
385 {
386         int i, k;
387
388         for (k=0; k<2; k++) {
389                 for (i=255; i>0; i--) {
390                         if (++data->tgenerator == 0)
391                                 data->tgenerator = 1;
392                         if (tunnel_bts(data))
393                                 return data->tgenerator;
394                 }
395                 tunnel_recycle(data);
396         }
397         return 0;
398 }
399
/*
 * Validation policy for the attributes nested in TCA_OPTIONS, indexed by
 * TCA_RSVP_* attribute type; used by nla_parse_nested() in rsvp_change().
 *
 * NOTE(review): for NLA_BINARY the .len value is understood to be an
 * upper bound only, so DST/SRC shorter than a full address would pass
 * this policy -- confirm against the netlink core.
 */
static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
	[TCA_RSVP_CLASSID]	= { .type = NLA_U32 },
	[TCA_RSVP_DST]		= { .type = NLA_BINARY,
				    .len = RSVP_DST_LEN * sizeof(u32) },
	[TCA_RSVP_SRC]		= { .type = NLA_BINARY,
				    .len = RSVP_DST_LEN * sizeof(u32) },
	[TCA_RSVP_PINFO]	= { .len = sizeof(struct tc_rsvp_pinfo) },
};
408
409 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
410                        u32 handle,
411                        struct nlattr **tca,
412                        unsigned long *arg)
413 {
414         struct rsvp_head *data = tp->root;
415         struct rsvp_filter *f, **fp;
416         struct rsvp_session *s, **sp;
417         struct tc_rsvp_pinfo *pinfo = NULL;
418         struct nlattr *opt = tca[TCA_OPTIONS-1];
419         struct nlattr *tb[TCA_RSVP_MAX + 1];
420         struct tcf_exts e;
421         unsigned h1, h2;
422         __be32 *dst;
423         int err;
424
425         if (opt == NULL)
426                 return handle ? -EINVAL : 0;
427
428         err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
429         if (err < 0)
430                 return err;
431
432         err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
433         if (err < 0)
434                 return err;
435
436         if ((f = (struct rsvp_filter*)*arg) != NULL) {
437                 /* Node exists: adjust only classid */
438
439                 if (f->handle != handle && handle)
440                         goto errout2;
441                 if (tb[TCA_RSVP_CLASSID-1]) {
442                         f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
443                         tcf_bind_filter(tp, &f->res, base);
444                 }
445
446                 tcf_exts_change(tp, &f->exts, &e);
447                 return 0;
448         }
449
450         /* Now more serious part... */
451         err = -EINVAL;
452         if (handle)
453                 goto errout2;
454         if (tb[TCA_RSVP_DST-1] == NULL)
455                 goto errout2;
456
457         err = -ENOBUFS;
458         f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
459         if (f == NULL)
460                 goto errout2;
461
462         h2 = 16;
463         if (tb[TCA_RSVP_SRC-1]) {
464                 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
465                 h2 = hash_src(f->src);
466         }
467         if (tb[TCA_RSVP_PINFO-1]) {
468                 pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
469                 f->spi = pinfo->spi;
470                 f->tunnelhdr = pinfo->tunnelhdr;
471         }
472         if (tb[TCA_RSVP_CLASSID-1])
473                 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
474
475         dst = nla_data(tb[TCA_RSVP_DST-1]);
476         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
477
478         err = -ENOMEM;
479         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
480                 goto errout;
481
482         if (f->tunnelhdr) {
483                 err = -EINVAL;
484                 if (f->res.classid > 255)
485                         goto errout;
486
487                 err = -ENOMEM;
488                 if (f->res.classid == 0 &&
489                     (f->res.classid = gen_tunnel(data)) == 0)
490                         goto errout;
491         }
492
493         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
494                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
495                     pinfo && pinfo->protocol == s->protocol &&
496                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
497 #if RSVP_DST_LEN == 4
498                     && dst[0] == s->dst[0]
499                     && dst[1] == s->dst[1]
500                     && dst[2] == s->dst[2]
501 #endif
502                     && pinfo->tunnelid == s->tunnelid) {
503
504 insert:
505                         /* OK, we found appropriate session */
506
507                         fp = &s->ht[h2];
508
509                         f->sess = s;
510                         if (f->tunnelhdr == 0)
511                                 tcf_bind_filter(tp, &f->res, base);
512
513                         tcf_exts_change(tp, &f->exts, &e);
514
515                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
516                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
517                                         break;
518                         f->next = *fp;
519                         wmb();
520                         *fp = f;
521
522                         *arg = (unsigned long)f;
523                         return 0;
524                 }
525         }
526
527         /* No session found. Create new one. */
528
529         err = -ENOBUFS;
530         s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
531         if (s == NULL)
532                 goto errout;
533         memcpy(s->dst, dst, sizeof(s->dst));
534
535         if (pinfo) {
536                 s->dpi = pinfo->dpi;
537                 s->protocol = pinfo->protocol;
538                 s->tunnelid = pinfo->tunnelid;
539         }
540         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
541                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
542                         break;
543         }
544         s->next = *sp;
545         wmb();
546         *sp = s;
547
548         goto insert;
549
550 errout:
551         kfree(f);
552 errout2:
553         tcf_exts_destroy(tp, &e);
554         return err;
555 }
556
557 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
558 {
559         struct rsvp_head *head = tp->root;
560         unsigned h, h1;
561
562         if (arg->stop)
563                 return;
564
565         for (h = 0; h < 256; h++) {
566                 struct rsvp_session *s;
567
568                 for (s = head->ht[h]; s; s = s->next) {
569                         for (h1 = 0; h1 <= 16; h1++) {
570                                 struct rsvp_filter *f;
571
572                                 for (f = s->ht[h1]; f; f = f->next) {
573                                         if (arg->count < arg->skip) {
574                                                 arg->count++;
575                                                 continue;
576                                         }
577                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
578                                                 arg->stop = 1;
579                                                 return;
580                                         }
581                                         arg->count++;
582                                 }
583                         }
584                 }
585         }
586 }
587
588 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
589                      struct sk_buff *skb, struct tcmsg *t)
590 {
591         struct rsvp_filter *f = (struct rsvp_filter*)fh;
592         struct rsvp_session *s;
593         unsigned char *b = skb_tail_pointer(skb);
594         struct nlattr *nest;
595         struct tc_rsvp_pinfo pinfo;
596
597         if (f == NULL)
598                 return skb->len;
599         s = f->sess;
600
601         t->tcm_handle = f->handle;
602
603         nest = nla_nest_start(skb, TCA_OPTIONS);
604         if (nest == NULL)
605                 goto nla_put_failure;
606
607         NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
608         pinfo.dpi = s->dpi;
609         pinfo.spi = f->spi;
610         pinfo.protocol = s->protocol;
611         pinfo.tunnelid = s->tunnelid;
612         pinfo.tunnelhdr = f->tunnelhdr;
613         pinfo.pad = 0;
614         NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
615         if (f->res.classid)
616                 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
617         if (((f->handle>>8)&0xFF) != 16)
618                 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
619
620         if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
621                 goto nla_put_failure;
622
623         nla_nest_end(skb, nest);
624
625         if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
626                 goto nla_put_failure;
627         return skb->len;
628
629 nla_put_failure:
630         nlmsg_trim(skb, b);
631         return -1;
632 }
633
/*
 * Operations table registered with the classifier core by init_rsvp().
 * RSVP_OPS and RSVP_ID are not defined in this template; they are
 * expected to come from the file that includes it.
 */
static struct tcf_proto_ops RSVP_OPS = {
	.next		=	NULL,
	.kind		=	RSVP_ID,
	.classify	=	rsvp_classify,
	.init		=	rsvp_init,
	.destroy	=	rsvp_destroy,
	.get		=	rsvp_get,
	.put		=	rsvp_put,
	.change		=	rsvp_change,
	.delete		=	rsvp_delete,
	.walk		=	rsvp_walk,
	.dump		=	rsvp_dump,
	.owner		=	THIS_MODULE,
};
648
649 static int __init init_rsvp(void)
650 {
651         return register_tcf_proto_ops(&RSVP_OPS);
652 }
653
/* Module unload: unregister the classifier ops registered by init_rsvp(). */
static void __exit exit_rsvp(void)
{
	unregister_tcf_proto_ops(&RSVP_OPS);
}
658
/* Hook the register/unregister routines into module load and unload. */
module_init(init_rsvp)
module_exit(exit_rsvp)