[INET]: Collect frag queues management objects together
[linux-2.6] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"s seem to be redundant; rfc2207 requires
41    only one "Generalized Port Identifier". So for classic
42    ah, esp (and udp,tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as a result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68
69 struct rsvp_head
70 {
71         u32                     tmap[256/32];
72         u32                     hgenerator;
73         u8                      tgenerator;
74         struct rsvp_session     *ht[256];
75 };
76
77 struct rsvp_session
78 {
79         struct rsvp_session     *next;
80         __be32                  dst[RSVP_DST_LEN];
81         struct tc_rsvp_gpi      dpi;
82         u8                      protocol;
83         u8                      tunnelid;
84         /* 16 (src,sport) hash slots, and one wildcard source slot */
85         struct rsvp_filter      *ht[16+1];
86 };
87
88
89 struct rsvp_filter
90 {
91         struct rsvp_filter      *next;
92         __be32                  src[RSVP_DST_LEN];
93         struct tc_rsvp_gpi      spi;
94         u8                      tunnelhdr;
95
96         struct tcf_result       res;
97         struct tcf_exts         exts;
98
99         u32                     handle;
100         struct rsvp_session     *sess;
101 };
102
103 static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104 {
105         unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
106         h ^= h>>16;
107         h ^= h>>8;
108         return (h ^ protocol ^ tunnelid) & 0xFF;
109 }
110
111 static __inline__ unsigned hash_src(__be32 *src)
112 {
113         unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
114         h ^= h>>16;
115         h ^= h>>8;
116         h ^= h>>4;
117         return h & 0xF;
118 }
119
120 static struct tcf_ext_map rsvp_ext_map = {
121         .police = TCA_RSVP_POLICE,
122         .action = TCA_RSVP_ACT
123 };
124
/*
 * Run the extensions of the current filter.  Expects `skb', `f' and `res'
 * to be in scope at the point of use and must be expanded inside a loop:
 *   - negative result: `continue' with the next filter of that loop,
 *   - positive result: return it from the enclosing function,
 *   - zero:            fall through to the code after the macro.
 *
 * This is deliberately a bare brace block and not do { } while (0):
 * inside a do/while the `continue' would no longer reach the caller's loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
	if (verdict > 0)					\
		return verdict;					\
	if (verdict < 0)					\
		continue;					\
}
133
134 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135                          struct tcf_result *res)
136 {
137         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138         struct rsvp_session *s;
139         struct rsvp_filter *f;
140         unsigned h1, h2;
141         __be32 *dst, *src;
142         u8 protocol;
143         u8 tunnelid = 0;
144         u8 *xprt;
145 #if RSVP_DST_LEN == 4
146         struct ipv6hdr *nhptr = ipv6_hdr(skb);
147 #else
148         struct iphdr *nhptr = ip_hdr(skb);
149 #endif
150
151 restart:
152
153 #if RSVP_DST_LEN == 4
154         src = &nhptr->saddr.s6_addr32[0];
155         dst = &nhptr->daddr.s6_addr32[0];
156         protocol = nhptr->nexthdr;
157         xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
158 #else
159         src = &nhptr->saddr;
160         dst = &nhptr->daddr;
161         protocol = nhptr->protocol;
162         xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
163         if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
164                 return -1;
165 #endif
166
167         h1 = hash_dst(dst, protocol, tunnelid);
168         h2 = hash_src(src);
169
170         for (s = sht[h1]; s; s = s->next) {
171                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
172                     protocol == s->protocol &&
173                     !(s->dpi.mask & (*(u32*)(xprt+s->dpi.offset)^s->dpi.key))
174 #if RSVP_DST_LEN == 4
175                     && dst[0] == s->dst[0]
176                     && dst[1] == s->dst[1]
177                     && dst[2] == s->dst[2]
178 #endif
179                     && tunnelid == s->tunnelid) {
180
181                         for (f = s->ht[h2]; f; f = f->next) {
182                                 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
183                                     !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
184 #if RSVP_DST_LEN == 4
185                                     && src[0] == f->src[0]
186                                     && src[1] == f->src[1]
187                                     && src[2] == f->src[2]
188 #endif
189                                     ) {
190                                         *res = f->res;
191                                         RSVP_APPLY_RESULT();
192
193 matched:
194                                         if (f->tunnelhdr == 0)
195                                                 return 0;
196
197                                         tunnelid = f->res.classid;
198                                         nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
199                                         goto restart;
200                                 }
201                         }
202
203                         /* And wildcard bucket... */
204                         for (f = s->ht[16]; f; f = f->next) {
205                                 *res = f->res;
206                                 RSVP_APPLY_RESULT();
207                                 goto matched;
208                         }
209                         return -1;
210                 }
211         }
212         return -1;
213 }
214
215 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
216 {
217         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
218         struct rsvp_session *s;
219         struct rsvp_filter *f;
220         unsigned h1 = handle&0xFF;
221         unsigned h2 = (handle>>8)&0xFF;
222
223         if (h2 > 16)
224                 return 0;
225
226         for (s = sht[h1]; s; s = s->next) {
227                 for (f = s->ht[h2]; f; f = f->next) {
228                         if (f->handle == handle)
229                                 return (unsigned long)f;
230                 }
231         }
232         return 0;
233 }
234
/* Counterpart of rsvp_get(); intentionally does nothing. */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
	/* nothing to release */
}
238
239 static int rsvp_init(struct tcf_proto *tp)
240 {
241         struct rsvp_head *data;
242
243         data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
244         if (data) {
245                 tp->root = data;
246                 return 0;
247         }
248         return -ENOBUFS;
249 }
250
251 static inline void
252 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
253 {
254         tcf_unbind_filter(tp, &f->res);
255         tcf_exts_destroy(tp, &f->exts);
256         kfree(f);
257 }
258
259 static void rsvp_destroy(struct tcf_proto *tp)
260 {
261         struct rsvp_head *data = xchg(&tp->root, NULL);
262         struct rsvp_session **sht;
263         int h1, h2;
264
265         if (data == NULL)
266                 return;
267
268         sht = data->ht;
269
270         for (h1=0; h1<256; h1++) {
271                 struct rsvp_session *s;
272
273                 while ((s = sht[h1]) != NULL) {
274                         sht[h1] = s->next;
275
276                         for (h2=0; h2<=16; h2++) {
277                                 struct rsvp_filter *f;
278
279                                 while ((f = s->ht[h2]) != NULL) {
280                                         s->ht[h2] = f->next;
281                                         rsvp_delete_filter(tp, f);
282                                 }
283                         }
284                         kfree(s);
285                 }
286         }
287         kfree(data);
288 }
289
290 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
291 {
292         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
293         unsigned h = f->handle;
294         struct rsvp_session **sp;
295         struct rsvp_session *s = f->sess;
296         int i;
297
298         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
299                 if (*fp == f) {
300                         tcf_tree_lock(tp);
301                         *fp = f->next;
302                         tcf_tree_unlock(tp);
303                         rsvp_delete_filter(tp, f);
304
305                         /* Strip tree */
306
307                         for (i=0; i<=16; i++)
308                                 if (s->ht[i])
309                                         return 0;
310
311                         /* OK, session has no flows */
312                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
313                              *sp; sp = &(*sp)->next) {
314                                 if (*sp == s) {
315                                         tcf_tree_lock(tp);
316                                         *sp = s->next;
317                                         tcf_tree_unlock(tp);
318
319                                         kfree(s);
320                                         return 0;
321                                 }
322                         }
323
324                         return 0;
325                 }
326         }
327         return 0;
328 }
329
330 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
331 {
332         struct rsvp_head *data = tp->root;
333         int i = 0xFFFF;
334
335         while (i-- > 0) {
336                 u32 h;
337                 if ((data->hgenerator += 0x10000) == 0)
338                         data->hgenerator = 0x10000;
339                 h = data->hgenerator|salt;
340                 if (rsvp_get(tp, h) == 0)
341                         return h;
342         }
343         return 0;
344 }
345
346 static int tunnel_bts(struct rsvp_head *data)
347 {
348         int n = data->tgenerator>>5;
349         u32 b = 1<<(data->tgenerator&0x1F);
350
351         if (data->tmap[n]&b)
352                 return 0;
353         data->tmap[n] |= b;
354         return 1;
355 }
356
357 static void tunnel_recycle(struct rsvp_head *data)
358 {
359         struct rsvp_session **sht = data->ht;
360         u32 tmap[256/32];
361         int h1, h2;
362
363         memset(tmap, 0, sizeof(tmap));
364
365         for (h1=0; h1<256; h1++) {
366                 struct rsvp_session *s;
367                 for (s = sht[h1]; s; s = s->next) {
368                         for (h2=0; h2<=16; h2++) {
369                                 struct rsvp_filter *f;
370
371                                 for (f = s->ht[h2]; f; f = f->next) {
372                                         if (f->tunnelhdr == 0)
373                                                 continue;
374                                         data->tgenerator = f->res.classid;
375                                         tunnel_bts(data);
376                                 }
377                         }
378                 }
379         }
380
381         memcpy(data->tmap, tmap, sizeof(tmap));
382 }
383
384 static u32 gen_tunnel(struct rsvp_head *data)
385 {
386         int i, k;
387
388         for (k=0; k<2; k++) {
389                 for (i=255; i>0; i--) {
390                         if (++data->tgenerator == 0)
391                                 data->tgenerator = 1;
392                         if (tunnel_bts(data))
393                                 return data->tgenerator;
394                 }
395                 tunnel_recycle(data);
396         }
397         return 0;
398 }
399
400 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
401                        u32 handle,
402                        struct rtattr **tca,
403                        unsigned long *arg)
404 {
405         struct rsvp_head *data = tp->root;
406         struct rsvp_filter *f, **fp;
407         struct rsvp_session *s, **sp;
408         struct tc_rsvp_pinfo *pinfo = NULL;
409         struct rtattr *opt = tca[TCA_OPTIONS-1];
410         struct rtattr *tb[TCA_RSVP_MAX];
411         struct tcf_exts e;
412         unsigned h1, h2;
413         __be32 *dst;
414         int err;
415
416         if (opt == NULL)
417                 return handle ? -EINVAL : 0;
418
419         if (rtattr_parse_nested(tb, TCA_RSVP_MAX, opt) < 0)
420                 return -EINVAL;
421
422         err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
423         if (err < 0)
424                 return err;
425
426         if ((f = (struct rsvp_filter*)*arg) != NULL) {
427                 /* Node exists: adjust only classid */
428
429                 if (f->handle != handle && handle)
430                         goto errout2;
431                 if (tb[TCA_RSVP_CLASSID-1]) {
432                         f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
433                         tcf_bind_filter(tp, &f->res, base);
434                 }
435
436                 tcf_exts_change(tp, &f->exts, &e);
437                 return 0;
438         }
439
440         /* Now more serious part... */
441         err = -EINVAL;
442         if (handle)
443                 goto errout2;
444         if (tb[TCA_RSVP_DST-1] == NULL)
445                 goto errout2;
446
447         err = -ENOBUFS;
448         f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
449         if (f == NULL)
450                 goto errout2;
451
452         h2 = 16;
453         if (tb[TCA_RSVP_SRC-1]) {
454                 err = -EINVAL;
455                 if (RTA_PAYLOAD(tb[TCA_RSVP_SRC-1]) != sizeof(f->src))
456                         goto errout;
457                 memcpy(f->src, RTA_DATA(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
458                 h2 = hash_src(f->src);
459         }
460         if (tb[TCA_RSVP_PINFO-1]) {
461                 err = -EINVAL;
462                 if (RTA_PAYLOAD(tb[TCA_RSVP_PINFO-1]) < sizeof(struct tc_rsvp_pinfo))
463                         goto errout;
464                 pinfo = RTA_DATA(tb[TCA_RSVP_PINFO-1]);
465                 f->spi = pinfo->spi;
466                 f->tunnelhdr = pinfo->tunnelhdr;
467         }
468         if (tb[TCA_RSVP_CLASSID-1]) {
469                 err = -EINVAL;
470                 if (RTA_PAYLOAD(tb[TCA_RSVP_CLASSID-1]) != 4)
471                         goto errout;
472                 f->res.classid = *(u32*)RTA_DATA(tb[TCA_RSVP_CLASSID-1]);
473         }
474
475         err = -EINVAL;
476         if (RTA_PAYLOAD(tb[TCA_RSVP_DST-1]) != sizeof(f->src))
477                 goto errout;
478         dst = RTA_DATA(tb[TCA_RSVP_DST-1]);
479         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
480
481         err = -ENOMEM;
482         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
483                 goto errout;
484
485         if (f->tunnelhdr) {
486                 err = -EINVAL;
487                 if (f->res.classid > 255)
488                         goto errout;
489
490                 err = -ENOMEM;
491                 if (f->res.classid == 0 &&
492                     (f->res.classid = gen_tunnel(data)) == 0)
493                         goto errout;
494         }
495
496         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
497                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
498                     pinfo && pinfo->protocol == s->protocol &&
499                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0
500 #if RSVP_DST_LEN == 4
501                     && dst[0] == s->dst[0]
502                     && dst[1] == s->dst[1]
503                     && dst[2] == s->dst[2]
504 #endif
505                     && pinfo->tunnelid == s->tunnelid) {
506
507 insert:
508                         /* OK, we found appropriate session */
509
510                         fp = &s->ht[h2];
511
512                         f->sess = s;
513                         if (f->tunnelhdr == 0)
514                                 tcf_bind_filter(tp, &f->res, base);
515
516                         tcf_exts_change(tp, &f->exts, &e);
517
518                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
519                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
520                                         break;
521                         f->next = *fp;
522                         wmb();
523                         *fp = f;
524
525                         *arg = (unsigned long)f;
526                         return 0;
527                 }
528         }
529
530         /* No session found. Create new one. */
531
532         err = -ENOBUFS;
533         s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
534         if (s == NULL)
535                 goto errout;
536         memcpy(s->dst, dst, sizeof(s->dst));
537
538         if (pinfo) {
539                 s->dpi = pinfo->dpi;
540                 s->protocol = pinfo->protocol;
541                 s->tunnelid = pinfo->tunnelid;
542         }
543         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
544                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
545                         break;
546         }
547         s->next = *sp;
548         wmb();
549         *sp = s;
550
551         goto insert;
552
553 errout:
554         kfree(f);
555 errout2:
556         tcf_exts_destroy(tp, &e);
557         return err;
558 }
559
560 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
561 {
562         struct rsvp_head *head = tp->root;
563         unsigned h, h1;
564
565         if (arg->stop)
566                 return;
567
568         for (h = 0; h < 256; h++) {
569                 struct rsvp_session *s;
570
571                 for (s = head->ht[h]; s; s = s->next) {
572                         for (h1 = 0; h1 <= 16; h1++) {
573                                 struct rsvp_filter *f;
574
575                                 for (f = s->ht[h1]; f; f = f->next) {
576                                         if (arg->count < arg->skip) {
577                                                 arg->count++;
578                                                 continue;
579                                         }
580                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
581                                                 arg->stop = 1;
582                                                 return;
583                                         }
584                                         arg->count++;
585                                 }
586                         }
587                 }
588         }
589 }
590
591 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
592                      struct sk_buff *skb, struct tcmsg *t)
593 {
594         struct rsvp_filter *f = (struct rsvp_filter*)fh;
595         struct rsvp_session *s;
596         unsigned char *b = skb_tail_pointer(skb);
597         struct rtattr *rta;
598         struct tc_rsvp_pinfo pinfo;
599
600         if (f == NULL)
601                 return skb->len;
602         s = f->sess;
603
604         t->tcm_handle = f->handle;
605
606
607         rta = (struct rtattr*)b;
608         RTA_PUT(skb, TCA_OPTIONS, 0, NULL);
609
610         RTA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
611         pinfo.dpi = s->dpi;
612         pinfo.spi = f->spi;
613         pinfo.protocol = s->protocol;
614         pinfo.tunnelid = s->tunnelid;
615         pinfo.tunnelhdr = f->tunnelhdr;
616         pinfo.pad = 0;
617         RTA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
618         if (f->res.classid)
619                 RTA_PUT(skb, TCA_RSVP_CLASSID, 4, &f->res.classid);
620         if (((f->handle>>8)&0xFF) != 16)
621                 RTA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
622
623         if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
624                 goto rtattr_failure;
625
626         rta->rta_len = skb_tail_pointer(skb) - b;
627
628         if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
629                 goto rtattr_failure;
630         return skb->len;
631
632 rtattr_failure:
633         nlmsg_trim(skb, b);
634         return -1;
635 }
636
637 static struct tcf_proto_ops RSVP_OPS = {
638         .next           =       NULL,
639         .kind           =       RSVP_ID,
640         .classify       =       rsvp_classify,
641         .init           =       rsvp_init,
642         .destroy        =       rsvp_destroy,
643         .get            =       rsvp_get,
644         .put            =       rsvp_put,
645         .change         =       rsvp_change,
646         .delete         =       rsvp_delete,
647         .walk           =       rsvp_walk,
648         .dump           =       rsvp_dump,
649         .owner          =       THIS_MODULE,
650 };
651
652 static int __init init_rsvp(void)
653 {
654         return register_tcf_proto_ops(&RSVP_OPS);
655 }
656
657 static void __exit exit_rsvp(void)
658 {
659         unregister_tcf_proto_ops(&RSVP_OPS);
660 }
661
662 module_init(init_rsvp)
663 module_exit(exit_rsvp)