S2IO: statistics for memory allocation failuers
[linux-2.6] / net / dccp / ackvec.c
1 /*
2  *  net/dccp/ackvec.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Copyright (c) 2005 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License as published by the
9  *      Free Software Foundation; version 2 of the License;
10  */
11
12 #include "ackvec.h"
13 #include "dccp.h"
14
15 #include <linux/dccp.h>
16 #include <linux/init.h>
17 #include <linux/errno.h>
18 #include <linux/kernel.h>
19 #include <linux/skbuff.h>
20 #include <linux/slab.h>
21
22 #include <net/sock.h>
23
24 static struct kmem_cache *dccp_ackvec_slab;
25 static struct kmem_cache *dccp_ackvec_record_slab;
26
27 static struct dccp_ackvec_record *dccp_ackvec_record_new(void)
28 {
29         struct dccp_ackvec_record *avr =
30                         kmem_cache_alloc(dccp_ackvec_record_slab, GFP_ATOMIC);
31
32         if (avr != NULL)
33                 INIT_LIST_HEAD(&avr->dccpavr_node);
34
35         return avr;
36 }
37
38 static void dccp_ackvec_record_delete(struct dccp_ackvec_record *avr)
39 {
40         if (unlikely(avr == NULL))
41                 return;
42         /* Check if deleting a linked record */
43         WARN_ON(!list_empty(&avr->dccpavr_node));
44         kmem_cache_free(dccp_ackvec_record_slab, avr);
45 }
46
47 static void dccp_ackvec_insert_avr(struct dccp_ackvec *av,
48                                    struct dccp_ackvec_record *avr)
49 {
50         /*
51          * AVRs are sorted by seqno. Since we are sending them in order, we
52          * just add the AVR at the head of the list.
53          * -sorbo.
54          */
55         if (!list_empty(&av->dccpav_records)) {
56                 const struct dccp_ackvec_record *head =
57                                         list_entry(av->dccpav_records.next,
58                                                    struct dccp_ackvec_record,
59                                                    dccpavr_node);
60                 BUG_ON(before48(avr->dccpavr_ack_seqno,
61                                 head->dccpavr_ack_seqno));
62         }
63
64         list_add(&avr->dccpavr_node, &av->dccpav_records);
65 }
66
67 int dccp_insert_option_ackvec(struct sock *sk, struct sk_buff *skb)
68 {
69         struct dccp_sock *dp = dccp_sk(sk);
70         struct dccp_ackvec *av = dp->dccps_hc_rx_ackvec;
71         /* Figure out how many options do we need to represent the ackvec */
72         const u16 nr_opts = (av->dccpav_vec_len +
73                              DCCP_MAX_ACKVEC_OPT_LEN - 1) /
74                             DCCP_MAX_ACKVEC_OPT_LEN;
75         u16 len = av->dccpav_vec_len + 2 * nr_opts, i;
76         struct timeval now;
77         u32 elapsed_time;
78         const unsigned char *tail, *from;
79         unsigned char *to;
80         struct dccp_ackvec_record *avr;
81
82         if (DCCP_SKB_CB(skb)->dccpd_opt_len + len > DCCP_MAX_OPT_LEN)
83                 return -1;
84
85         dccp_timestamp(sk, &now);
86         elapsed_time = timeval_delta(&now, &av->dccpav_time) / 10;
87
88         if (elapsed_time != 0 &&
89             dccp_insert_option_elapsed_time(sk, skb, elapsed_time))
90                 return -1;
91
92         avr = dccp_ackvec_record_new();
93         if (avr == NULL)
94                 return -1;
95
96         DCCP_SKB_CB(skb)->dccpd_opt_len += len;
97
98         to   = skb_push(skb, len);
99         len  = av->dccpav_vec_len;
100         from = av->dccpav_buf + av->dccpav_buf_head;
101         tail = av->dccpav_buf + DCCP_MAX_ACKVEC_LEN;
102
103         for (i = 0; i < nr_opts; ++i) {
104                 int copylen = len;
105
106                 if (len > DCCP_MAX_ACKVEC_OPT_LEN)
107                         copylen = DCCP_MAX_ACKVEC_OPT_LEN;
108
109                 *to++ = DCCPO_ACK_VECTOR_0;
110                 *to++ = copylen + 2;
111
112                 /* Check if buf_head wraps */
113                 if (from + copylen > tail) {
114                         const u16 tailsize = tail - from;
115
116                         memcpy(to, from, tailsize);
117                         to      += tailsize;
118                         len     -= tailsize;
119                         copylen -= tailsize;
120                         from    = av->dccpav_buf;
121                 }
122
123                 memcpy(to, from, copylen);
124                 from += copylen;
125                 to   += copylen;
126                 len  -= copylen;
127         }
128
129         /*
130          *      From RFC 4340, A.2:
131          *
132          *      For each acknowledgement it sends, the HC-Receiver will add an
133          *      acknowledgement record.  ack_seqno will equal the HC-Receiver
134          *      sequence number it used for the ack packet; ack_ptr will equal
135          *      buf_head; ack_ackno will equal buf_ackno; and ack_nonce will
136          *      equal buf_nonce.
137          */
138         avr->dccpavr_ack_seqno = DCCP_SKB_CB(skb)->dccpd_seq;
139         avr->dccpavr_ack_ptr   = av->dccpav_buf_head;
140         avr->dccpavr_ack_ackno = av->dccpav_buf_ackno;
141         avr->dccpavr_ack_nonce = av->dccpav_buf_nonce;
142         avr->dccpavr_sent_len  = av->dccpav_vec_len;
143
144         dccp_ackvec_insert_avr(av, avr);
145
146         dccp_pr_debug("%s ACK Vector 0, len=%d, ack_seqno=%llu, "
147                       "ack_ackno=%llu\n",
148                       dccp_role(sk), avr->dccpavr_sent_len,
149                       (unsigned long long)avr->dccpavr_ack_seqno,
150                       (unsigned long long)avr->dccpavr_ack_ackno);
151         return 0;
152 }
153
154 struct dccp_ackvec *dccp_ackvec_alloc(const gfp_t priority)
155 {
156         struct dccp_ackvec *av = kmem_cache_alloc(dccp_ackvec_slab, priority);
157
158         if (av != NULL) {
159                 av->dccpav_buf_head     = DCCP_MAX_ACKVEC_LEN - 1;
160                 av->dccpav_buf_ackno    = UINT48_MAX + 1;
161                 av->dccpav_buf_nonce = av->dccpav_buf_nonce = 0;
162                 av->dccpav_time.tv_sec  = 0;
163                 av->dccpav_time.tv_usec = 0;
164                 av->dccpav_vec_len      = 0;
165                 INIT_LIST_HEAD(&av->dccpav_records);
166         }
167
168         return av;
169 }
170
171 void dccp_ackvec_free(struct dccp_ackvec *av)
172 {
173         if (unlikely(av == NULL))
174                 return;
175
176         if (!list_empty(&av->dccpav_records)) {
177                 struct dccp_ackvec_record *avr, *next;
178
179                 list_for_each_entry_safe(avr, next, &av->dccpav_records,
180                                          dccpavr_node) {
181                         list_del_init(&avr->dccpavr_node);
182                         dccp_ackvec_record_delete(avr);
183                 }
184         }
185
186         kmem_cache_free(dccp_ackvec_slab, av);
187 }
188
189 static inline u8 dccp_ackvec_state(const struct dccp_ackvec *av,
190                                    const u32 index)
191 {
192         return av->dccpav_buf[index] & DCCP_ACKVEC_STATE_MASK;
193 }
194
195 static inline u8 dccp_ackvec_len(const struct dccp_ackvec *av,
196                                  const u32 index)
197 {
198         return av->dccpav_buf[index] & DCCP_ACKVEC_LEN_MASK;
199 }
200
201 /*
202  * If several packets are missing, the HC-Receiver may prefer to enter multiple
203  * bytes with run length 0, rather than a single byte with a larger run length;
204  * this simplifies table updates if one of the missing packets arrives.
205  */
206 static inline int dccp_ackvec_set_buf_head_state(struct dccp_ackvec *av,
207                                                  const unsigned int packets,
208                                                  const unsigned char state)
209 {
210         unsigned int gap;
211         long new_head;
212
213         if (av->dccpav_vec_len + packets > DCCP_MAX_ACKVEC_LEN)
214                 return -ENOBUFS;
215
216         gap      = packets - 1;
217         new_head = av->dccpav_buf_head - packets;
218
219         if (new_head < 0) {
220                 if (gap > 0) {
221                         memset(av->dccpav_buf, DCCP_ACKVEC_STATE_NOT_RECEIVED,
222                                gap + new_head + 1);
223                         gap = -new_head;
224                 }
225                 new_head += DCCP_MAX_ACKVEC_LEN;
226         }
227
228         av->dccpav_buf_head = new_head;
229
230         if (gap > 0)
231                 memset(av->dccpav_buf + av->dccpav_buf_head + 1,
232                        DCCP_ACKVEC_STATE_NOT_RECEIVED, gap);
233
234         av->dccpav_buf[av->dccpav_buf_head] = state;
235         av->dccpav_vec_len += packets;
236         return 0;
237 }
238
239 /*
240  * Implements the RFC 4340, Appendix A
241  */
242 int dccp_ackvec_add(struct dccp_ackvec *av, const struct sock *sk,
243                     const u64 ackno, const u8 state)
244 {
245         /*
246          * Check at the right places if the buffer is full, if it is, tell the
247          * caller to start dropping packets till the HC-Sender acks our ACK
248          * vectors, when we will free up space in dccpav_buf.
249          *
250          * We may well decide to do buffer compression, etc, but for now lets
251          * just drop.
252          *
253          * From Appendix A.1.1 (`New Packets'):
254          *
255          *      Of course, the circular buffer may overflow, either when the
256          *      HC-Sender is sending data at a very high rate, when the
257          *      HC-Receiver's acknowledgements are not reaching the HC-Sender,
258          *      or when the HC-Sender is forgetting to acknowledge those acks
259          *      (so the HC-Receiver is unable to clean up old state). In this
260          *      case, the HC-Receiver should either compress the buffer (by
261          *      increasing run lengths when possible), transfer its state to
262          *      a larger buffer, or, as a last resort, drop all received
263          *      packets, without processing them whatsoever, until its buffer
264          *      shrinks again.
265          */
266
267         /* See if this is the first ackno being inserted */
268         if (av->dccpav_vec_len == 0) {
269                 av->dccpav_buf[av->dccpav_buf_head] = state;
270                 av->dccpav_vec_len = 1;
271         } else if (after48(ackno, av->dccpav_buf_ackno)) {
272                 const u64 delta = dccp_delta_seqno(av->dccpav_buf_ackno,
273                                                    ackno);
274
275                 /*
276                  * Look if the state of this packet is the same as the
277                  * previous ackno and if so if we can bump the head len.
278                  */
279                 if (delta == 1 &&
280                     dccp_ackvec_state(av, av->dccpav_buf_head) == state &&
281                     (dccp_ackvec_len(av, av->dccpav_buf_head) <
282                      DCCP_ACKVEC_LEN_MASK))
283                         av->dccpav_buf[av->dccpav_buf_head]++;
284                 else if (dccp_ackvec_set_buf_head_state(av, delta, state))
285                         return -ENOBUFS;
286         } else {
287                 /*
288                  * A.1.2.  Old Packets
289                  *
290                  *      When a packet with Sequence Number S <= buf_ackno
291                  *      arrives, the HC-Receiver will scan the table for
292                  *      the byte corresponding to S. (Indexing structures
293                  *      could reduce the complexity of this scan.)
294                  */
295                 u64 delta = dccp_delta_seqno(ackno, av->dccpav_buf_ackno);
296                 u32 index = av->dccpav_buf_head;
297
298                 while (1) {
299                         const u8 len = dccp_ackvec_len(av, index);
300                         const u8 state = dccp_ackvec_state(av, index);
301                         /*
302                          * valid packets not yet in dccpav_buf have a reserved
303                          * entry, with a len equal to 0.
304                          */
305                         if (state == DCCP_ACKVEC_STATE_NOT_RECEIVED &&
306                             len == 0 && delta == 0) { /* Found our
307                                                          reserved seat! */
308                                 dccp_pr_debug("Found %llu reserved seat!\n",
309                                               (unsigned long long)ackno);
310                                 av->dccpav_buf[index] = state;
311                                 goto out;
312                         }
313                         /* len == 0 means one packet */
314                         if (delta < len + 1)
315                                 goto out_duplicate;
316
317                         delta -= len + 1;
318                         if (++index == DCCP_MAX_ACKVEC_LEN)
319                                 index = 0;
320                 }
321         }
322
323         av->dccpav_buf_ackno = ackno;
324         dccp_timestamp(sk, &av->dccpav_time);
325 out:
326         return 0;
327
328 out_duplicate:
329         /* Duplicate packet */
330         dccp_pr_debug("Received a dup or already considered lost "
331                       "packet: %llu\n", (unsigned long long)ackno);
332         return -EILSEQ;
333 }
334
335 #ifdef CONFIG_IP_DCCP_DEBUG
336 void dccp_ackvector_print(const u64 ackno, const unsigned char *vector, int len)
337 {
338         dccp_pr_debug_cat("ACK vector len=%d, ackno=%llu |", len,
339                          (unsigned long long)ackno);
340
341         while (len--) {
342                 const u8 state = (*vector & DCCP_ACKVEC_STATE_MASK) >> 6;
343                 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
344
345                 dccp_pr_debug_cat("%d,%d|", state, rl);
346                 ++vector;
347         }
348
349         dccp_pr_debug_cat("\n");
350 }
351
352 void dccp_ackvec_print(const struct dccp_ackvec *av)
353 {
354         dccp_ackvector_print(av->dccpav_buf_ackno,
355                              av->dccpav_buf + av->dccpav_buf_head,
356                              av->dccpav_vec_len);
357 }
358 #endif
359
360 static void dccp_ackvec_throw_record(struct dccp_ackvec *av,
361                                      struct dccp_ackvec_record *avr)
362 {
363         struct dccp_ackvec_record *next;
364
365         /* sort out vector length */
366         if (av->dccpav_buf_head <= avr->dccpavr_ack_ptr)
367                 av->dccpav_vec_len = avr->dccpavr_ack_ptr - av->dccpav_buf_head;
368         else
369                 av->dccpav_vec_len = DCCP_MAX_ACKVEC_LEN - 1
370                                      - av->dccpav_buf_head
371                                      + avr->dccpavr_ack_ptr;
372
373         /* free records */
374         list_for_each_entry_safe_from(avr, next, &av->dccpav_records,
375                                       dccpavr_node) {
376                 list_del_init(&avr->dccpavr_node);
377                 dccp_ackvec_record_delete(avr);
378         }
379 }
380
381 void dccp_ackvec_check_rcv_ackno(struct dccp_ackvec *av, struct sock *sk,
382                                  const u64 ackno)
383 {
384         struct dccp_ackvec_record *avr;
385
386         /*
387          * If we traverse backwards, it should be faster when we have large
388          * windows. We will be receiving ACKs for stuff we sent a while back
389          * -sorbo.
390          */
391         list_for_each_entry_reverse(avr, &av->dccpav_records, dccpavr_node) {
392                 if (ackno == avr->dccpavr_ack_seqno) {
393                         dccp_pr_debug("%s ACK packet 0, len=%d, ack_seqno=%llu, "
394                                       "ack_ackno=%llu, ACKED!\n",
395                                       dccp_role(sk), 1,
396                                       (unsigned long long)avr->dccpavr_ack_seqno,
397                                       (unsigned long long)avr->dccpavr_ack_ackno);
398                         dccp_ackvec_throw_record(av, avr);
399                         break;
400                 } else if (avr->dccpavr_ack_seqno > ackno)
401                         break; /* old news */
402         }
403 }
404
405 static void dccp_ackvec_check_rcv_ackvector(struct dccp_ackvec *av,
406                                             struct sock *sk, u64 *ackno,
407                                             const unsigned char len,
408                                             const unsigned char *vector)
409 {
410         unsigned char i;
411         struct dccp_ackvec_record *avr;
412
413         /* Check if we actually sent an ACK vector */
414         if (list_empty(&av->dccpav_records))
415                 return;
416
417         i = len;
418         /*
419          * XXX
420          * I think it might be more efficient to work backwards. See comment on
421          * rcv_ackno. -sorbo.
422          */
423         avr = list_entry(av->dccpav_records.next, struct dccp_ackvec_record,
424                          dccpavr_node);
425         while (i--) {
426                 const u8 rl = *vector & DCCP_ACKVEC_LEN_MASK;
427                 u64 ackno_end_rl;
428
429                 dccp_set_seqno(&ackno_end_rl, *ackno - rl);
430
431                 /*
432                  * If our AVR sequence number is greater than the ack, go
433                  * forward in the AVR list until it is not so.
434                  */
435                 list_for_each_entry_from(avr, &av->dccpav_records,
436                                          dccpavr_node) {
437                         if (!after48(avr->dccpavr_ack_seqno, *ackno))
438                                 goto found;
439                 }
440                 /* End of the dccpav_records list, not found, exit */
441                 break;
442 found:
443                 if (between48(avr->dccpavr_ack_seqno, ackno_end_rl, *ackno)) {
444                         const u8 state = *vector & DCCP_ACKVEC_STATE_MASK;
445                         if (state != DCCP_ACKVEC_STATE_NOT_RECEIVED) {
446                                 dccp_pr_debug("%s ACK vector 0, len=%d, "
447                                               "ack_seqno=%llu, ack_ackno=%llu, "
448                                               "ACKED!\n",
449                                               dccp_role(sk), len,
450                                               (unsigned long long)
451                                               avr->dccpavr_ack_seqno,
452                                               (unsigned long long)
453                                               avr->dccpavr_ack_ackno);
454                                 dccp_ackvec_throw_record(av, avr);
455                                 break;
456                         }
457                         /*
458                          * If it wasn't received, continue scanning... we might
459                          * find another one.
460                          */
461                 }
462
463                 dccp_set_seqno(ackno, ackno_end_rl - 1);
464                 ++vector;
465         }
466 }
467
468 int dccp_ackvec_parse(struct sock *sk, const struct sk_buff *skb,
469                       u64 *ackno, const u8 opt, const u8 *value, const u8 len)
470 {
471         if (len > DCCP_MAX_ACKVEC_OPT_LEN)
472                 return -1;
473
474         /* dccp_ackvector_print(DCCP_SKB_CB(skb)->dccpd_ack_seq, value, len); */
475         dccp_ackvec_check_rcv_ackvector(dccp_sk(sk)->dccps_hc_rx_ackvec, sk,
476                                         ackno, len, value);
477         return 0;
478 }
479
480 int __init dccp_ackvec_init(void)
481 {
482         dccp_ackvec_slab = kmem_cache_create("dccp_ackvec",
483                                              sizeof(struct dccp_ackvec), 0,
484                                              SLAB_HWCACHE_ALIGN, NULL, NULL);
485         if (dccp_ackvec_slab == NULL)
486                 goto out_err;
487
488         dccp_ackvec_record_slab =
489                         kmem_cache_create("dccp_ackvec_record",
490                                           sizeof(struct dccp_ackvec_record),
491                                           0, SLAB_HWCACHE_ALIGN, NULL, NULL);
492         if (dccp_ackvec_record_slab == NULL)
493                 goto out_destroy_slab;
494
495         return 0;
496
497 out_destroy_slab:
498         kmem_cache_destroy(dccp_ackvec_slab);
499         dccp_ackvec_slab = NULL;
500 out_err:
501         DCCP_CRIT("Unable to create Ack Vector slab cache");
502         return -ENOBUFS;
503 }
504
505 void dccp_ackvec_exit(void)
506 {
507         if (dccp_ackvec_slab != NULL) {
508                 kmem_cache_destroy(dccp_ackvec_slab);
509                 dccp_ackvec_slab = NULL;
510         }
511         if (dccp_ackvec_record_slab != NULL) {
512                 kmem_cache_destroy(dccp_ackvec_record_slab);
513                 dccp_ackvec_record_slab = NULL;
514         }
515 }