Btrfs: Raise thresholds for metadata writeback
[linux-2.6] / net / dccp / feat.c
1 /*
2  *  net/dccp/feat.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Andrea Bittau <a.bittau@cs.ucl.ac.uk>
6  *
7  *  ASSUMPTIONS
8  *  -----------
9  *  o All currently known SP features have 1-byte quantities. If in the future
10  *    extensions of RFCs 4340..42 define features with item lengths larger than
11  *    one byte, a feature-specific extension of the code will be required.
12  *
13  *  This program is free software; you can redistribute it and/or
14  *  modify it under the terms of the GNU General Public License
15  *  as published by the Free Software Foundation; either version
16  *  2 of the License, or (at your option) any later version.
17  */
18
19 #include <linux/module.h>
20
21 #include "ccid.h"
22 #include "feat.h"
23
/*
 * Private return code of dccp_feat_reconcile(): the two preference lists had
 * no value in common and the current value was reconfirmed instead.
 * dccp_feat_change_recv() tests for it so that no additional empty Confirm is
 * queued in that case.
 */
#define DCCP_FEAT_SP_NOAGREE (-123)
25
26 int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
27                      u8 *val, u8 len, gfp_t gfp)
28 {
29         struct dccp_opt_pend *opt;
30
31         dccp_feat_debug(type, feature, *val);
32
33         if (len > 3) {
34                 DCCP_WARN("invalid length %d\n", len);
35                 return -EINVAL;
36         }
37         /* XXX add further sanity checks */
38
39         /* check if that feature is already being negotiated */
40         list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
41                 /* ok we found a negotiation for this option already */
42                 if (opt->dccpop_feat == feature && opt->dccpop_type == type) {
43                         dccp_pr_debug("Replacing old\n");
44                         /* replace */
45                         BUG_ON(opt->dccpop_val == NULL);
46                         kfree(opt->dccpop_val);
47                         opt->dccpop_val  = val;
48                         opt->dccpop_len  = len;
49                         opt->dccpop_conf = 0;
50                         return 0;
51                 }
52         }
53
54         /* negotiation for a new feature */
55         opt = kmalloc(sizeof(*opt), gfp);
56         if (opt == NULL)
57                 return -ENOMEM;
58
59         opt->dccpop_type = type;
60         opt->dccpop_feat = feature;
61         opt->dccpop_len  = len;
62         opt->dccpop_val  = val;
63         opt->dccpop_conf = 0;
64         opt->dccpop_sc   = NULL;
65
66         BUG_ON(opt->dccpop_val == NULL);
67
68         list_add_tail(&opt->dccpop_node, &dmsk->dccpms_pending);
69         return 0;
70 }
71
72 EXPORT_SYMBOL_GPL(dccp_feat_change);
73
74 static int dccp_feat_update_ccid(struct sock *sk, u8 type, u8 new_ccid_nr)
75 {
76         struct dccp_sock *dp = dccp_sk(sk);
77         struct dccp_minisock *dmsk = dccp_msk(sk);
78         /* figure out if we are changing our CCID or the peer's */
79         const int rx = type == DCCPO_CHANGE_R;
80         const u8 ccid_nr = rx ? dmsk->dccpms_rx_ccid : dmsk->dccpms_tx_ccid;
81         struct ccid *new_ccid;
82
83         /* Check if nothing is being changed. */
84         if (ccid_nr == new_ccid_nr)
85                 return 0;
86
87         new_ccid = ccid_new(new_ccid_nr, sk, rx, GFP_ATOMIC);
88         if (new_ccid == NULL)
89                 return -ENOMEM;
90
91         if (rx) {
92                 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
93                 dp->dccps_hc_rx_ccid = new_ccid;
94                 dmsk->dccpms_rx_ccid = new_ccid_nr;
95         } else {
96                 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
97                 dp->dccps_hc_tx_ccid = new_ccid;
98                 dmsk->dccpms_tx_ccid = new_ccid_nr;
99         }
100
101         return 0;
102 }
103
104 static int dccp_feat_update(struct sock *sk, u8 type, u8 feat, u8 val)
105 {
106         dccp_feat_debug(type, feat, val);
107
108         switch (feat) {
109         case DCCPF_CCID:
110                 return dccp_feat_update_ccid(sk, type, val);
111         default:
112                 dccp_pr_debug("UNIMPLEMENTED: %s(%d, ...)\n",
113                               dccp_feat_typename(type), feat);
114                 break;
115         }
116         return 0;
117 }
118
119 static int dccp_feat_reconcile(struct sock *sk, struct dccp_opt_pend *opt,
120                                u8 *rpref, u8 rlen)
121 {
122         struct dccp_sock *dp = dccp_sk(sk);
123         u8 *spref, slen, *res = NULL;
124         int i, j, rc, agree = 1;
125
126         BUG_ON(rpref == NULL);
127
128         /* check if we are the black sheep */
129         if (dp->dccps_role == DCCP_ROLE_CLIENT) {
130                 spref = rpref;
131                 slen  = rlen;
132                 rpref = opt->dccpop_val;
133                 rlen  = opt->dccpop_len;
134         } else {
135                 spref = opt->dccpop_val;
136                 slen  = opt->dccpop_len;
137         }
138         /*
139          * Now we have server preference list in spref and client preference in
140          * rpref
141          */
142         BUG_ON(spref == NULL);
143         BUG_ON(rpref == NULL);
144
145         /* FIXME sanity check vals */
146
147         /* Are values in any order?  XXX Lame "algorithm" here */
148         for (i = 0; i < slen; i++) {
149                 for (j = 0; j < rlen; j++) {
150                         if (spref[i] == rpref[j]) {
151                                 res = &spref[i];
152                                 break;
153                         }
154                 }
155                 if (res)
156                         break;
157         }
158
159         /* we didn't agree on anything */
160         if (res == NULL) {
161                 /* confirm previous value */
162                 switch (opt->dccpop_feat) {
163                 case DCCPF_CCID:
164                         /* XXX did i get this right? =P */
165                         if (opt->dccpop_type == DCCPO_CHANGE_L)
166                                 res = &dccp_msk(sk)->dccpms_tx_ccid;
167                         else
168                                 res = &dccp_msk(sk)->dccpms_rx_ccid;
169                         break;
170
171                 default:
172                         DCCP_BUG("Fell through, feat=%d", opt->dccpop_feat);
173                         /* XXX implement res */
174                         return -EFAULT;
175                 }
176
177                 dccp_pr_debug("Don't agree... reconfirming %d\n", *res);
178                 agree = 0; /* this is used for mandatory options... */
179         }
180
181         /* need to put result and our preference list */
182         rlen = 1 + opt->dccpop_len;
183         rpref = kmalloc(rlen, GFP_ATOMIC);
184         if (rpref == NULL)
185                 return -ENOMEM;
186
187         *rpref = *res;
188         memcpy(&rpref[1], opt->dccpop_val, opt->dccpop_len);
189
190         /* put it in the "confirm queue" */
191         if (opt->dccpop_sc == NULL) {
192                 opt->dccpop_sc = kmalloc(sizeof(*opt->dccpop_sc), GFP_ATOMIC);
193                 if (opt->dccpop_sc == NULL) {
194                         kfree(rpref);
195                         return -ENOMEM;
196                 }
197         } else {
198                 /* recycle the confirm slot */
199                 BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
200                 kfree(opt->dccpop_sc->dccpoc_val);
201                 dccp_pr_debug("recycling confirm slot\n");
202         }
203         memset(opt->dccpop_sc, 0, sizeof(*opt->dccpop_sc));
204
205         opt->dccpop_sc->dccpoc_val = rpref;
206         opt->dccpop_sc->dccpoc_len = rlen;
207
208         /* update the option on our side [we are about to send the confirm] */
209         rc = dccp_feat_update(sk, opt->dccpop_type, opt->dccpop_feat, *res);
210         if (rc) {
211                 kfree(opt->dccpop_sc->dccpoc_val);
212                 kfree(opt->dccpop_sc);
213                 opt->dccpop_sc = NULL;
214                 return rc;
215         }
216
217         dccp_pr_debug("Will confirm %d\n", *rpref);
218
219         /* say we want to change to X but we just got a confirm X, suppress our
220          * change
221          */
222         if (!opt->dccpop_conf) {
223                 if (*opt->dccpop_val == *res)
224                         opt->dccpop_conf = 1;
225                 dccp_pr_debug("won't ask for change of same feature\n");
226         }
227
228         return agree ? 0 : DCCP_FEAT_SP_NOAGREE; /* used for mandatory opts */
229 }
230
231 static int dccp_feat_sp(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
232 {
233         struct dccp_minisock *dmsk = dccp_msk(sk);
234         struct dccp_opt_pend *opt;
235         int rc = 1;
236         u8 t;
237
238         /*
239          * We received a CHANGE.  We gotta match it against our own preference
240          * list.  If we got a CHANGE_R it means it's a change for us, so we need
241          * to compare our CHANGE_L list.
242          */
243         if (type == DCCPO_CHANGE_L)
244                 t = DCCPO_CHANGE_R;
245         else
246                 t = DCCPO_CHANGE_L;
247
248         /* find our preference list for this feature */
249         list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
250                 if (opt->dccpop_type != t || opt->dccpop_feat != feature)
251                         continue;
252
253                 /* find the winner from the two preference lists */
254                 rc = dccp_feat_reconcile(sk, opt, val, len);
255                 break;
256         }
257
258         /* We didn't deal with the change.  This can happen if we have no
259          * preference list for the feature.  In fact, it just shouldn't
260          * happen---if we understand a feature, we should have a preference list
261          * with at least the default value.
262          */
263         BUG_ON(rc == 1);
264
265         return rc;
266 }
267
268 static int dccp_feat_nn(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
269 {
270         struct dccp_opt_pend *opt;
271         struct dccp_minisock *dmsk = dccp_msk(sk);
272         u8 *copy;
273         int rc;
274
275         /* NN features must be Change L (sec. 6.3.2) */
276         if (type != DCCPO_CHANGE_L) {
277                 dccp_pr_debug("received %s for NN feature %d\n",
278                                 dccp_feat_typename(type), feature);
279                 return -EFAULT;
280         }
281
282         /* XXX sanity check opt val */
283
284         /* copy option so we can confirm it */
285         opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
286         if (opt == NULL)
287                 return -ENOMEM;
288
289         copy = kmemdup(val, len, GFP_ATOMIC);
290         if (copy == NULL) {
291                 kfree(opt);
292                 return -ENOMEM;
293         }
294
295         opt->dccpop_type = DCCPO_CONFIRM_R; /* NN can only confirm R */
296         opt->dccpop_feat = feature;
297         opt->dccpop_val  = copy;
298         opt->dccpop_len  = len;
299
300         /* change feature */
301         rc = dccp_feat_update(sk, type, feature, *val);
302         if (rc) {
303                 kfree(opt->dccpop_val);
304                 kfree(opt);
305                 return rc;
306         }
307
308         dccp_feat_debug(type, feature, *copy);
309
310         list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
311
312         return 0;
313 }
314
315 static void dccp_feat_empty_confirm(struct dccp_minisock *dmsk,
316                                     u8 type, u8 feature)
317 {
318         /* XXX check if other confirms for that are queued and recycle slot */
319         struct dccp_opt_pend *opt = kzalloc(sizeof(*opt), GFP_ATOMIC);
320
321         if (opt == NULL) {
322                 /* XXX what do we do?  Ignoring should be fine.  It's a change
323                  * after all =P
324                  */
325                 return;
326         }
327
328         switch (type) {
329         case DCCPO_CHANGE_L:
330                 opt->dccpop_type = DCCPO_CONFIRM_R;
331                 break;
332         case DCCPO_CHANGE_R:
333                 opt->dccpop_type = DCCPO_CONFIRM_L;
334                 break;
335         default:
336                 DCCP_WARN("invalid type %d\n", type);
337                 kfree(opt);
338                 return;
339         }
340         opt->dccpop_feat = feature;
341         opt->dccpop_val  = NULL;
342         opt->dccpop_len  = 0;
343
344         /* change feature */
345         dccp_pr_debug("Empty %s(%d)\n", dccp_feat_typename(type), feature);
346
347         list_add_tail(&opt->dccpop_node, &dmsk->dccpms_conf);
348 }
349
350 static void dccp_feat_flush_confirm(struct sock *sk)
351 {
352         struct dccp_minisock *dmsk = dccp_msk(sk);
353         /* Check if there is anything to confirm in the first place */
354         int yes = !list_empty(&dmsk->dccpms_conf);
355
356         if (!yes) {
357                 struct dccp_opt_pend *opt;
358
359                 list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
360                         if (opt->dccpop_conf) {
361                                 yes = 1;
362                                 break;
363                         }
364                 }
365         }
366
367         if (!yes)
368                 return;
369
370         /* OK there is something to confirm... */
371         /* XXX check if packet is in flight?  Send delayed ack?? */
372         if (sk->sk_state == DCCP_OPEN)
373                 dccp_send_ack(sk);
374 }
375
376 int dccp_feat_change_recv(struct sock *sk, u8 type, u8 feature, u8 *val, u8 len)
377 {
378         int rc;
379
380         dccp_feat_debug(type, feature, *val);
381
382         /* figure out if it's SP or NN feature */
383         switch (feature) {
384         /* deal with SP features */
385         case DCCPF_CCID:
386                 rc = dccp_feat_sp(sk, type, feature, val, len);
387                 break;
388
389         /* deal with NN features */
390         case DCCPF_ACK_RATIO:
391                 rc = dccp_feat_nn(sk, type, feature, val, len);
392                 break;
393
394         /* XXX implement other features */
395         default:
396                 dccp_pr_debug("UNIMPLEMENTED: not handling %s(%d, ...)\n",
397                               dccp_feat_typename(type), feature);
398                 rc = -EFAULT;
399                 break;
400         }
401
402         /* check if there were problems changing features */
403         if (rc) {
404                 /* If we don't agree on SP, we sent a confirm for old value.
405                  * However we propagate rc to caller in case option was
406                  * mandatory
407                  */
408                 if (rc != DCCP_FEAT_SP_NOAGREE)
409                         dccp_feat_empty_confirm(dccp_msk(sk), type, feature);
410         }
411
412         /* generate the confirm [if required] */
413         dccp_feat_flush_confirm(sk);
414
415         return rc;
416 }
417
418 EXPORT_SYMBOL_GPL(dccp_feat_change_recv);
419
420 int dccp_feat_confirm_recv(struct sock *sk, u8 type, u8 feature,
421                            u8 *val, u8 len)
422 {
423         u8 t;
424         struct dccp_opt_pend *opt;
425         struct dccp_minisock *dmsk = dccp_msk(sk);
426         int found = 0;
427         int all_confirmed = 1;
428
429         dccp_feat_debug(type, feature, *val);
430
431         /* locate our change request */
432         switch (type) {
433         case DCCPO_CONFIRM_L: t = DCCPO_CHANGE_R; break;
434         case DCCPO_CONFIRM_R: t = DCCPO_CHANGE_L; break;
435         default:              DCCP_WARN("invalid type %d\n", type);
436                               return 1;
437
438         }
439         /* XXX sanity check feature value */
440
441         list_for_each_entry(opt, &dmsk->dccpms_pending, dccpop_node) {
442                 if (!opt->dccpop_conf && opt->dccpop_type == t &&
443                     opt->dccpop_feat == feature) {
444                         found = 1;
445                         dccp_pr_debug("feature %d found\n", opt->dccpop_feat);
446
447                         /* XXX do sanity check */
448
449                         opt->dccpop_conf = 1;
450
451                         /* We got a confirmation---change the option */
452                         dccp_feat_update(sk, opt->dccpop_type,
453                                          opt->dccpop_feat, *val);
454
455                         /* XXX check the return value of dccp_feat_update */
456                         break;
457                 }
458
459                 if (!opt->dccpop_conf)
460                         all_confirmed = 0;
461         }
462
463         /* fix re-transmit timer */
464         /* XXX gotta make sure that no option negotiation occurs during
465          * connection shutdown.  Consider that the CLOSEREQ is sent and timer is
466          * on.  if all options are confirmed it might kill timer which should
467          * remain alive until close is received.
468          */
469         if (all_confirmed) {
470                 dccp_pr_debug("clear feat negotiation timer %p\n", sk);
471                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
472         }
473
474         if (!found)
475                 dccp_pr_debug("%s(%d, ...) never requested\n",
476                               dccp_feat_typename(type), feature);
477         return 0;
478 }
479
480 EXPORT_SYMBOL_GPL(dccp_feat_confirm_recv);
481
482 void dccp_feat_clean(struct dccp_minisock *dmsk)
483 {
484         struct dccp_opt_pend *opt, *next;
485
486         list_for_each_entry_safe(opt, next, &dmsk->dccpms_pending,
487                                  dccpop_node) {
488                 BUG_ON(opt->dccpop_val == NULL);
489                 kfree(opt->dccpop_val);
490
491                 if (opt->dccpop_sc != NULL) {
492                         BUG_ON(opt->dccpop_sc->dccpoc_val == NULL);
493                         kfree(opt->dccpop_sc->dccpoc_val);
494                         kfree(opt->dccpop_sc);
495                 }
496
497                 kfree(opt);
498         }
499         INIT_LIST_HEAD(&dmsk->dccpms_pending);
500
501         list_for_each_entry_safe(opt, next, &dmsk->dccpms_conf, dccpop_node) {
502                 BUG_ON(opt == NULL);
503                 if (opt->dccpop_val != NULL)
504                         kfree(opt->dccpop_val);
505                 kfree(opt);
506         }
507         INIT_LIST_HEAD(&dmsk->dccpms_conf);
508 }
509
510 EXPORT_SYMBOL_GPL(dccp_feat_clean);
511
512 /* this is to be called only when a listening sock creates its child.  It is
513  * assumed by the function---the confirm is not duplicated, but rather it is
514  * "passed on".
515  */
516 int dccp_feat_clone(struct sock *oldsk, struct sock *newsk)
517 {
518         struct dccp_minisock *olddmsk = dccp_msk(oldsk);
519         struct dccp_minisock *newdmsk = dccp_msk(newsk);
520         struct dccp_opt_pend *opt;
521         int rc = 0;
522
523         INIT_LIST_HEAD(&newdmsk->dccpms_pending);
524         INIT_LIST_HEAD(&newdmsk->dccpms_conf);
525
526         list_for_each_entry(opt, &olddmsk->dccpms_pending, dccpop_node) {
527                 struct dccp_opt_pend *newopt;
528                 /* copy the value of the option */
529                 u8 *val = kmemdup(opt->dccpop_val, opt->dccpop_len, GFP_ATOMIC);
530
531                 if (val == NULL)
532                         goto out_clean;
533
534                 newopt = kmemdup(opt, sizeof(*newopt), GFP_ATOMIC);
535                 if (newopt == NULL) {
536                         kfree(val);
537                         goto out_clean;
538                 }
539
540                 /* insert the option */
541                 newopt->dccpop_val = val;
542                 list_add_tail(&newopt->dccpop_node, &newdmsk->dccpms_pending);
543
544                 /* XXX what happens with backlogs and multiple connections at
545                  * once...
546                  */
547                 /* the master socket no longer needs to worry about confirms */
548                 opt->dccpop_sc = NULL; /* it's not a memleak---new socket has it */
549
550                 /* reset state for a new socket */
551                 opt->dccpop_conf = 0;
552         }
553
554         /* XXX not doing anything about the conf queue */
555
556 out:
557         return rc;
558
559 out_clean:
560         dccp_feat_clean(newdmsk);
561         rc = -ENOMEM;
562         goto out;
563 }
564
565 EXPORT_SYMBOL_GPL(dccp_feat_clone);
566
567 static int __dccp_feat_init(struct dccp_minisock *dmsk, u8 type, u8 feat,
568                             u8 *val, u8 len)
569 {
570         int rc = -ENOMEM;
571         u8 *copy = kmemdup(val, len, GFP_KERNEL);
572
573         if (copy != NULL) {
574                 rc = dccp_feat_change(dmsk, type, feat, copy, len, GFP_KERNEL);
575                 if (rc)
576                         kfree(copy);
577         }
578         return rc;
579 }
580
581 int dccp_feat_init(struct dccp_minisock *dmsk)
582 {
583         int rc;
584
585         INIT_LIST_HEAD(&dmsk->dccpms_pending);
586         INIT_LIST_HEAD(&dmsk->dccpms_conf);
587
588         /* CCID L */
589         rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_CCID,
590                               &dmsk->dccpms_tx_ccid, 1);
591         if (rc)
592                 goto out;
593
594         /* CCID R */
595         rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_R, DCCPF_CCID,
596                               &dmsk->dccpms_rx_ccid, 1);
597         if (rc)
598                 goto out;
599
600         /* Ack ratio */
601         rc = __dccp_feat_init(dmsk, DCCPO_CHANGE_L, DCCPF_ACK_RATIO,
602                               &dmsk->dccpms_ack_ratio, 1);
603 out:
604         return rc;
605 }
606
607 EXPORT_SYMBOL_GPL(dccp_feat_init);
608
609 #ifdef CONFIG_IP_DCCP_DEBUG
610 const char *dccp_feat_typename(const u8 type)
611 {
612         switch(type) {
613         case DCCPO_CHANGE_L:  return("ChangeL");
614         case DCCPO_CONFIRM_L: return("ConfirmL");
615         case DCCPO_CHANGE_R:  return("ChangeR");
616         case DCCPO_CONFIRM_R: return("ConfirmR");
617         /* the following case must not appear in feature negotation  */
618         default:              dccp_pr_debug("unknown type %d [BUG!]\n", type);
619         }
620         return NULL;
621 }
622
623 EXPORT_SYMBOL_GPL(dccp_feat_typename);
624
625 const char *dccp_feat_name(const u8 feat)
626 {
627         static const char *feature_names[] = {
628                 [DCCPF_RESERVED]        = "Reserved",
629                 [DCCPF_CCID]            = "CCID",
630                 [DCCPF_SHORT_SEQNOS]    = "Allow Short Seqnos",
631                 [DCCPF_SEQUENCE_WINDOW] = "Sequence Window",
632                 [DCCPF_ECN_INCAPABLE]   = "ECN Incapable",
633                 [DCCPF_ACK_RATIO]       = "Ack Ratio",
634                 [DCCPF_SEND_ACK_VECTOR] = "Send ACK Vector",
635                 [DCCPF_SEND_NDP_COUNT]  = "Send NDP Count",
636                 [DCCPF_MIN_CSUM_COVER]  = "Min. Csum Coverage",
637                 [DCCPF_DATA_CHECKSUM]   = "Send Data Checksum",
638         };
639         if (feat > DCCPF_DATA_CHECKSUM && feat < DCCPF_MIN_CCID_SPECIFIC)
640                 return feature_names[DCCPF_RESERVED];
641
642         if (feat >= DCCPF_MIN_CCID_SPECIFIC)
643                 return "CCID-specific";
644
645         return feature_names[feat];
646 }
647
648 EXPORT_SYMBOL_GPL(dccp_feat_name);
649 #endif /* CONFIG_IP_DCCP_DEBUG */