dccp: Resolve dependencies of features on choice of CCID
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
176 {
177         struct dccp_sock *dp = dccp_sk(sk);
178         struct dccp_minisock *dmsk = dccp_msk(sk);
179         struct inet_connection_sock *icsk = inet_csk(sk);
180
181         dccp_minisock_init(&dp->dccps_minisock);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         INIT_LIST_HEAD(&dp->dccps_featneg);
197         /*
198          * FIXME: We're hardcoding the CCID, and doing this at this point makes
199          * the listening (master) sock get CCID control blocks, which is not
200          * necessary, but for now, to not mess with the test userspace apps,
201          * lets leave it here, later the real solution is to do this in a
202          * setsockopt(CCIDs-I-want/accept). -acme
203          */
204         if (likely(ctl_sock_initialized)) {
205                 int rc = dccp_feat_init(sk);
206
207                 if (rc)
208                         return rc;
209
210                 if (dmsk->dccpms_send_ack_vector) {
211                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212                         if (dp->dccps_hc_rx_ackvec == NULL)
213                                 return -ENOMEM;
214                 }
215                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
216                                                       sk, GFP_KERNEL);
217                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
218                                                       sk, GFP_KERNEL);
219                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220                              dp->dccps_hc_tx_ccid == NULL)) {
221                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223                         if (dmsk->dccpms_send_ack_vector) {
224                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225                                 dp->dccps_hc_rx_ackvec = NULL;
226                         }
227                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
228                         return -ENOMEM;
229                 }
230         } else {
231                 /* control socket doesn't need feat nego */
232                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
234         }
235
236         return 0;
237 }
238
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
240
241 void dccp_destroy_sock(struct sock *sk)
242 {
243         struct dccp_sock *dp = dccp_sk(sk);
244         struct dccp_minisock *dmsk = dccp_msk(sk);
245
246         /*
247          * DCCP doesn't use sk_write_queue, just sk_send_head
248          * for retransmissions
249          */
250         if (sk->sk_send_head != NULL) {
251                 kfree_skb(sk->sk_send_head);
252                 sk->sk_send_head = NULL;
253         }
254
255         /* Clean up a referenced DCCP bind bucket. */
256         if (inet_csk(sk)->icsk_bind_hash != NULL)
257                 inet_put_port(sk);
258
259         kfree(dp->dccps_service_list);
260         dp->dccps_service_list = NULL;
261
262         if (dmsk->dccpms_send_ack_vector) {
263                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264                 dp->dccps_hc_rx_ackvec = NULL;
265         }
266         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
269
270         /* clean up feature negotiation state */
271         dccp_feat_list_purge(&dp->dccps_featneg);
272 }
273
274 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
275
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
277 {
278         struct dccp_sock *dp = dccp_sk(sk);
279
280         dp->dccps_role = DCCP_ROLE_LISTEN;
281         /* do not start to listen if feature negotiation setup fails */
282         if (dccp_feat_finalise_settings(dp))
283                 return -EPROTO;
284         return inet_csk_listen_start(sk, backlog);
285 }
286
287 static inline int dccp_need_reset(int state)
288 {
289         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290                state != DCCP_REQUESTING;
291 }
292
293 int dccp_disconnect(struct sock *sk, int flags)
294 {
295         struct inet_connection_sock *icsk = inet_csk(sk);
296         struct inet_sock *inet = inet_sk(sk);
297         int err = 0;
298         const int old_state = sk->sk_state;
299
300         if (old_state != DCCP_CLOSED)
301                 dccp_set_state(sk, DCCP_CLOSED);
302
303         /*
304          * This corresponds to the ABORT function of RFC793, sec. 3.8
305          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
306          */
307         if (old_state == DCCP_LISTEN) {
308                 inet_csk_listen_stop(sk);
309         } else if (dccp_need_reset(old_state)) {
310                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
311                 sk->sk_err = ECONNRESET;
312         } else if (old_state == DCCP_REQUESTING)
313                 sk->sk_err = ECONNRESET;
314
315         dccp_clear_xmit_timers(sk);
316
317         __skb_queue_purge(&sk->sk_receive_queue);
318         __skb_queue_purge(&sk->sk_write_queue);
319         if (sk->sk_send_head != NULL) {
320                 __kfree_skb(sk->sk_send_head);
321                 sk->sk_send_head = NULL;
322         }
323
324         inet->dport = 0;
325
326         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
327                 inet_reset_saddr(sk);
328
329         sk->sk_shutdown = 0;
330         sock_reset_flag(sk, SOCK_DONE);
331
332         icsk->icsk_backoff = 0;
333         inet_csk_delack_init(sk);
334         __sk_dst_reset(sk);
335
336         WARN_ON(inet->num && !icsk->icsk_bind_hash);
337
338         sk->sk_error_report(sk);
339         return err;
340 }
341
342 EXPORT_SYMBOL_GPL(dccp_disconnect);
343
344 /*
345  *      Wait for a DCCP event.
346  *
347  *      Note that we don't need to lock the socket, as the upper poll layers
348  *      take care of normal races (between the test and the event) and we don't
349  *      go look at any of the socket buffers directly.
350  */
351 unsigned int dccp_poll(struct file *file, struct socket *sock,
352                        poll_table *wait)
353 {
354         unsigned int mask;
355         struct sock *sk = sock->sk;
356
357         poll_wait(file, sk->sk_sleep, wait);
358         if (sk->sk_state == DCCP_LISTEN)
359                 return inet_csk_listen_poll(sk);
360
361         /* Socket is not locked. We are protected from async events
362            by poll logic and correct handling of state changes
363            made by another threads is impossible in any case.
364          */
365
366         mask = 0;
367         if (sk->sk_err)
368                 mask = POLLERR;
369
370         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
371                 mask |= POLLHUP;
372         if (sk->sk_shutdown & RCV_SHUTDOWN)
373                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
374
375         /* Connected? */
376         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
377                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
378                         mask |= POLLIN | POLLRDNORM;
379
380                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
381                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
382                                 mask |= POLLOUT | POLLWRNORM;
383                         } else {  /* send SIGIO later */
384                                 set_bit(SOCK_ASYNC_NOSPACE,
385                                         &sk->sk_socket->flags);
386                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
387
388                                 /* Race breaker. If space is freed after
389                                  * wspace test but before the flags are set,
390                                  * IO signal will be lost.
391                                  */
392                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
393                                         mask |= POLLOUT | POLLWRNORM;
394                         }
395                 }
396         }
397         return mask;
398 }
399
400 EXPORT_SYMBOL_GPL(dccp_poll);
401
402 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
403 {
404         int rc = -ENOTCONN;
405
406         lock_sock(sk);
407
408         if (sk->sk_state == DCCP_LISTEN)
409                 goto out;
410
411         switch (cmd) {
412         case SIOCINQ: {
413                 struct sk_buff *skb;
414                 unsigned long amount = 0;
415
416                 skb = skb_peek(&sk->sk_receive_queue);
417                 if (skb != NULL) {
418                         /*
419                          * We will only return the amount of this packet since
420                          * that is all that will be read.
421                          */
422                         amount = skb->len;
423                 }
424                 rc = put_user(amount, (int __user *)arg);
425         }
426                 break;
427         default:
428                 rc = -ENOIOCTLCMD;
429                 break;
430         }
431 out:
432         release_sock(sk);
433         return rc;
434 }
435
436 EXPORT_SYMBOL_GPL(dccp_ioctl);
437
438 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
439                                    char __user *optval, int optlen)
440 {
441         struct dccp_sock *dp = dccp_sk(sk);
442         struct dccp_service_list *sl = NULL;
443
444         if (service == DCCP_SERVICE_INVALID_VALUE ||
445             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
446                 return -EINVAL;
447
448         if (optlen > sizeof(service)) {
449                 sl = kmalloc(optlen, GFP_KERNEL);
450                 if (sl == NULL)
451                         return -ENOMEM;
452
453                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
454                 if (copy_from_user(sl->dccpsl_list,
455                                    optval + sizeof(service),
456                                    optlen - sizeof(service)) ||
457                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
458                         kfree(sl);
459                         return -EFAULT;
460                 }
461         }
462
463         lock_sock(sk);
464         dp->dccps_service = service;
465
466         kfree(dp->dccps_service_list);
467
468         dp->dccps_service_list = sl;
469         release_sock(sk);
470         return 0;
471 }
472
473 /* byte 1 is feature.  the rest is the preference list */
474 static int dccp_setsockopt_change(struct sock *sk, int type,
475                                   struct dccp_so_feat __user *optval)
476 {
477         struct dccp_so_feat opt;
478         u8 *val;
479         int rc;
480
481         if (copy_from_user(&opt, optval, sizeof(opt)))
482                 return -EFAULT;
483         /*
484          * rfc4340: 6.1. Change Options
485          */
486         if (opt.dccpsf_len < 1)
487                 return -EINVAL;
488
489         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
490         if (!val)
491                 return -ENOMEM;
492
493         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
494                 rc = -EFAULT;
495                 goto out_free_val;
496         }
497
498         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
499                               val, opt.dccpsf_len, GFP_KERNEL);
500         if (rc)
501                 goto out_free_val;
502
503 out:
504         return rc;
505
506 out_free_val:
507         kfree(val);
508         goto out;
509 }
510
511 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
512                 char __user *optval, int optlen)
513 {
514         struct dccp_sock *dp = dccp_sk(sk);
515         int val, err = 0;
516
517         if (optlen < sizeof(int))
518                 return -EINVAL;
519
520         if (get_user(val, (int __user *)optval))
521                 return -EFAULT;
522
523         if (optname == DCCP_SOCKOPT_SERVICE)
524                 return dccp_setsockopt_service(sk, val, optval, optlen);
525
526         lock_sock(sk);
527         switch (optname) {
528         case DCCP_SOCKOPT_PACKET_SIZE:
529                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
530                 err = 0;
531                 break;
532         case DCCP_SOCKOPT_CHANGE_L:
533                 if (optlen != sizeof(struct dccp_so_feat))
534                         err = -EINVAL;
535                 else
536                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
537                                                      (struct dccp_so_feat __user *)
538                                                      optval);
539                 break;
540         case DCCP_SOCKOPT_CHANGE_R:
541                 if (optlen != sizeof(struct dccp_so_feat))
542                         err = -EINVAL;
543                 else
544                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
545                                                      (struct dccp_so_feat __user *)
546                                                      optval);
547                 break;
548         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
549                 if (dp->dccps_role != DCCP_ROLE_SERVER)
550                         err = -EOPNOTSUPP;
551                 else
552                         dp->dccps_server_timewait = (val != 0);
553                 break;
554         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
555                 if (val < 0 || val > 15)
556                         err = -EINVAL;
557                 else
558                         dp->dccps_pcslen = val;
559                 break;
560         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
561                 if (val < 0 || val > 15)
562                         err = -EINVAL;
563                 else {
564                         dp->dccps_pcrlen = val;
565                         /* FIXME: add feature negotiation,
566                          * ChangeL(MinimumChecksumCoverage, val) */
567                 }
568                 break;
569         default:
570                 err = -ENOPROTOOPT;
571                 break;
572         }
573
574         release_sock(sk);
575         return err;
576 }
577
578 int dccp_setsockopt(struct sock *sk, int level, int optname,
579                     char __user *optval, int optlen)
580 {
581         if (level != SOL_DCCP)
582                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
583                                                              optname, optval,
584                                                              optlen);
585         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
586 }
587
588 EXPORT_SYMBOL_GPL(dccp_setsockopt);
589
590 #ifdef CONFIG_COMPAT
591 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
592                            char __user *optval, int optlen)
593 {
594         if (level != SOL_DCCP)
595                 return inet_csk_compat_setsockopt(sk, level, optname,
596                                                   optval, optlen);
597         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
598 }
599
600 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
601 #endif
602
603 static int dccp_getsockopt_service(struct sock *sk, int len,
604                                    __be32 __user *optval,
605                                    int __user *optlen)
606 {
607         const struct dccp_sock *dp = dccp_sk(sk);
608         const struct dccp_service_list *sl;
609         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
610
611         lock_sock(sk);
612         if ((sl = dp->dccps_service_list) != NULL) {
613                 slen = sl->dccpsl_nr * sizeof(u32);
614                 total_len += slen;
615         }
616
617         err = -EINVAL;
618         if (total_len > len)
619                 goto out;
620
621         err = 0;
622         if (put_user(total_len, optlen) ||
623             put_user(dp->dccps_service, optval) ||
624             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
625                 err = -EFAULT;
626 out:
627         release_sock(sk);
628         return err;
629 }
630
631 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
632                     char __user *optval, int __user *optlen)
633 {
634         struct dccp_sock *dp;
635         int val, len;
636
637         if (get_user(len, optlen))
638                 return -EFAULT;
639
640         if (len < (int)sizeof(int))
641                 return -EINVAL;
642
643         dp = dccp_sk(sk);
644
645         switch (optname) {
646         case DCCP_SOCKOPT_PACKET_SIZE:
647                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
648                 return 0;
649         case DCCP_SOCKOPT_SERVICE:
650                 return dccp_getsockopt_service(sk, len,
651                                                (__be32 __user *)optval, optlen);
652         case DCCP_SOCKOPT_GET_CUR_MPS:
653                 val = dp->dccps_mss_cache;
654                 break;
655         case DCCP_SOCKOPT_AVAILABLE_CCIDS:
656                 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
657         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
658                 val = dp->dccps_server_timewait;
659                 break;
660         case DCCP_SOCKOPT_SEND_CSCOV:
661                 val = dp->dccps_pcslen;
662                 break;
663         case DCCP_SOCKOPT_RECV_CSCOV:
664                 val = dp->dccps_pcrlen;
665                 break;
666         case 128 ... 191:
667                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
668                                              len, (u32 __user *)optval, optlen);
669         case 192 ... 255:
670                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
671                                              len, (u32 __user *)optval, optlen);
672         default:
673                 return -ENOPROTOOPT;
674         }
675
676         len = sizeof(val);
677         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
678                 return -EFAULT;
679
680         return 0;
681 }
682
683 int dccp_getsockopt(struct sock *sk, int level, int optname,
684                     char __user *optval, int __user *optlen)
685 {
686         if (level != SOL_DCCP)
687                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
688                                                              optname, optval,
689                                                              optlen);
690         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
691 }
692
693 EXPORT_SYMBOL_GPL(dccp_getsockopt);
694
695 #ifdef CONFIG_COMPAT
696 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
697                            char __user *optval, int __user *optlen)
698 {
699         if (level != SOL_DCCP)
700                 return inet_csk_compat_getsockopt(sk, level, optname,
701                                                   optval, optlen);
702         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
703 }
704
705 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
706 #endif
707
708 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
709                  size_t len)
710 {
711         const struct dccp_sock *dp = dccp_sk(sk);
712         const int flags = msg->msg_flags;
713         const int noblock = flags & MSG_DONTWAIT;
714         struct sk_buff *skb;
715         int rc, size;
716         long timeo;
717
718         if (len > dp->dccps_mss_cache)
719                 return -EMSGSIZE;
720
721         lock_sock(sk);
722
723         if (sysctl_dccp_tx_qlen &&
724             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
725                 rc = -EAGAIN;
726                 goto out_release;
727         }
728
729         timeo = sock_sndtimeo(sk, noblock);
730
731         /*
732          * We have to use sk_stream_wait_connect here to set sk_write_pending,
733          * so that the trick in dccp_rcv_request_sent_state_process.
734          */
735         /* Wait for a connection to finish. */
736         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
737                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
738                         goto out_release;
739
740         size = sk->sk_prot->max_header + len;
741         release_sock(sk);
742         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
743         lock_sock(sk);
744         if (skb == NULL)
745                 goto out_release;
746
747         skb_reserve(skb, sk->sk_prot->max_header);
748         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
749         if (rc != 0)
750                 goto out_discard;
751
752         skb_queue_tail(&sk->sk_write_queue, skb);
753         dccp_write_xmit(sk,0);
754 out_release:
755         release_sock(sk);
756         return rc ? : len;
757 out_discard:
758         kfree_skb(skb);
759         goto out_release;
760 }
761
762 EXPORT_SYMBOL_GPL(dccp_sendmsg);
763
764 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
765                  size_t len, int nonblock, int flags, int *addr_len)
766 {
767         const struct dccp_hdr *dh;
768         long timeo;
769
770         lock_sock(sk);
771
772         if (sk->sk_state == DCCP_LISTEN) {
773                 len = -ENOTCONN;
774                 goto out;
775         }
776
777         timeo = sock_rcvtimeo(sk, nonblock);
778
779         do {
780                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
781
782                 if (skb == NULL)
783                         goto verify_sock_status;
784
785                 dh = dccp_hdr(skb);
786
787                 switch (dh->dccph_type) {
788                 case DCCP_PKT_DATA:
789                 case DCCP_PKT_DATAACK:
790                         goto found_ok_skb;
791
792                 case DCCP_PKT_CLOSE:
793                 case DCCP_PKT_CLOSEREQ:
794                         if (!(flags & MSG_PEEK))
795                                 dccp_finish_passive_close(sk);
796                         /* fall through */
797                 case DCCP_PKT_RESET:
798                         dccp_pr_debug("found fin (%s) ok!\n",
799                                       dccp_packet_name(dh->dccph_type));
800                         len = 0;
801                         goto found_fin_ok;
802                 default:
803                         dccp_pr_debug("packet_type=%s\n",
804                                       dccp_packet_name(dh->dccph_type));
805                         sk_eat_skb(sk, skb, 0);
806                 }
807 verify_sock_status:
808                 if (sock_flag(sk, SOCK_DONE)) {
809                         len = 0;
810                         break;
811                 }
812
813                 if (sk->sk_err) {
814                         len = sock_error(sk);
815                         break;
816                 }
817
818                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
819                         len = 0;
820                         break;
821                 }
822
823                 if (sk->sk_state == DCCP_CLOSED) {
824                         if (!sock_flag(sk, SOCK_DONE)) {
825                                 /* This occurs when user tries to read
826                                  * from never connected socket.
827                                  */
828                                 len = -ENOTCONN;
829                                 break;
830                         }
831                         len = 0;
832                         break;
833                 }
834
835                 if (!timeo) {
836                         len = -EAGAIN;
837                         break;
838                 }
839
840                 if (signal_pending(current)) {
841                         len = sock_intr_errno(timeo);
842                         break;
843                 }
844
845                 sk_wait_data(sk, &timeo);
846                 continue;
847         found_ok_skb:
848                 if (len > skb->len)
849                         len = skb->len;
850                 else if (len < skb->len)
851                         msg->msg_flags |= MSG_TRUNC;
852
853                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
854                         /* Exception. Bailout! */
855                         len = -EFAULT;
856                         break;
857                 }
858         found_fin_ok:
859                 if (!(flags & MSG_PEEK))
860                         sk_eat_skb(sk, skb, 0);
861                 break;
862         } while (1);
863 out:
864         release_sock(sk);
865         return len;
866 }
867
868 EXPORT_SYMBOL_GPL(dccp_recvmsg);
869
870 int inet_dccp_listen(struct socket *sock, int backlog)
871 {
872         struct sock *sk = sock->sk;
873         unsigned char old_state;
874         int err;
875
876         lock_sock(sk);
877
878         err = -EINVAL;
879         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
880                 goto out;
881
882         old_state = sk->sk_state;
883         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
884                 goto out;
885
886         /* Really, if the socket is already in listen state
887          * we can only allow the backlog to be adjusted.
888          */
889         if (old_state != DCCP_LISTEN) {
890                 /*
891                  * FIXME: here it probably should be sk->sk_prot->listen_start
892                  * see tcp_listen_start
893                  */
894                 err = dccp_listen_start(sk, backlog);
895                 if (err)
896                         goto out;
897         }
898         sk->sk_max_ack_backlog = backlog;
899         err = 0;
900
901 out:
902         release_sock(sk);
903         return err;
904 }
905
906 EXPORT_SYMBOL_GPL(inet_dccp_listen);
907
908 static void dccp_terminate_connection(struct sock *sk)
909 {
910         u8 next_state = DCCP_CLOSED;
911
912         switch (sk->sk_state) {
913         case DCCP_PASSIVE_CLOSE:
914         case DCCP_PASSIVE_CLOSEREQ:
915                 dccp_finish_passive_close(sk);
916                 break;
917         case DCCP_PARTOPEN:
918                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
919                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
920                 /* fall through */
921         case DCCP_OPEN:
922                 dccp_send_close(sk, 1);
923
924                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
925                     !dccp_sk(sk)->dccps_server_timewait)
926                         next_state = DCCP_ACTIVE_CLOSEREQ;
927                 else
928                         next_state = DCCP_CLOSING;
929                 /* fall through */
930         default:
931                 dccp_set_state(sk, next_state);
932         }
933 }
934
935 void dccp_close(struct sock *sk, long timeout)
936 {
937         struct dccp_sock *dp = dccp_sk(sk);
938         struct sk_buff *skb;
939         u32 data_was_unread = 0;
940         int state;
941
942         lock_sock(sk);
943
944         sk->sk_shutdown = SHUTDOWN_MASK;
945
946         if (sk->sk_state == DCCP_LISTEN) {
947                 dccp_set_state(sk, DCCP_CLOSED);
948
949                 /* Special case. */
950                 inet_csk_listen_stop(sk);
951
952                 goto adjudge_to_death;
953         }
954
955         sk_stop_timer(sk, &dp->dccps_xmit_timer);
956
957         /*
958          * We need to flush the recv. buffs.  We do this only on the
959          * descriptor close, not protocol-sourced closes, because the
960           *reader process may not have drained the data yet!
961          */
962         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
963                 data_was_unread += skb->len;
964                 __kfree_skb(skb);
965         }
966
967         if (data_was_unread) {
968                 /* Unread data was tossed, send an appropriate Reset Code */
969                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
970                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
971                 dccp_set_state(sk, DCCP_CLOSED);
972         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
973                 /* Check zero linger _after_ checking for unread data. */
974                 sk->sk_prot->disconnect(sk, 0);
975         } else if (sk->sk_state != DCCP_CLOSED) {
976                 dccp_terminate_connection(sk);
977         }
978
979         sk_stream_wait_close(sk, timeout);
980
981 adjudge_to_death:
982         state = sk->sk_state;
983         sock_hold(sk);
984         sock_orphan(sk);
985         atomic_inc(sk->sk_prot->orphan_count);
986
987         /*
988          * It is the last release_sock in its life. It will remove backlog.
989          */
990         release_sock(sk);
991         /*
992          * Now socket is owned by kernel and we acquire BH lock
993          * to finish close. No need to check for user refs.
994          */
995         local_bh_disable();
996         bh_lock_sock(sk);
997         WARN_ON(sock_owned_by_user(sk));
998
999         /* Have we already been destroyed by a softirq or backlog? */
1000         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1001                 goto out;
1002
1003         if (sk->sk_state == DCCP_CLOSED)
1004                 inet_csk_destroy_sock(sk);
1005
1006         /* Otherwise, socket is reprieved until protocol close. */
1007
1008 out:
1009         bh_unlock_sock(sk);
1010         local_bh_enable();
1011         sock_put(sk);
1012 }
1013
1014 EXPORT_SYMBOL_GPL(dccp_close);
1015
/*
 * dccp_shutdown  -  Placeholder shutdown handler
 * @sk:  socket the request refers to (not used)
 * @how: requested shutdown mode, only printed
 *
 * The body consists of a single debug message; nothing else is done here.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1022
1023 static inline int dccp_mib_init(void)
1024 {
1025         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1026 }
1027
1028 static inline void dccp_mib_exit(void)
1029 {
1030         snmp_mib_free((void**)dccp_statistics);
1031 }
1032
1033 static int thash_entries;
1034 module_param(thash_entries, int, 0444);
1035 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1036
1037 #ifdef CONFIG_IP_DCCP_DEBUG
1038 int dccp_debug;
1039 module_param(dccp_debug, bool, 0644);
1040 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1041
1042 EXPORT_SYMBOL_GPL(dccp_debug);
1043 #endif
1044
1045 static int __init dccp_init(void)
1046 {
1047         unsigned long goal;
1048         int ehash_order, bhash_order, i;
1049         int rc = -ENOBUFS;
1050
1051         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1052                      FIELD_SIZEOF(struct sk_buff, cb));
1053
1054         dccp_hashinfo.bind_bucket_cachep =
1055                 kmem_cache_create("dccp_bind_bucket",
1056                                   sizeof(struct inet_bind_bucket), 0,
1057                                   SLAB_HWCACHE_ALIGN, NULL);
1058         if (!dccp_hashinfo.bind_bucket_cachep)
1059                 goto out;
1060
1061         /*
1062          * Size and allocate the main established and bind bucket
1063          * hash tables.
1064          *
1065          * The methodology is similar to that of the buffer cache.
1066          */
1067         if (num_physpages >= (128 * 1024))
1068                 goal = num_physpages >> (21 - PAGE_SHIFT);
1069         else
1070                 goal = num_physpages >> (23 - PAGE_SHIFT);
1071
1072         if (thash_entries)
1073                 goal = (thash_entries *
1074                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1075         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1076                 ;
1077         do {
1078                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1079                                         sizeof(struct inet_ehash_bucket);
1080                 while (dccp_hashinfo.ehash_size &
1081                        (dccp_hashinfo.ehash_size - 1))
1082                         dccp_hashinfo.ehash_size--;
1083                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1084                         __get_free_pages(GFP_ATOMIC, ehash_order);
1085         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1086
1087         if (!dccp_hashinfo.ehash) {
1088                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1089                 goto out_free_bind_bucket_cachep;
1090         }
1091
1092         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1093                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1094                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1095         }
1096
1097         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1098                         goto out_free_dccp_ehash;
1099
1100         bhash_order = ehash_order;
1101
1102         do {
1103                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1104                                         sizeof(struct inet_bind_hashbucket);
1105                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1106                     bhash_order > 0)
1107                         continue;
1108                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1109                         __get_free_pages(GFP_ATOMIC, bhash_order);
1110         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1111
1112         if (!dccp_hashinfo.bhash) {
1113                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1114                 goto out_free_dccp_locks;
1115         }
1116
1117         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1118                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1119                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1120         }
1121
1122         rc = dccp_mib_init();
1123         if (rc)
1124                 goto out_free_dccp_bhash;
1125
1126         rc = dccp_ackvec_init();
1127         if (rc)
1128                 goto out_free_dccp_mib;
1129
1130         rc = dccp_sysctl_init();
1131         if (rc)
1132                 goto out_ackvec_exit;
1133
1134         dccp_timestamping_init();
1135 out:
1136         return rc;
1137 out_ackvec_exit:
1138         dccp_ackvec_exit();
1139 out_free_dccp_mib:
1140         dccp_mib_exit();
1141 out_free_dccp_bhash:
1142         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1143         dccp_hashinfo.bhash = NULL;
1144 out_free_dccp_locks:
1145         inet_ehash_locks_free(&dccp_hashinfo);
1146 out_free_dccp_ehash:
1147         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1148         dccp_hashinfo.ehash = NULL;
1149 out_free_bind_bucket_cachep:
1150         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1151         dccp_hashinfo.bind_bucket_cachep = NULL;
1152         goto out;
1153 }
1154
1155 static void __exit dccp_fini(void)
1156 {
1157         dccp_mib_exit();
1158         free_pages((unsigned long)dccp_hashinfo.bhash,
1159                    get_order(dccp_hashinfo.bhash_size *
1160                              sizeof(struct inet_bind_hashbucket)));
1161         free_pages((unsigned long)dccp_hashinfo.ehash,
1162                    get_order(dccp_hashinfo.ehash_size *
1163                              sizeof(struct inet_ehash_bucket)));
1164         inet_ehash_locks_free(&dccp_hashinfo);
1165         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1166         dccp_ackvec_exit();
1167         dccp_sysctl_exit();
1168 }
1169
1170 module_init(dccp_init);
1171 module_exit(dccp_fini);
1172
1173 MODULE_LICENSE("GPL");
1174 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1175 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");