4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
/*
 * Module-wide DCCP objects. The SNMP statistics block, the orphan counter
 * (initialised to 0) and the inet hash info (listen-hash lock, user count
 * and wait queue initialised statically) are each exported with
 * EXPORT_SYMBOL_GPL. sysctl_dccp_tx_qlen starts at 5 packets.
 * NOTE(review): the closing of the dccp_hashinfo initialiser is not visible
 * in this listing - confirm against the complete file.
 */
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41 EXPORT_SYMBOL_GPL(dccp_statistics);
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48 .lhash_lock = RW_LOCK_UNLOCKED,
49 .lhash_users = ATOMIC_INIT(0),
50 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
/*
 * dccp_set_state - move a socket to a new DCCP state.
 * @sk:    socket whose state changes
 * @state: requested new state
 *
 * Logs the transition through dccp_pr_debug() and triggers WARN_ON when the
 * requested state equals the current one. The visible statements adjust the
 * DCCP_MIB_CURRESTAB and DCCP_MIB_ESTABRESETS counters depending on the old
 * state and call sk->sk_prot->unhash(sk).
 * NOTE(review): the switch/case lines that select these branches, the body of
 * the bind-hash check and the final state assignment are not visible in this
 * listing - confirm against the complete file.
 */
58 void dccp_set_state(struct sock *sk, const int state)
60 const int oldstate = sk->sk_state;
62 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
63 dccp_state_name(oldstate), dccp_state_name(state));
64 WARN_ON(state == oldstate);
68 if (oldstate != DCCP_OPEN)
69 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
73 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74 oldstate == DCCP_CLOSING)
75 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77 sk->sk_prot->unhash(sk);
78 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
83 if (oldstate == DCCP_OPEN)
84 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
87 /* Change state AFTER socket is unhashed to avoid closed
88 * socket sitting in hash tables.
93 EXPORT_SYMBOL_GPL(dccp_set_state);
/*
 * dccp_finish_passive_close - finish a close started by a received
 * Close or CloseReq packet.
 * @sk: socket in DCCP_PASSIVE_CLOSE or DCCP_PASSIVE_CLOSEREQ state
 *
 * DCCP_PASSIVE_CLOSE:    send a Reset with code DCCP_RESET_CODE_CLOSED and
 *                        enter DCCP_CLOSED.
 * DCCP_PASSIVE_CLOSEREQ: send a Close with the second argument set to 1 and
 *                        enter DCCP_CLOSING.
 * NOTE(review): the break statements between the cases are not visible in
 * this listing - confirm against the complete file.
 */
95 static void dccp_finish_passive_close(struct sock *sk)
97 switch (sk->sk_state) {
98 case DCCP_PASSIVE_CLOSE:
99 /* Node (client or server) has received Close packet. */
100 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101 dccp_set_state(sk, DCCP_CLOSED);
103 case DCCP_PASSIVE_CLOSEREQ:
105 * Client received CloseReq. We set the `active' flag so that
106 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108 dccp_send_close(sk, 1);
109 dccp_set_state(sk, DCCP_CLOSING);
/*
 * dccp_done - put a socket into its terminal state.
 * @sk: socket to finish
 *
 * Sets the state to DCCP_CLOSED, clears the transmit timers and marks both
 * directions as shut down (SHUTDOWN_MASK). When the socket is not flagged
 * SOCK_DEAD its sk_state_change callback is invoked.
 * NOTE(review): the line between the callback and inet_csk_destroy_sock()
 * is missing from this listing, so it cannot be told here whether the
 * destroy call is unconditional or an else branch.
 */
113 void dccp_done(struct sock *sk)
115 dccp_set_state(sk, DCCP_CLOSED);
116 dccp_clear_xmit_timers(sk);
118 sk->sk_shutdown = SHUTDOWN_MASK;
120 if (!sock_flag(sk, SOCK_DEAD))
121 sk->sk_state_change(sk);
123 inet_csk_destroy_sock(sk);
126 EXPORT_SYMBOL_GPL(dccp_done);
/*
 * dccp_packet_name - map a DCCP packet type to a printable name.
 * @type: packet type, used as an index into a static table keyed by the
 *        DCCP_PKT_* constants
 *
 * Returns the table entry for @type. Types greater than or equal to
 * DCCP_NR_PKT_TYPES take a separate path whose return value is not visible
 * in this listing.
 * NOTE(review): no lower-bound check on @type is visible; a negative value
 * would index before the table.
 */
128 const char *dccp_packet_name(const int type)
130 static const char *dccp_packet_names[] = {
131 [DCCP_PKT_REQUEST] = "REQUEST",
132 [DCCP_PKT_RESPONSE] = "RESPONSE",
133 [DCCP_PKT_DATA] = "DATA",
134 [DCCP_PKT_ACK] = "ACK",
135 [DCCP_PKT_DATAACK] = "DATAACK",
136 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137 [DCCP_PKT_CLOSE] = "CLOSE",
138 [DCCP_PKT_RESET] = "RESET",
139 [DCCP_PKT_SYNC] = "SYNC",
140 [DCCP_PKT_SYNCACK] = "SYNCACK",
143 if (type >= DCCP_NR_PKT_TYPES)
146 return dccp_packet_names[type];
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
/*
 * dccp_state_name - map a DCCP socket state to a printable name.
 * @state: socket state, used as an index into a static table keyed by the
 *         DCCP state constants
 *
 * Returns the fixed INVALID STATE! string when @state is greater than or
 * equal to DCCP_MAX_STATES, otherwise the table entry for @state.
 * NOTE(review): no lower-bound check on @state is visible, and the table is
 * declared without const, unlike the one in dccp_packet_name().
 */
151 const char *dccp_state_name(const int state)
153 static char *dccp_state_names[] = {
154 [DCCP_OPEN] = "OPEN",
155 [DCCP_REQUESTING] = "REQUESTING",
156 [DCCP_PARTOPEN] = "PARTOPEN",
157 [DCCP_LISTEN] = "LISTEN",
158 [DCCP_RESPOND] = "RESPOND",
159 [DCCP_CLOSING] = "CLOSING",
160 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
161 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
162 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163 [DCCP_TIME_WAIT] = "TIME_WAIT",
164 [DCCP_CLOSED] = "CLOSED",
167 if (state >= DCCP_MAX_STATES)
168 return "INVALID STATE!";
170 return dccp_state_names[state];
173 EXPORT_SYMBOL_GPL(dccp_state_name);
/*
 * dccp_init_sock - set up the DCCP-specific part of a new socket.
 * @sk:                   socket to initialise
 * @ctl_sock_initialized: non-zero selects the full setup path below
 *
 * Common setup: minisock defaults, RTO of DCCP_TIMEOUT_INIT, retry count
 * from sysctl_dccp_request_retries, state DCCP_CLOSED, the write-space and
 * sync-mss callbacks, an MSS cache of 536, undefined role, absent service
 * code, both ack ratios set to 1, transmit timers and an empty
 * feature-negotiation list.
 *
 * Full setup path: runs dccp_feat_init(), allocates an ack vector when the
 * minisock asks for one, and creates the RX and TX CCID blocks. If either
 * CCID pointer is NULL both are deleted, the ack vector (if requested) is
 * freed and the pointers are reset to NULL.
 *
 * Other path (per the existing comment, the control socket): only the
 * dccpms_pending and dccpms_conf lists are initialised.
 * NOTE(review): the return statements and error codes are not visible in
 * this listing - confirm against the complete file.
 */
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
177 struct dccp_sock *dp = dccp_sk(sk);
178 struct dccp_minisock *dmsk = dccp_msk(sk);
179 struct inet_connection_sock *icsk = inet_csk(sk);
181 dccp_minisock_init(&dp->dccps_minisock);
183 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
184 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
185 sk->sk_state = DCCP_CLOSED;
186 sk->sk_write_space = dccp_write_space;
187 icsk->icsk_sync_mss = dccp_sync_mss;
188 dp->dccps_mss_cache = 536;
189 dp->dccps_rate_last = jiffies;
190 dp->dccps_role = DCCP_ROLE_UNDEFINED;
191 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
192 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
194 dccp_init_xmit_timers(sk);
196 INIT_LIST_HEAD(&dp->dccps_featneg);
198 * FIXME: We're hardcoding the CCID, and doing this at this point makes
199 * the listening (master) sock get CCID control blocks, which is not
200 * necessary, but for now, to not mess with the test userspace apps,
201 * lets leave it here, later the real solution is to do this in a
202 * setsockopt(CCIDs-I-want/accept). -acme
204 if (likely(ctl_sock_initialized)) {
205 int rc = dccp_feat_init(sk);
210 if (dmsk->dccpms_send_ack_vector) {
211 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212 if (dp->dccps_hc_rx_ackvec == NULL)
215 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
217 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
219 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220 dp->dccps_hc_tx_ccid == NULL)) {
221 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223 if (dmsk->dccpms_send_ack_vector) {
224 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225 dp->dccps_hc_rx_ackvec = NULL;
227 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
231 /* control socket doesn't need feat nego */
232 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233 INIT_LIST_HEAD(&dmsk->dccpms_conf);
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock - release the DCCP-specific resources of a socket.
 * @sk: socket being destroyed
 *
 * Frees the pending retransmission skb held in sk_send_head, the service
 * list, the RX ack vector (only when the minisock has send_ack_vector set)
 * and both CCID blocks, resetting each pointer to NULL, then purges the
 * feature-negotiation list.
 * NOTE(review): the statement executed when icsk_bind_hash is non-NULL is
 * not visible in this listing - confirm against the complete file.
 */
241 void dccp_destroy_sock(struct sock *sk)
243 struct dccp_sock *dp = dccp_sk(sk);
244 struct dccp_minisock *dmsk = dccp_msk(sk);
247 * DCCP doesn't use sk_write_queue, just sk_send_head
248 * for retransmissions
250 if (sk->sk_send_head != NULL) {
251 kfree_skb(sk->sk_send_head);
252 sk->sk_send_head = NULL;
255 /* Clean up a referenced DCCP bind bucket. */
256 if (inet_csk(sk)->icsk_bind_hash != NULL)
259 kfree(dp->dccps_service_list);
260 dp->dccps_service_list = NULL;
262 if (dmsk->dccpms_send_ack_vector) {
263 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264 dp->dccps_hc_rx_ackvec = NULL;
266 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
270 /* clean up feature negotiation state */
271 dccp_feat_list_purge(&dp->dccps_featneg);
274 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
/*
 * dccp_listen_start - switch a socket into the listening role.
 * @sk:      socket to start listening on
 * @backlog: passed through to inet_csk_listen_start()
 *
 * Sets the role to DCCP_ROLE_LISTEN, finalises the feature-negotiation
 * settings and, when that succeeds, returns the result of
 * inet_csk_listen_start().
 * NOTE(review): the value returned when dccp_feat_finalise_settings() fails
 * is not visible in this listing.
 */
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
278 struct dccp_sock *dp = dccp_sk(sk);
280 dp->dccps_role = DCCP_ROLE_LISTEN;
281 /* do not start to listen if feature negotiation setup fails */
282 if (dccp_feat_finalise_settings(dp))
284 return inet_csk_listen_start(sk, backlog);
/*
 * dccp_need_reset - tell whether a socket in @state counts as needing a
 * Reset. Returns non-zero for every state except DCCP_CLOSED, DCCP_LISTEN
 * and DCCP_REQUESTING.
 */
287 static inline int dccp_need_reset(int state)
289 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290 state != DCCP_REQUESTING;
/*
 * dccp_disconnect - abort the connection on a socket and reset it for reuse.
 * @sk:    socket to disconnect
 * @flags: not referenced by any visible statement
 *
 * Moves the socket to DCCP_CLOSED unless it is already there, then acts on
 * the previous state: a listener has its listen queue stopped; a state for
 * which dccp_need_reset() is true sends a Reset with code
 * DCCP_RESET_CODE_ABORTED and sets sk_err to ECONNRESET; DCCP_REQUESTING only
 * sets sk_err to ECONNRESET.
 *
 * Afterwards the transmit timers are cleared, the receive and write queues
 * and sk_send_head are purged, the source address is reset unless the user
 * locked it (SOCK_BINDADDR_LOCK), SOCK_DONE is cleared, the backoff counter
 * and delayed-ack state are reinitialised and sk_error_report is invoked.
 * NOTE(review): several lines, including the return statement, are not
 * visible in this listing - confirm against the complete file.
 */
293 int dccp_disconnect(struct sock *sk, int flags)
295 struct inet_connection_sock *icsk = inet_csk(sk);
296 struct inet_sock *inet = inet_sk(sk);
298 const int old_state = sk->sk_state;
300 if (old_state != DCCP_CLOSED)
301 dccp_set_state(sk, DCCP_CLOSED);
304 * This corresponds to the ABORT function of RFC793, sec. 3.8
305 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
307 if (old_state == DCCP_LISTEN) {
308 inet_csk_listen_stop(sk);
309 } else if (dccp_need_reset(old_state)) {
310 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
311 sk->sk_err = ECONNRESET;
312 } else if (old_state == DCCP_REQUESTING)
313 sk->sk_err = ECONNRESET;
315 dccp_clear_xmit_timers(sk);
317 __skb_queue_purge(&sk->sk_receive_queue);
318 __skb_queue_purge(&sk->sk_write_queue);
319 if (sk->sk_send_head != NULL) {
320 __kfree_skb(sk->sk_send_head);
321 sk->sk_send_head = NULL;
326 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
327 inet_reset_saddr(sk);
330 sock_reset_flag(sk, SOCK_DONE);
332 icsk->icsk_backoff = 0;
333 inet_csk_delack_init(sk);
336 WARN_ON(inet->num && !icsk->icsk_bind_hash);
338 sk->sk_error_report(sk);
342 EXPORT_SYMBOL_GPL(dccp_disconnect);
/*
 * dccp_poll - poll handler for DCCP sockets.
 *
 * Registers the caller on sk->sk_sleep through poll_wait(). A listening
 * socket delegates to inet_csk_listen_poll(). For other sockets the visible
 * statements build the event mask as follows:
 *  - RCV_SHUTDOWN adds POLLIN | POLLRDNORM | POLLRDHUP;
 *  - outside DCCP_REQUESTING and DCCP_RESPOND, a positive sk_rmem_alloc adds
 *    POLLIN | POLLRDNORM;
 *  - unless SEND_SHUTDOWN is set, enough write space adds
 *    POLLOUT | POLLWRNORM; otherwise SOCK_ASYNC_NOSPACE and SOCK_NOSPACE are
 *    set and the write space is checked a second time to close the race
 *    described in the existing comment.
 * NOTE(review): the remaining parameters, the mask declaration, the action
 * taken for a fully shut down or closed socket and the return statement are
 * not visible in this listing - confirm against the complete file.
 */
345 * Wait for a DCCP event.
347 * Note that we don't need to lock the socket, as the upper poll layers
348 * take care of normal races (between the test and the event) and we don't
349 * go look at any of the socket buffers directly.
351 unsigned int dccp_poll(struct file *file, struct socket *sock,
355 struct sock *sk = sock->sk;
357 poll_wait(file, sk->sk_sleep, wait);
358 if (sk->sk_state == DCCP_LISTEN)
359 return inet_csk_listen_poll(sk);
361 /* Socket is not locked. We are protected from async events
362 by poll logic and correct handling of state changes
363 made by another threads is impossible in any case.
370 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
372 if (sk->sk_shutdown & RCV_SHUTDOWN)
373 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
376 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
377 if (atomic_read(&sk->sk_rmem_alloc) > 0)
378 mask |= POLLIN | POLLRDNORM;
380 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
381 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
382 mask |= POLLOUT | POLLWRNORM;
383 } else { /* send SIGIO later */
384 set_bit(SOCK_ASYNC_NOSPACE,
385 &sk->sk_socket->flags);
386 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
388 /* Race breaker. If space is freed after
389 * wspace test but before the flags are set,
390 * IO signal will be lost.
392 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
393 mask |= POLLOUT | POLLWRNORM;
400 EXPORT_SYMBOL_GPL(dccp_poll);
/*
 * dccp_ioctl - ioctl handler for DCCP sockets.
 * @sk:  socket the request applies to
 * @cmd: request code; the switch on it is not visible in this listing
 * @arg: user-space address that receives the result as an int
 *
 * The visible statements test for the DCCP_LISTEN state, peek at the head of
 * the receive queue and copy an amount to user space with put_user(). Per
 * the existing comment only the size of that one packet is reported, since
 * a single read returns no more than that.
 * NOTE(review): most of this function, including how amount is derived and
 * every return statement, is missing from this listing - confirm against
 * the complete file.
 */
402 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
408 if (sk->sk_state == DCCP_LISTEN)
414 unsigned long amount = 0;
416 skb = skb_peek(&sk->sk_receive_queue);
419 * We will only return the amount of this packet since
420 * that is all that will be read.
424 rc = put_user(amount, (int __user *)arg);
436 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service - install the service code and optional service
 * list of a socket.
 * @sk:      socket to configure
 * @service: primary service code, already read by the caller
 * @optval:  user buffer; entries after the first sizeof(service) bytes form
 *           the additional service list
 * @optlen:  length of @optval in bytes
 *
 * The request is checked against DCCP_SERVICE_INVALID_VALUE and against a
 * maximum length of DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32). When @optlen
 * exceeds sizeof(service) a list of @optlen bytes is allocated, its entry
 * count is set to optlen / sizeof(u32) - 1, the remaining user data is
 * copied in and the list is checked for DCCP_SERVICE_INVALID_VALUE.
 * Finally the service code is stored, the previous list is freed and the
 * new one (possibly NULL) is installed.
 * NOTE(review): the error returns, the allocation-failure check and any
 * locking are not visible in this listing - confirm against the complete
 * file.
 */
438 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
439 char __user *optval, int optlen)
441 struct dccp_sock *dp = dccp_sk(sk);
442 struct dccp_service_list *sl = NULL;
444 if (service == DCCP_SERVICE_INVALID_VALUE ||
445 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
448 if (optlen > sizeof(service)) {
449 sl = kmalloc(optlen, GFP_KERNEL);
453 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
454 if (copy_from_user(sl->dccpsl_list,
455 optval + sizeof(service),
456 optlen - sizeof(service)) ||
457 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
464 dp->dccps_service = service;
466 kfree(dp->dccps_service_list);
468 dp->dccps_service_list = sl;
/*
 * dccp_setsockopt_cscov - register the minimum checksum coverage to
 * negotiate.
 * @sk:    socket to configure
 * @cscov: requested coverage; tested against the range 0..15
 * @rx:    passed to dccp_feat_register_sp() as the direction flag
 *
 * Allocates a list of len entries, fills it in a loop and registers it for
 * the DCCPF_MIN_CSUM_COVER feature. Per the existing comment the list holds
 * the values cscov...15, lowest first, so that negotiation picks the
 * smallest shared value. @cscov is also stored in dccps_pcrlen or
 * dccps_pcslen.
 * NOTE(review): the computation of len, the loop body, the condition that
 * chooses between the two fields and the return statements are not visible
 * in this listing - confirm against the complete file.
 */
473 static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
478 if (cscov < 0 || cscov > 15)
481 * Populate a list of permissible values, in the range cscov...15. This
482 * is necessary since feature negotiation of single values only works if
483 * both sides incidentally choose the same value. Since the list starts
484 * lowest-value first, negotiation will pick the smallest shared value.
490 list = kmalloc(len, GFP_KERNEL);
494 for (i = 0; i < len; i++)
497 rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);
501 dccp_sk(sk)->dccps_pcrlen = cscov;
503 dccp_sk(sk)->dccps_pcslen = cscov;
/*
 * dccp_setsockopt_ccid - register a preference list of CCIDs.
 * @sk:     socket to configure
 * @type:   DCCP_SOCKOPT_TX_CCID, DCCP_SOCKOPT_RX_CCID or DCCP_SOCKOPT_CCID
 * @optval: user buffer holding the list
 * @optlen: list length in bytes; tested against 1..DCCP_FEAT_MAX_SP_VALS
 *
 * Copies the list from user space and registers it for the DCCPF_CCID
 * feature: with the third argument 1 for TX_CCID and CCID, and, if no error
 * occurred so far, with the third argument 0 for RX_CCID and CCID.
 * NOTE(review): the error returns, the release of val and the final return
 * are not visible in this listing - confirm against the complete file.
 */
509 static int dccp_setsockopt_ccid(struct sock *sk, int type,
510 char __user *optval, int optlen)
515 if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
518 val = kmalloc(optlen, GFP_KERNEL);
522 if (copy_from_user(val, optval, optlen)) {
528 if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
529 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
531 if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
532 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
/*
 * do_dccp_setsockopt - dispatch a SOL_DCCP setsockopt request.
 *
 * PACKET_SIZE and CHANGE_L/CHANGE_R only emit a deprecation warning in the
 * visible code. The three CCID options are forwarded to
 * dccp_setsockopt_ccid(). Every other option needs at least sizeof(int)
 * bytes and has its integer value read with get_user():
 *  - DCCP_SOCKOPT_SERVICE goes to dccp_setsockopt_service();
 *  - DCCP_SOCKOPT_SERVER_TIMEWAIT stores (val != 0) after a check of the
 *    role against DCCP_ROLE_SERVER;
 *  - DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV call
 *    dccp_setsockopt_cscov() with rx false and true respectively.
 * NOTE(review): the switch statements, error codes, any locking and the
 * return statements are not visible in this listing - confirm against the
 * complete file.
 */
539 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
540 char __user *optval, int optlen)
542 struct dccp_sock *dp = dccp_sk(sk);
546 case DCCP_SOCKOPT_PACKET_SIZE:
547 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
549 case DCCP_SOCKOPT_CHANGE_L:
550 case DCCP_SOCKOPT_CHANGE_R:
551 DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
553 case DCCP_SOCKOPT_CCID:
554 case DCCP_SOCKOPT_RX_CCID:
555 case DCCP_SOCKOPT_TX_CCID:
556 return dccp_setsockopt_ccid(sk, optname, optval, optlen);
559 if (optlen < (int)sizeof(int))
562 if (get_user(val, (int __user *)optval))
565 if (optname == DCCP_SOCKOPT_SERVICE)
566 return dccp_setsockopt_service(sk, val, optval, optlen);
570 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
571 if (dp->dccps_role != DCCP_ROLE_SERVER)
574 dp->dccps_server_timewait = (val != 0);
576 case DCCP_SOCKOPT_SEND_CSCOV:
577 err = dccp_setsockopt_cscov(sk, val, false);
579 case DCCP_SOCKOPT_RECV_CSCOV:
580 err = dccp_setsockopt_cscov(sk, val, true);
/*
 * dccp_setsockopt - setsockopt entry point for DCCP sockets.
 *
 * Requests for a level other than SOL_DCCP are passed to the setsockopt
 * handler of the address-family operations (icsk_af_ops); SOL_DCCP requests
 * go to do_dccp_setsockopt().
 */
591 int dccp_setsockopt(struct sock *sk, int level, int optname,
592 char __user *optval, int optlen)
594 if (level != SOL_DCCP)
595 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
598 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
601 EXPORT_SYMBOL_GPL(dccp_setsockopt);
/*
 * compat_dccp_setsockopt - compat variant of the setsockopt entry point.
 *
 * Requests for a level other than SOL_DCCP are passed to
 * inet_csk_compat_setsockopt(); SOL_DCCP requests go to
 * do_dccp_setsockopt(), the same handler the non-compat entry point uses.
 * NOTE(review): any preprocessor guard around this function is not visible
 * in this listing.
 */
604 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
605 char __user *optval, int optlen)
607 if (level != SOL_DCCP)
608 return inet_csk_compat_setsockopt(sk, level, optname,
610 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
613 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
/*
 * dccp_getsockopt_service - copy the service code and service list to user
 * space.
 * @sk:     socket to query
 * @len:    size of the user buffer
 * @optval: user buffer; receives the service code first and the list
 *          entries from the second element onwards
 * (the remaining parameter is used as optlen and receives total_len)
 *
 * err starts as -ENOENT and total_len as sizeof(u32). When a service list
 * exists its byte length is dccpsl_nr * sizeof(u32).
 * NOTE(review): the update of total_len, the comparison against @len, any
 * locking and the return statements are not visible in this listing -
 * confirm against the complete file.
 */
616 static int dccp_getsockopt_service(struct sock *sk, int len,
617 __be32 __user *optval,
620 const struct dccp_sock *dp = dccp_sk(sk);
621 const struct dccp_service_list *sl;
622 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
625 if ((sl = dp->dccps_service_list) != NULL) {
626 slen = sl->dccpsl_nr * sizeof(u32);
635 if (put_user(total_len, optlen) ||
636 put_user(dp->dccps_service, optval) ||
637 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * do_dccp_getsockopt - dispatch a SOL_DCCP getsockopt request.
 *
 * Reads the caller-supplied length with get_user() and tests it against
 * sizeof(int). Visible option handling:
 *  - PACKET_SIZE emits a deprecation warning;
 *  - SERVICE and AVAILABLE_CCIDS are forwarded to their helper functions;
 *  - GET_CUR_MPS, TX_CCID, RX_CCID, SERVER_TIMEWAIT, SEND_CSCOV and
 *    RECV_CSCOV load an integer into val;
 *  - two further paths forward the request to the RX or TX CCID getsockopt
 *    handlers.
 * The integer results are written back with put_user() for the length and
 * copy_to_user() for the value.
 * NOTE(review): the switch statement, the case labels of the CCID-forwarding
 * paths, the assignment of dp and len, and the error returns are not visible
 * in this listing - confirm against the complete file.
 */
644 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
645 char __user *optval, int __user *optlen)
647 struct dccp_sock *dp;
650 if (get_user(len, optlen))
653 if (len < (int)sizeof(int))
659 case DCCP_SOCKOPT_PACKET_SIZE:
660 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
662 case DCCP_SOCKOPT_SERVICE:
663 return dccp_getsockopt_service(sk, len,
664 (__be32 __user *)optval, optlen);
665 case DCCP_SOCKOPT_GET_CUR_MPS:
666 val = dp->dccps_mss_cache;
668 case DCCP_SOCKOPT_AVAILABLE_CCIDS:
669 return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
670 case DCCP_SOCKOPT_TX_CCID:
671 val = ccid_get_current_tx_ccid(dp);
675 case DCCP_SOCKOPT_RX_CCID:
676 val = ccid_get_current_rx_ccid(dp);
680 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
681 val = dp->dccps_server_timewait;
683 case DCCP_SOCKOPT_SEND_CSCOV:
684 val = dp->dccps_pcslen;
686 case DCCP_SOCKOPT_RECV_CSCOV:
687 val = dp->dccps_pcrlen;
690 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
691 len, (u32 __user *)optval, optlen);
693 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
694 len, (u32 __user *)optval, optlen);
700 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
/*
 * dccp_getsockopt - getsockopt entry point for DCCP sockets.
 *
 * Requests for a level other than SOL_DCCP are passed to the getsockopt
 * handler of the address-family operations (icsk_af_ops); SOL_DCCP requests
 * go to do_dccp_getsockopt().
 */
706 int dccp_getsockopt(struct sock *sk, int level, int optname,
707 char __user *optval, int __user *optlen)
709 if (level != SOL_DCCP)
710 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
713 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
716 EXPORT_SYMBOL_GPL(dccp_getsockopt);
/*
 * compat_dccp_getsockopt - compat variant of the getsockopt entry point.
 *
 * Requests for a level other than SOL_DCCP are passed to
 * inet_csk_compat_getsockopt(); SOL_DCCP requests go to
 * do_dccp_getsockopt(), the same handler the non-compat entry point uses.
 * NOTE(review): any preprocessor guard around this function is not visible
 * in this listing.
 */
719 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
720 char __user *optval, int __user *optlen)
722 if (level != SOL_DCCP)
723 return inet_csk_compat_getsockopt(sk, level, optname,
725 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
728 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
/*
 * dccp_sendmsg - queue one datagram for transmission.
 *
 * Visible steps, in order:
 *  - compare len with the cached MSS (dccps_mss_cache);
 *  - when sysctl_dccp_tx_qlen is non-zero, compare the write-queue length
 *    with that limit;
 *  - derive the send timeout, honouring MSG_DONTWAIT;
 *  - unless the socket is in DCCP_OPEN or DCCP_PARTOPEN, wait for the
 *    connection with sk_stream_wait_connect();
 *  - allocate an skb of max_header + len bytes, reserve the header room and
 *    copy the payload from the iovec;
 *  - append the skb to sk_write_queue and call dccp_write_xmit(sk, 0).
 * NOTE(review): the remaining parameters, the error codes of the two size
 * checks, any locking and the return statements are not visible in this
 * listing - confirm against the complete file.
 */
731 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
734 const struct dccp_sock *dp = dccp_sk(sk);
735 const int flags = msg->msg_flags;
736 const int noblock = flags & MSG_DONTWAIT;
741 if (len > dp->dccps_mss_cache)
746 if (sysctl_dccp_tx_qlen &&
747 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
752 timeo = sock_sndtimeo(sk, noblock);
755 * We have to use sk_stream_wait_connect here to set sk_write_pending,
756 * so that the trick in dccp_rcv_request_sent_state_process.
758 /* Wait for a connection to finish. */
759 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
760 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
763 size = sk->sk_prot->max_header + len;
765 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
770 skb_reserve(skb, sk->sk_prot->max_header);
771 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
775 skb_queue_tail(&sk->sk_write_queue, skb);
776 dccp_write_xmit(sk,0);
785 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg - receive one datagram from a DCCP socket.
 *
 * The visible statements test for the DCCP_LISTEN state, derive the receive
 * timeout and then work on the skb at the head of the receive queue,
 * switching on its DCCP packet type:
 *  - a CloseReq (and, judging by the debug text, the other closing packet
 *    types handled with it) calls dccp_finish_passive_close() unless
 *    MSG_PEEK is set;
 *  - a path that logs the packet type discards the skb with sk_eat_skb().
 * With no usable packet the socket status is checked (SOCK_DONE, pending
 * socket error, RCV_SHUTDOWN, DCCP_CLOSED, pending signal) before waiting
 * in sk_wait_data(). When data is copied out, MSG_TRUNC is set if the
 * requested length is shorter than the skb, and the skb is consumed unless
 * MSG_PEEK is set.
 * NOTE(review): the loop construct, most case labels, the locking and all
 * return paths are missing from this listing - confirm against the complete
 * file.
 */
787 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
788 size_t len, int nonblock, int flags, int *addr_len)
790 const struct dccp_hdr *dh;
795 if (sk->sk_state == DCCP_LISTEN) {
800 timeo = sock_rcvtimeo(sk, nonblock);
803 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
806 goto verify_sock_status;
810 switch (dh->dccph_type) {
812 case DCCP_PKT_DATAACK:
816 case DCCP_PKT_CLOSEREQ:
817 if (!(flags & MSG_PEEK))
818 dccp_finish_passive_close(sk);
821 dccp_pr_debug("found fin (%s) ok!\n",
822 dccp_packet_name(dh->dccph_type));
826 dccp_pr_debug("packet_type=%s\n",
827 dccp_packet_name(dh->dccph_type));
828 sk_eat_skb(sk, skb, 0);
831 if (sock_flag(sk, SOCK_DONE)) {
837 len = sock_error(sk);
841 if (sk->sk_shutdown & RCV_SHUTDOWN) {
846 if (sk->sk_state == DCCP_CLOSED) {
847 if (!sock_flag(sk, SOCK_DONE)) {
848 /* This occurs when user tries to read
849 * from never connected socket.
863 if (signal_pending(current)) {
864 len = sock_intr_errno(timeo);
868 sk_wait_data(sk, &timeo);
873 else if (len < skb->len)
874 msg->msg_flags |= MSG_TRUNC;
876 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
877 /* Exception. Bailout! */
882 if (!(flags & MSG_PEEK))
883 sk_eat_skb(sk, skb, 0);
891 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen - listen() handler for DCCP sockets.
 * @sock:    socket layer object; must be SS_UNCONNECTED and of type SOCK_DCCP
 * @backlog: new value for sk_max_ack_backlog
 *
 * Only sockets whose state is DCCP_CLOSED or DCCP_LISTEN pass the state
 * check. A socket that is not yet listening is started through
 * dccp_listen_start(); per the existing comment an already listening socket
 * only has its backlog adjusted. The backlog is then stored in
 * sk_max_ack_backlog.
 * NOTE(review): the locking, the error codes and the return statement are
 * not visible in this listing - confirm against the complete file.
 */
893 int inet_dccp_listen(struct socket *sock, int backlog)
895 struct sock *sk = sock->sk;
896 unsigned char old_state;
902 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
905 old_state = sk->sk_state;
906 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
909 /* Really, if the socket is already in listen state
910 * we can only allow the backlog to be adjusted.
912 if (old_state != DCCP_LISTEN) {
914 * FIXME: here it probably should be sk->sk_prot->listen_start
915 * see tcp_listen_start
917 err = dccp_listen_start(sk, backlog);
921 sk->sk_max_ack_backlog = backlog;
929 EXPORT_SYMBOL_GPL(inet_dccp_listen);
/*
 * dccp_terminate_connection - start closing a connection from the local
 * side.
 * @sk: socket to close
 *
 * The two passive-close states are handed to dccp_finish_passive_close().
 * On the other visible path the ICSK_TIME_DACK timer is cleared (the debug
 * text calls it the PARTOPEN timer), a Close is sent with the second
 * argument set to 1, and the next state is DCCP_ACTIVE_CLOSEREQ for a server
 * that does not hold timewait state, otherwise DCCP_CLOSING. next_state
 * defaults to DCCP_CLOSED and is applied with dccp_set_state().
 * NOTE(review): the case labels of the second path, its break/fallthrough
 * structure and the default case are not visible in this listing - confirm
 * against the complete file.
 */
931 static void dccp_terminate_connection(struct sock *sk)
933 u8 next_state = DCCP_CLOSED;
935 switch (sk->sk_state) {
936 case DCCP_PASSIVE_CLOSE:
937 case DCCP_PASSIVE_CLOSEREQ:
938 dccp_finish_passive_close(sk);
941 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
942 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
945 dccp_send_close(sk, 1);
947 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
948 !dccp_sk(sk)->dccps_server_timewait)
949 next_state = DCCP_ACTIVE_CLOSEREQ;
951 next_state = DCCP_CLOSING;
954 dccp_set_state(sk, next_state);
/*
 * dccp_close - close() handler for DCCP sockets.
 * @sk:      socket being closed
 * @timeout: passed to sk_stream_wait_close()
 *
 * Marks both directions as shut down. A listening socket is moved to
 * DCCP_CLOSED, has its listen queue stopped and jumps to adjudge_to_death.
 * Otherwise the transmit timer is stopped and the receive queue is drained
 * while counting the unread bytes, after which one of three paths runs:
 *  - unread data: warn, send a Reset with DCCP_RESET_CODE_ABORTED and enter
 *    DCCP_CLOSED;
 *  - SOCK_LINGER with a zero linger time: call the protocol disconnect
 *    handler;
 *  - any state other than DCCP_CLOSED: dccp_terminate_connection().
 * After sk_stream_wait_close() the state is sampled, the orphan counter is
 * incremented and, if the socket is in DCCP_CLOSED, it is destroyed with
 * inet_csk_destroy_sock().
 * NOTE(review): the locking calls, the freeing of the dequeued skbs, the
 * adjudge_to_death label itself and the tail of the function are not visible
 * in this listing - confirm against the complete file.
 */
958 void dccp_close(struct sock *sk, long timeout)
960 struct dccp_sock *dp = dccp_sk(sk);
962 u32 data_was_unread = 0;
967 sk->sk_shutdown = SHUTDOWN_MASK;
969 if (sk->sk_state == DCCP_LISTEN) {
970 dccp_set_state(sk, DCCP_CLOSED);
973 inet_csk_listen_stop(sk);
975 goto adjudge_to_death;
978 sk_stop_timer(sk, &dp->dccps_xmit_timer);
981 * We need to flush the recv. buffs. We do this only on the
982 * descriptor close, not protocol-sourced closes, because the
983 *reader process may not have drained the data yet!
985 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
986 data_was_unread += skb->len;
990 if (data_was_unread) {
991 /* Unread data was tossed, send an appropriate Reset Code */
992 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
993 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
994 dccp_set_state(sk, DCCP_CLOSED);
995 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
996 /* Check zero linger _after_ checking for unread data. */
997 sk->sk_prot->disconnect(sk, 0);
998 } else if (sk->sk_state != DCCP_CLOSED) {
999 dccp_terminate_connection(sk);
1002 sk_stream_wait_close(sk, timeout);
1005 state = sk->sk_state;
1008 atomic_inc(sk->sk_prot->orphan_count);
1011 * It is the last release_sock in its life. It will remove backlog.
1015 * Now socket is owned by kernel and we acquire BH lock
1016 * to finish close. No need to check for user refs.
1020 WARN_ON(sock_owned_by_user(sk));
1022 /* Have we already been destroyed by a softirq or backlog? */
1023 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1026 if (sk->sk_state == DCCP_CLOSED)
1027 inet_csk_destroy_sock(sk);
1029 /* Otherwise, socket is reprieved until protocol close. */
1037 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown - shutdown() handler for DCCP sockets.
 * @sk:  socket (not referenced by the visible statement)
 * @how: shutdown mode, only printed
 *
 * The only visible action is a debug message that logs @how.
 */
1039 void dccp_shutdown(struct sock *sk, int how)
1041 dccp_pr_debug("called shutdown(%x)\n", how);
1044 EXPORT_SYMBOL_GPL(dccp_shutdown);
/*
 * dccp_mib_init - set up the dccp_statistics SNMP block, sized for
 * struct dccp_mib. Returns the result of snmp_mib_init().
 */
1046 static inline int dccp_mib_init(void)
1048 return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
/*
 * dccp_mib_exit - release the dccp_statistics SNMP block through
 * snmp_mib_free(); counterpart of dccp_mib_init().
 */
1051 static inline void dccp_mib_exit(void)
1053 snmp_mib_free((void**)dccp_statistics);
/*
 * Module parameters. thash_entries is a read-only (0444) integer described
 * as the number of ehash buckets. With CONFIG_IP_DCCP_DEBUG, dccp_debug is a
 * writable (0644) boolean that enables debug messages and is exported.
 * NOTE(review): the declaration of dccp_debug and the closing of the
 * preprocessor conditional are not visible in this listing.
 */
1056 static int thash_entries;
1057 module_param(thash_entries, int, 0444);
1058 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1060 #ifdef CONFIG_IP_DCCP_DEBUG
1062 module_param(dccp_debug, bool, 0644);
1063 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1065 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init - module initialisation.
 *
 * Visible steps, in order:
 *  - compile-time check that struct dccp_skb_cb fits into the cb field of
 *    struct sk_buff;
 *  - create the dccp_bind_bucket slab cache;
 *  - derive a page goal from num_physpages (shift by 21 - PAGE_SHIFT when
 *    there are at least 128 * 1024 pages, else by 23 - PAGE_SHIFT), or from
 *    thash_entries, and turn it into ehash_order;
 *  - allocate the established hash with __get_free_pages(GFP_ATOMIC),
 *    retrying with a smaller order on failure; ehash_size is decremented
 *    until it is a power of two;
 *  - initialise both chains of every ehash bucket and allocate the ehash
 *    locks;
 *  - allocate the bind hash the same way, starting from ehash_order, and
 *    initialise the lock and chain of every bucket;
 *  - initialise the MIB, the ack-vector code, the sysctl interface and
 *    timestamping.
 * The visible error labels undo the allocations in reverse order: bind hash
 * pages, ehash locks, ehash pages, then the slab cache.
 * NOTE(review): the do-loop openers, the condition selecting thash_entries,
 * the success return and the out_free_dccp_mib / out_ackvec_exit labels are
 * not visible in this listing - confirm against the complete file.
 */
1068 static int __init dccp_init(void)
1071 int ehash_order, bhash_order, i;
1074 BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1075 FIELD_SIZEOF(struct sk_buff, cb));
1077 dccp_hashinfo.bind_bucket_cachep =
1078 kmem_cache_create("dccp_bind_bucket",
1079 sizeof(struct inet_bind_bucket), 0,
1080 SLAB_HWCACHE_ALIGN, NULL);
1081 if (!dccp_hashinfo.bind_bucket_cachep)
1085 * Size and allocate the main established and bind bucket
1088 * The methodology is similar to that of the buffer cache.
1090 if (num_physpages >= (128 * 1024))
1091 goal = num_physpages >> (21 - PAGE_SHIFT);
1093 goal = num_physpages >> (23 - PAGE_SHIFT);
1096 goal = (thash_entries *
1097 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1098 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1101 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1102 sizeof(struct inet_ehash_bucket);
1103 while (dccp_hashinfo.ehash_size &
1104 (dccp_hashinfo.ehash_size - 1))
1105 dccp_hashinfo.ehash_size--;
1106 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1107 __get_free_pages(GFP_ATOMIC, ehash_order);
1108 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1110 if (!dccp_hashinfo.ehash) {
1111 DCCP_CRIT("Failed to allocate DCCP established hash table");
1112 goto out_free_bind_bucket_cachep;
1115 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1116 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1117 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1120 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1121 goto out_free_dccp_ehash;
1123 bhash_order = ehash_order;
1126 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1127 sizeof(struct inet_bind_hashbucket);
1128 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1131 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1132 __get_free_pages(GFP_ATOMIC, bhash_order);
1133 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1135 if (!dccp_hashinfo.bhash) {
1136 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1137 goto out_free_dccp_locks;
1140 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1141 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1142 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1145 rc = dccp_mib_init();
1147 goto out_free_dccp_bhash;
1149 rc = dccp_ackvec_init();
1151 goto out_free_dccp_mib;
1153 rc = dccp_sysctl_init();
1155 goto out_ackvec_exit;
1157 dccp_timestamping_init();
1164 out_free_dccp_bhash:
1165 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1166 dccp_hashinfo.bhash = NULL;
1167 out_free_dccp_locks:
1168 inet_ehash_locks_free(&dccp_hashinfo);
1169 out_free_dccp_ehash:
1170 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1171 dccp_hashinfo.ehash = NULL;
1172 out_free_bind_bucket_cachep:
1173 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1174 dccp_hashinfo.bind_bucket_cachep = NULL;
/*
 * dccp_fini - module teardown.
 *
 * Frees the bind hash and established hash pages, computing each order with
 * get_order() from the stored table size, then frees the ehash locks and
 * destroys the bind-bucket slab cache.
 * NOTE(review): lines at the start and end of this function are missing
 * from this listing, so further teardown calls may exist - confirm against
 * the complete file.
 */
1178 static void __exit dccp_fini(void)
1181 free_pages((unsigned long)dccp_hashinfo.bhash,
1182 get_order(dccp_hashinfo.bhash_size *
1183 sizeof(struct inet_bind_hashbucket)));
1184 free_pages((unsigned long)dccp_hashinfo.ehash,
1185 get_order(dccp_hashinfo.ehash_size *
1186 sizeof(struct inet_ehash_bucket)));
1187 inet_ehash_locks_free(&dccp_hashinfo);
1188 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
/*
 * Module registration: dccp_init() and dccp_fini() are the load and unload
 * hooks, followed by the licence, author and description metadata.
 */
1193 module_init(dccp_init);
1194 module_exit(dccp_fini);
1196 MODULE_LICENSE("GPL");
1197 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1198 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");