/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *  This program is free software; you can redistribute it and/or modify it
 *  under the terms of the GNU General Public License version 2 as
 *  published by the Free Software Foundation.
 */
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/ioctls.h>
#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);
/* The maximum TX queue length, in packets; 0 means no limit. */
int sysctl_dccp_tx_qlen __read_mostly = 5;
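
/*
 * Illustrative only (assumes the table in net/dccp/sysctl.c exposes this
 * knob as "tx_qlen" under net.dccp.default, as in mainline):
 *
 *	# allow up to 10 packets to queue up for transmission
 *	sysctl -w net.dccp.default.tx_qlen=10
 */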
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
		      dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(&dccp_hashinfo, sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}
94 EXPORT_SYMBOL_GPL(dccp_set_state);
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}
109 EXPORT_SYMBOL_GPL(dccp_done);
const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}
132 EXPORT_SYMBOL_GPL(dccp_packet_name);
const char *dccp_state_name(const int state)
{
	static const char *dccp_state_names[] = {
		[DCCP_OPEN]	  = "OPEN",
		[DCCP_REQUESTING] = "REQUESTING",
		[DCCP_PARTOPEN]	  = "PARTOPEN",
		[DCCP_LISTEN]	  = "LISTEN",
		[DCCP_RESPOND]	  = "RESPOND",
		[DCCP_CLOSING]	  = "CLOSING",
		[DCCP_TIME_WAIT]  = "TIME_WAIT",
		[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";
	else
		return dccp_state_names[state];
}
153 EXPORT_SYMBOL_GPL(dccp_state_name);
void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}
160 EXPORT_SYMBOL_GPL(dccp_hash);
void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}
167 EXPORT_SYMBOL_GPL(dccp_unhash);
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(dmsk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	return 0;
}
231 EXPORT_SYMBOL_GPL(dccp_init_sock);
int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(dmsk);

	return 0;
}
268 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
static inline int dccp_listen_start(struct sock *sk, int backlog)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	return inet_csk_listen_start(sk, backlog);
}
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
320 EXPORT_SYMBOL_GPL(dccp_disconnect);
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
378 EXPORT_SYMBOL_GPL(dccp_poll);
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);

	return rc;
}
414 EXPORT_SYMBOL_GPL(dccp_ioctl);
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
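
/*
 * Usage sketch (userspace, illustrative only): per RFC 4340, sec. 8.1.2
 * every connection carries a 32-bit service code, so an application
 * typically sets it before connect() or listen():
 *
 *	const __be32 service = htonl(42);
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		       &service, sizeof(service)) < 0)
 *		perror("setsockopt(DCCP_SOCKOPT_SERVICE)");
 */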
/* Byte 1 is the feature number; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
			      val, opt.dccpsf_len, GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		err = 0;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:	/* sender side, RFC 4340, sec. 9.2 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else
			dp->dccps_pcslen = val;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:	/* receiver side, RFC 4340 sec. 9.2.1 */
		if (val < 0 || val > 15)
			err = -EINVAL;
		else {
			dp->dccps_pcrlen = val;
			/* FIXME: add feature negotiation,
			 * ChangeL(MinimumChecksumCoverage, val) */
		}
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}
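
/*
 * Usage sketch (userspace, illustrative only): a sender restricting the
 * checksum to the headers plus 3 words of payload, per RFC 4340, sec. 9.2
 * (a value n in 1..15 covers n - 1 words of data, 0 means full coverage,
 * matching the range check above):
 *
 *	int cscov = 4;
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		   &cscov, sizeof(cscov));
 */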
int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_setsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
			      char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		len = sizeof(val);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		len = sizeof(val);
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level != SOL_DCCP)
		return inet_csk_compat_getsockopt(sk, level, optname,
						  optval, optlen);
	return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
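
/*
 * Usage sketch (userspace, illustrative only): querying the current
 * maximum packet size before sizing application datagrams:
 *
 *	int mps;
 *	socklen_t optlen = sizeof(mps);
 *
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_GET_CUR_MPS, &mps, &optlen);
 */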
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk, 0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}
726 EXPORT_SYMBOL_GPL(dccp_sendmsg);
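
/*
 * Note that, unlike TCP, each dccp_sendmsg() call maps onto exactly one
 * DCCP datagram: messages larger than the current MPS are rejected above
 * with -EMSGSIZE rather than being fragmented across packets.
 */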
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb, 0);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
825 EXPORT_SYMBOL_GPL(dccp_recvmsg);
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}
863 EXPORT_SYMBOL_GPL(inet_dccp_listen);
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}
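
/*
 * dccp_close_state() returns non-zero iff the transition out of the
 * current state has DCCP_ACTION_FIN set in its dccp_new_state[] entry,
 * in which case dccp_close() below must send a CLOSE packet.
 */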
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
980 EXPORT_SYMBOL_GPL(dccp_close);
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
987 EXPORT_SYMBOL_GPL(dccp_shutdown);
static int __init dccp_mib_init(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static void dccp_mib_exit(void)
{
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
}
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
		goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}
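
/*
 * Illustrative only (assumes the DCCP core is built as the "dccp" module):
 * the established-hash sizing above can be overridden at load time, e.g.
 *
 *	modprobe dccp thash_entries=8192
 *
 * otherwise the table is scaled from num_physpages as computed above.
 */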
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Control Protocol");