4 * An implementation of the DCCP protocol
5 * Arnaldo Carvalho de Melo <acme@conectiva.com.br>
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License version 2 as
9 * published by the Free Software Foundation.
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
25 #include <net/inet_sock.h>
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
/*
 * File-scope DCCP state defined and exported by this file: the MIB
 * statistics (dccp_statistics), an atomic counter dccp_orphan_count that
 * starts at 0, the hash table descriptor dccp_hashinfo, and the TX queue
 * length tunable.
 * NOTE(review): the closing of the dccp_hashinfo initializer is not visible
 * in this extract.
 */
40 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
42 EXPORT_SYMBOL_GPL(dccp_statistics);
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49 .lhash_lock = RW_LOCK_UNLOCKED,
50 .lhash_users = ATOMIC_INIT(0),
51 .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
54 EXPORT_SYMBOL_GPL(dccp_hashinfo);
56 /* Maximum TX queue length, in packets; 0 means no limit. */
57 int sysctl_dccp_tx_qlen __read_mostly = 5;
/*
 * dccp_set_state - move @sk to the DCCP state @state.
 *
 * Logs the transition through dccp_pr_debug() and warns (WARN_ON) when the
 * requested state equals the current one.  The visible statements adjust
 * the DCCP_MIB_CURRESTAB and DCCP_MIB_ESTABRESETS counters depending on the
 * old state and call sk->sk_prot->unhash(sk).
 *
 * NOTE(review): lines are missing from this extract - the switch and case
 * labels, the statement guarded by the icsk_bind_hash test and the final
 * state assignment are not visible.  Confirm against the full file.
 */
59 void dccp_set_state(struct sock *sk, const int state)
61 const int oldstate = sk->sk_state;
63 dccp_pr_debug("%s(%p) %s --> %s\n", dccp_role(sk), sk,
64 dccp_state_name(oldstate), dccp_state_name(state));
65 WARN_ON(state == oldstate);
69 if (oldstate != DCCP_OPEN)
70 DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
74 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75 oldstate == DCCP_CLOSING)
76 DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
78 sk->sk_prot->unhash(sk);
79 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80 !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
84 if (oldstate == DCCP_OPEN)
85 DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
88 /* Change state AFTER socket is unhashed to avoid closed
89 * socket sitting in hash tables.
94 EXPORT_SYMBOL_GPL(dccp_set_state);
/*
 * dccp_finish_passive_close - complete a close that the peer started.
 *
 * Dispatches on sk->sk_state:
 *  - DCCP_PASSIVE_CLOSE: send a Reset with DCCP_RESET_CODE_CLOSED and enter
 *    DCCP_CLOSED;
 *  - DCCP_PASSIVE_CLOSEREQ: call dccp_send_close() with its second argument
 *    set to 1 and enter DCCP_CLOSING.
 *
 * NOTE(review): break statements and closing braces are not visible in this
 * extract.
 */
96 static void dccp_finish_passive_close(struct sock *sk)
98 switch (sk->sk_state) {
99 case DCCP_PASSIVE_CLOSE:
100 /* Node (client or server) has received Close packet. */
101 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102 dccp_set_state(sk, DCCP_CLOSED);
104 case DCCP_PASSIVE_CLOSEREQ:
106 * Client received CloseReq. We set the `active' flag so that
107 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
109 dccp_send_close(sk, 1);
110 dccp_set_state(sk, DCCP_CLOSING);
/*
 * dccp_done - final teardown of the connection on @sk.
 *
 * Puts the socket into DCCP_CLOSED, clears its transmit timers and sets
 * sk_shutdown to SHUTDOWN_MASK.  The sk_state_change callback is invoked
 * when the socket is not flagged SOCK_DEAD.
 *
 * NOTE(review): in this extract inet_csk_destroy_sock() looks unconditional;
 * a line between the two calls is missing (possibly an else) - confirm
 * against the full file.
 */
114 void dccp_done(struct sock *sk)
116 dccp_set_state(sk, DCCP_CLOSED);
117 dccp_clear_xmit_timers(sk);
119 sk->sk_shutdown = SHUTDOWN_MASK;
121 if (!sock_flag(sk, SOCK_DEAD))
122 sk->sk_state_change(sk);
124 inet_csk_destroy_sock(sk);
127 EXPORT_SYMBOL_GPL(dccp_done);
/*
 * dccp_packet_name - map a DCCP packet type to a printable name.
 *
 * Looks @type up in a static table indexed by the DCCP_PKT_* constants.
 * Values >= DCCP_NR_PKT_TYPES take a separate path whose return statement
 * is not visible in this extract; all other values return the table entry.
 *
 * NOTE(review): the visible code does not check for a negative @type.
 */
129 const char *dccp_packet_name(const int type)
131 static const char *dccp_packet_names[] = {
132 [DCCP_PKT_REQUEST] = "REQUEST",
133 [DCCP_PKT_RESPONSE] = "RESPONSE",
134 [DCCP_PKT_DATA] = "DATA",
135 [DCCP_PKT_ACK] = "ACK",
136 [DCCP_PKT_DATAACK] = "DATAACK",
137 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138 [DCCP_PKT_CLOSE] = "CLOSE",
139 [DCCP_PKT_RESET] = "RESET",
140 [DCCP_PKT_SYNC] = "SYNC",
141 [DCCP_PKT_SYNCACK] = "SYNCACK",
144 if (type >= DCCP_NR_PKT_TYPES)
147 return dccp_packet_names[type];
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
/*
 * dccp_state_name - map a DCCP socket state to a printable name.
 *
 * Looks @state up in a static table indexed by the DCCP state constants.
 * Values >= DCCP_MAX_STATES yield the fixed invalid-state string instead of
 * a table entry.  Note that DCCP_ACTIVE_CLOSEREQ is printed as CLOSEREQ.
 *
 * NOTE(review): the visible code does not check for a negative @state.
 */
152 const char *dccp_state_name(const int state)
154 static char *dccp_state_names[] = {
155 [DCCP_OPEN] = "OPEN",
156 [DCCP_REQUESTING] = "REQUESTING",
157 [DCCP_PARTOPEN] = "PARTOPEN",
158 [DCCP_LISTEN] = "LISTEN",
159 [DCCP_RESPOND] = "RESPOND",
160 [DCCP_CLOSING] = "CLOSING",
161 [DCCP_ACTIVE_CLOSEREQ] = "CLOSEREQ",
162 [DCCP_PASSIVE_CLOSE] = "PASSIVE_CLOSE",
163 [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164 [DCCP_TIME_WAIT] = "TIME_WAIT",
165 [DCCP_CLOSED] = "CLOSED",
168 if (state >= DCCP_MAX_STATES)
169 return "INVALID STATE!";
171 return dccp_state_names[state];
174 EXPORT_SYMBOL_GPL(dccp_state_name);
/*
 * dccp_init_sock - set up the DCCP-specific part of a new socket.
 *
 * Initialises the minisock, the initial RTO (DCCP_TIMEOUT_INIT), the retry
 * count taken from sysctl_dccp_request_retries, the write-space and
 * sync-MSS callbacks, an MSS cache of 536, the role and service defaults,
 * both ack ratios (1) and the transmit timers.  The socket starts in
 * DCCP_CLOSED.
 *
 * When @ctl_sock_initialized is non-zero, dccp_feat_init() is called, an
 * ack vector is allocated if dccpms_send_ack_vector is set, and the RX and
 * TX CCID blocks are created.  If either CCID block is NULL, both are
 * deleted, the ack vector is freed and the pointers are reset to NULL.
 * The two INIT_LIST_HEAD calls at the end are presumably the other branch
 * (control socket, no feature negotiation) - confirm.
 *
 * NOTE(review): return statements, error codes, the trailing arguments of
 * the ccid_hc_*_new() calls and the branch structure are not visible in
 * this extract.
 */
176 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
178 struct dccp_sock *dp = dccp_sk(sk);
179 struct dccp_minisock *dmsk = dccp_msk(sk);
180 struct inet_connection_sock *icsk = inet_csk(sk);
182 dccp_minisock_init(&dp->dccps_minisock);
184 icsk->icsk_rto = DCCP_TIMEOUT_INIT;
185 icsk->icsk_syn_retries = sysctl_dccp_request_retries;
186 sk->sk_state = DCCP_CLOSED;
187 sk->sk_write_space = dccp_write_space;
188 icsk->icsk_sync_mss = dccp_sync_mss;
189 dp->dccps_mss_cache = 536;
190 dp->dccps_rate_last = jiffies;
191 dp->dccps_role = DCCP_ROLE_UNDEFINED;
192 dp->dccps_service = DCCP_SERVICE_CODE_IS_ABSENT;
193 dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;
195 dccp_init_xmit_timers(sk);
198 * FIXME: We're hardcoding the CCID, and doing this at this point makes
199 * the listening (master) sock get CCID control blocks, which is not
200 * necessary, but for now, to not mess with the test userspace apps,
201 * lets leave it here, later the real solution is to do this in a
202 * setsockopt(CCIDs-I-want/accept). -acme
204 if (likely(ctl_sock_initialized)) {
205 int rc = dccp_feat_init(dmsk);
210 if (dmsk->dccpms_send_ack_vector) {
211 dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
212 if (dp->dccps_hc_rx_ackvec == NULL)
215 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
217 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
219 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
220 dp->dccps_hc_tx_ccid == NULL)) {
221 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
222 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
223 if (dmsk->dccpms_send_ack_vector) {
224 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
225 dp->dccps_hc_rx_ackvec = NULL;
227 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
231 /* control socket doesn't need feat nego */
232 INIT_LIST_HEAD(&dmsk->dccpms_pending);
233 INIT_LIST_HEAD(&dmsk->dccpms_conf);
239 EXPORT_SYMBOL_GPL(dccp_init_sock);
/*
 * dccp_destroy_sock - release the DCCP-private resources held by @sk.
 *
 * Frees a pending sk_send_head skb, the service list, the RX ack vector
 * (when dccpms_send_ack_vector is set), both CCID blocks and the feature
 * negotiation state.  Each freed pointer is reset to NULL.
 *
 * NOTE(review): the statement guarded by the icsk_bind_hash test and the
 * return value are not visible in this extract.
 */
241 int dccp_destroy_sock(struct sock *sk)
243 struct dccp_sock *dp = dccp_sk(sk);
244 struct dccp_minisock *dmsk = dccp_msk(sk);
247 * DCCP doesn't use sk_write_queue, just sk_send_head
248 * for retransmissions
250 if (sk->sk_send_head != NULL) {
251 kfree_skb(sk->sk_send_head);
252 sk->sk_send_head = NULL;
255 /* Clean up a referenced DCCP bind bucket. */
256 if (inet_csk(sk)->icsk_bind_hash != NULL)
259 kfree(dp->dccps_service_list);
260 dp->dccps_service_list = NULL;
262 if (dmsk->dccpms_send_ack_vector) {
263 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
264 dp->dccps_hc_rx_ackvec = NULL;
266 ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
267 ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
268 dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
270 /* clean up feature negotiation state */
271 dccp_feat_clean(dmsk);
276 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
/*
 * dccp_listen_start - put @sk into the listening role.
 *
 * Sets dccps_role to DCCP_ROLE_LISTEN and returns whatever
 * inet_csk_listen_start() returns for @sk and @backlog.
 */
278 static inline int dccp_listen_start(struct sock *sk, int backlog)
280 struct dccp_sock *dp = dccp_sk(sk);
/* Record the role before handing over to the generic listen code. */
282 dp->dccps_role = DCCP_ROLE_LISTEN;
283 return inet_csk_listen_start(sk, backlog);
/*
 * dccp_need_reset - non-zero for every @state other than DCCP_CLOSED,
 * DCCP_LISTEN and DCCP_REQUESTING.  dccp_disconnect() uses it to decide
 * whether to send a Reset.
 */
286 static inline int dccp_need_reset(int state)
288 return state != DCCP_CLOSED && state != DCCP_LISTEN &&
289 state != DCCP_REQUESTING;
/*
 * dccp_disconnect - abort the connection on @sk and reset socket state.
 *
 * A socket that is not already closed is moved to DCCP_CLOSED.  The
 * previous state then selects the action: a listener is stopped with
 * inet_csk_listen_stop(); a state for which dccp_need_reset() holds sends a
 * Reset with DCCP_RESET_CODE_ABORTED and sets sk_err to ECONNRESET; the
 * REQUESTING state only sets sk_err to ECONNRESET.
 *
 * Afterwards the transmit timers are cleared, the receive queue and any
 * sk_send_head skb are dropped, the source address is reset unless
 * SOCK_BINDADDR_LOCK is set, SOCK_DONE is cleared, the backoff counter is
 * zeroed, delayed-ack state is reinitialised and sk_error_report() is
 * invoked.
 *
 * @flags is not used by the visible code.
 * NOTE(review): some lines, including the return statement, are missing
 * from this extract.
 */
292 int dccp_disconnect(struct sock *sk, int flags)
294 struct inet_connection_sock *icsk = inet_csk(sk);
295 struct inet_sock *inet = inet_sk(sk);
297 const int old_state = sk->sk_state;
299 if (old_state != DCCP_CLOSED)
300 dccp_set_state(sk, DCCP_CLOSED);
303 * This corresponds to the ABORT function of RFC793, sec. 3.8
304 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
306 if (old_state == DCCP_LISTEN) {
307 inet_csk_listen_stop(sk);
308 } else if (dccp_need_reset(old_state)) {
309 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
310 sk->sk_err = ECONNRESET;
311 } else if (old_state == DCCP_REQUESTING)
312 sk->sk_err = ECONNRESET;
314 dccp_clear_xmit_timers(sk);
315 __skb_queue_purge(&sk->sk_receive_queue);
316 if (sk->sk_send_head != NULL) {
317 __kfree_skb(sk->sk_send_head);
318 sk->sk_send_head = NULL;
323 if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
324 inet_reset_saddr(sk);
327 sock_reset_flag(sk, SOCK_DONE);
329 icsk->icsk_backoff = 0;
330 inet_csk_delack_init(sk);
333 BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
335 sk->sk_error_report(sk);
339 EXPORT_SYMBOL_GPL(dccp_disconnect);
342 * Wait for a DCCP event.
344 * Note that we don't need to lock the socket, as the upper poll layers
345 * take care of normal races (between the test and the event) and we don't
346 * go look at any of the socket buffers directly.
/*
 * dccp_poll - compute the poll event mask for a DCCP socket.
 *
 * Registers on sk->sk_sleep through poll_wait().  A listening socket
 * defers to inet_csk_listen_poll().  For other sockets the visible logic is:
 *  - RCV_SHUTDOWN adds POLLIN | POLLRDNORM | POLLRDHUP;
 *  - outside the REQUESTING and RESPOND states, a positive sk_rmem_alloc
 *    adds POLLIN | POLLRDNORM;
 *  - unless SEND_SHUTDOWN is set, enough write space adds
 *    POLLOUT | POLLWRNORM; when space is short the SOCK_ASYNC_NOSPACE and
 *    SOCK_NOSPACE bits are set and the space test is repeated once.
 *
 * NOTE(review): the end of the parameter list, the declaration of mask, the
 * statement under the SHUTDOWN_MASK / DCCP_CLOSED test and the return are
 * not visible in this extract.
 */
348 unsigned int dccp_poll(struct file *file, struct socket *sock,
352 struct sock *sk = sock->sk;
354 poll_wait(file, sk->sk_sleep, wait);
355 if (sk->sk_state == DCCP_LISTEN)
356 return inet_csk_listen_poll(sk);
358 /* Socket is not locked. We are protected from async events
359 by poll logic and correct handling of state changes
360 made by another threads is impossible in any case.
367 if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
369 if (sk->sk_shutdown & RCV_SHUTDOWN)
370 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
373 if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
374 if (atomic_read(&sk->sk_rmem_alloc) > 0)
375 mask |= POLLIN | POLLRDNORM;
377 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
378 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
379 mask |= POLLOUT | POLLWRNORM;
380 } else { /* send SIGIO later */
381 set_bit(SOCK_ASYNC_NOSPACE,
382 &sk->sk_socket->flags);
383 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
385 /* Race breaker. If space is freed after
386 * wspace test but before the flags are set,
387 * IO signal will be lost.
389 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
390 mask |= POLLOUT | POLLWRNORM;
397 EXPORT_SYMBOL_GPL(dccp_poll);
/*
 * dccp_ioctl - DCCP-specific ioctl handler.
 *
 * The visible fragment tests for the DCCP_LISTEN state, peeks at the head
 * of sk_receive_queue and copies amount to user space through an int
 * pointer with put_user().
 *
 * NOTE(review): the dispatch on @cmd, any locking, the computation of
 * amount and the return paths are not visible in this extract.  amount is
 * declared unsigned long but stored through an int pointer - confirm this
 * is intended.
 */
399 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
405 if (sk->sk_state == DCCP_LISTEN)
411 unsigned long amount = 0;
413 skb = skb_peek(&sk->sk_receive_queue);
416 * We will only return the amount of this packet since
417 * that is all that will be read.
421 rc = put_user(amount, (int __user *)arg);
433 EXPORT_SYMBOL_GPL(dccp_ioctl);
/*
 * dccp_setsockopt_service - set the service code and optional service list.
 *
 * @service is the first 32-bit word of the option; @optval and @optlen
 * describe the whole user buffer.  DCCP_SERVICE_INVALID_VALUE and an
 * @optlen above DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32) are rejected.
 *
 * When more than one word was supplied, a list of optlen bytes is
 * allocated, dccpsl_nr is set to optlen / sizeof(u32) - 1 and the words
 * after the first are copied in from user space.  A list containing
 * DCCP_SERVICE_INVALID_VALUE is rejected.  Finally the service code is
 * stored, the previous list is freed and the new one (or NULL) installed.
 *
 * NOTE(review): error codes, the allocation-failure path, cleanup of sl on
 * failure and any socket locking are not visible in this extract.
 */
435 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
436 char __user *optval, int optlen)
438 struct dccp_sock *dp = dccp_sk(sk);
439 struct dccp_service_list *sl = NULL;
441 if (service == DCCP_SERVICE_INVALID_VALUE ||
442 optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
445 if (optlen > sizeof(service)) {
446 sl = kmalloc(optlen, GFP_KERNEL);
450 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
451 if (copy_from_user(sl->dccpsl_list,
452 optval + sizeof(service),
453 optlen - sizeof(service)) ||
454 dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
461 dp->dccps_service = service;
463 kfree(dp->dccps_service_list);
465 dp->dccps_service_list = sl;
470 /* Byte 1 is the feature; the rest is the preference list. */
/*
 * dccp_setsockopt_change - request a feature change on behalf of user space.
 *
 * Copies the struct dccp_so_feat descriptor from @optval, allocates a
 * kernel buffer of opt.dccpsf_len bytes, copies the values referenced by
 * opt.dccpsf_val into it and passes @type, the feature number, the buffer
 * and its length to dccp_feat_change().
 *
 * NOTE(review): allocation-failure handling, freeing of val and the return
 * paths are not visible in this extract.  No range check on dccpsf_len is
 * visible before the kmalloc() - confirm against the full file.
 */
471 static int dccp_setsockopt_change(struct sock *sk, int type,
472 struct dccp_so_feat __user *optval)
474 struct dccp_so_feat opt;
478 if (copy_from_user(&opt, optval, sizeof(opt)))
481 val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
485 if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
490 rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
491 val, opt.dccpsf_len, GFP_KERNEL);
/*
 * do_dccp_setsockopt - handle the DCCP-level socket options.
 *
 * Requires @optlen >= sizeof(int) and reads the first int of @optval into
 * val.  DCCP_SOCKOPT_SERVICE is handed to dccp_setsockopt_service() right
 * away.  The other options visible here:
 *  - PACKET_SIZE: deprecated, only emits a warning;
 *  - CHANGE_L / CHANGE_R: @optlen must equal sizeof(struct dccp_so_feat);
 *    forwarded to dccp_setsockopt_change() with DCCPO_CHANGE_L / _R;
 *  - SERVER_TIMEWAIT: checked against DCCP_ROLE_SERVER, stores val != 0;
 *  - SEND_CSCOV / RECV_CSCOV: val is checked against the range 0..15 and
 *    stored in dccps_pcslen / dccps_pcrlen.
 *
 * @level is not used by the visible code.
 * NOTE(review): the switch statement, error codes, locking and the return
 * are not visible in this extract.
 */
503 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
504 char __user *optval, int optlen)
506 struct dccp_sock *dp = dccp_sk(sk);
509 if (optlen < sizeof(int))
512 if (get_user(val, (int __user *)optval))
515 if (optname == DCCP_SOCKOPT_SERVICE)
516 return dccp_setsockopt_service(sk, val, optval, optlen);
520 case DCCP_SOCKOPT_PACKET_SIZE:
521 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
524 case DCCP_SOCKOPT_CHANGE_L:
525 if (optlen != sizeof(struct dccp_so_feat))
528 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
529 (struct dccp_so_feat __user *)
532 case DCCP_SOCKOPT_CHANGE_R:
533 if (optlen != sizeof(struct dccp_so_feat))
536 err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
537 (struct dccp_so_feat __user *)
540 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
541 if (dp->dccps_role != DCCP_ROLE_SERVER)
544 dp->dccps_server_timewait = (val != 0);
546 case DCCP_SOCKOPT_SEND_CSCOV: /* sender side, RFC 4340, sec. 9.2 */
547 if (val < 0 || val > 15)
550 dp->dccps_pcslen = val;
552 case DCCP_SOCKOPT_RECV_CSCOV: /* receiver side, RFC 4340 sec. 9.2.1 */
553 if (val < 0 || val > 15)
556 dp->dccps_pcrlen = val;
557 /* FIXME: add feature negotiation,
558 * ChangeL(MinimumChecksumCoverage, val) */
/*
 * dccp_setsockopt - setsockopt entry point for DCCP sockets.
 *
 * Levels other than SOL_DCCP are forwarded to the setsockopt handler in
 * icsk_af_ops; SOL_DCCP is handled by do_dccp_setsockopt().
 * NOTE(review): the trailing arguments of the forwarded call are not
 * visible in this extract.
 */
570 int dccp_setsockopt(struct sock *sk, int level, int optname,
571 char __user *optval, int optlen)
573 if (level != SOL_DCCP)
574 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
577 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
580 EXPORT_SYMBOL_GPL(dccp_setsockopt);
/*
 * compat_dccp_setsockopt - compat variant of dccp_setsockopt().
 *
 * Levels other than SOL_DCCP go to inet_csk_compat_setsockopt(); SOL_DCCP
 * uses the same do_dccp_setsockopt() as the native entry point.
 * NOTE(review): any preprocessor guard around this function is not visible
 * in this extract.
 */
583 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
584 char __user *optval, int optlen)
586 if (level != SOL_DCCP)
587 return inet_csk_compat_setsockopt(sk, level, optname,
589 return do_dccp_setsockopt(sk, level, optname, optval, optlen);
592 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
/*
 * dccp_getsockopt_service - report the service code and service list.
 *
 * total_len starts at sizeof(u32) for the service code.  When a service
 * list is attached, slen is its size in bytes (dccpsl_nr * sizeof(u32)).
 * The visible writes store total_len through @optlen, the service code at
 * @optval and the list entries immediately after it.
 *
 * NOTE(review): the third parameter, the update of total_len, the check of
 * @len against it, locking and the return paths are not visible in this
 * extract.
 */
595 static int dccp_getsockopt_service(struct sock *sk, int len,
596 __be32 __user *optval,
599 const struct dccp_sock *dp = dccp_sk(sk);
600 const struct dccp_service_list *sl;
601 int err = -ENOENT, slen = 0, total_len = sizeof(u32);
604 if ((sl = dp->dccps_service_list) != NULL) {
605 slen = sl->dccpsl_nr * sizeof(u32);
614 if (put_user(total_len, optlen) ||
615 put_user(dp->dccps_service, optval) ||
616 (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
/*
 * do_dccp_getsockopt - handle the DCCP-level getsockopt requests.
 *
 * Reads the caller buffer length from @optlen and compares it against
 * sizeof(int).  Options visible here:
 *  - PACKET_SIZE: deprecated, only emits a warning;
 *  - SERVICE: delegated to dccp_getsockopt_service();
 *  - GET_CUR_MPS, SERVER_TIMEWAIT, SEND_CSCOV, RECV_CSCOV: load
 *    dccps_mss_cache, dccps_server_timewait, dccps_pcslen or dccps_pcrlen
 *    into val;
 *  - two further paths (case labels not visible) delegate to
 *    ccid_hc_rx_getsockopt() and ccid_hc_tx_getsockopt().
 * The last visible statement writes len back through @optlen and copies len
 * bytes of val to @optval.
 *
 * NOTE(review): the assignment of dp and the switch statement are not
 * visible in this extract.  len is presumably set to sizeof(val) on a
 * missing line before the final copy - confirm, since copying a larger len
 * from &val would read past val.
 */
623 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
624 char __user *optval, int __user *optlen)
626 struct dccp_sock *dp;
629 if (get_user(len, optlen))
632 if (len < (int)sizeof(int))
638 case DCCP_SOCKOPT_PACKET_SIZE:
639 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
641 case DCCP_SOCKOPT_SERVICE:
642 return dccp_getsockopt_service(sk, len,
643 (__be32 __user *)optval, optlen);
644 case DCCP_SOCKOPT_GET_CUR_MPS:
645 val = dp->dccps_mss_cache;
647 case DCCP_SOCKOPT_SERVER_TIMEWAIT:
648 val = dp->dccps_server_timewait;
650 case DCCP_SOCKOPT_SEND_CSCOV:
651 val = dp->dccps_pcslen;
653 case DCCP_SOCKOPT_RECV_CSCOV:
654 val = dp->dccps_pcrlen;
657 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
658 len, (u32 __user *)optval, optlen);
660 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
661 len, (u32 __user *)optval, optlen);
667 if (put_user(len, optlen) || copy_to_user(optval, &val, len))
/*
 * dccp_getsockopt - getsockopt entry point for DCCP sockets.
 *
 * Levels other than SOL_DCCP are forwarded to the getsockopt handler in
 * icsk_af_ops; SOL_DCCP is handled by do_dccp_getsockopt().
 * NOTE(review): the trailing arguments of the forwarded call are not
 * visible in this extract.
 */
673 int dccp_getsockopt(struct sock *sk, int level, int optname,
674 char __user *optval, int __user *optlen)
676 if (level != SOL_DCCP)
677 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
680 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
683 EXPORT_SYMBOL_GPL(dccp_getsockopt);
/*
 * compat_dccp_getsockopt - compat variant of dccp_getsockopt().
 *
 * Levels other than SOL_DCCP go to inet_csk_compat_getsockopt(); SOL_DCCP
 * uses the same do_dccp_getsockopt() as the native entry point.
 * NOTE(review): any preprocessor guard around this function is not visible
 * in this extract.
 */
686 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
687 char __user *optval, int __user *optlen)
689 if (level != SOL_DCCP)
690 return inet_csk_compat_getsockopt(sk, level, optname,
692 return do_dccp_getsockopt(sk, level, optname, optval, optlen);
695 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
/*
 * dccp_sendmsg - queue one datagram for transmission on @sk.
 *
 * A length above dccps_mss_cache takes an error path.  A non-zero
 * sysctl_dccp_tx_qlen bounds the length of sk_write_queue.  Unless the
 * socket is in the OPEN or PARTOPEN state, the caller waits for the
 * connection through sk_stream_wait_connect() using the send timeout
 * (which honours MSG_DONTWAIT).  The payload is then copied from
 * msg->msg_iov into a new skb that reserves sk_prot->max_header bytes of
 * headroom, the skb is appended to sk_write_queue and dccp_write_xmit()
 * is called.
 *
 * NOTE(review): the last parameter, local declarations, locking, error
 * codes, cleanup labels and the return are not visible in this extract.
 */
698 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
701 const struct dccp_sock *dp = dccp_sk(sk);
702 const int flags = msg->msg_flags;
703 const int noblock = flags & MSG_DONTWAIT;
708 if (len > dp->dccps_mss_cache)
713 if (sysctl_dccp_tx_qlen &&
714 (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
719 timeo = sock_sndtimeo(sk, noblock);
722 * We have to use sk_stream_wait_connect here to set sk_write_pending,
723 * so that the trick in dccp_rcv_request_sent_state_process.
725 /* Wait for a connection to finish. */
726 if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
727 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
730 size = sk->sk_prot->max_header + len;
732 skb = sock_alloc_send_skb(sk, size, noblock, &rc);
737 skb_reserve(skb, sk->sk_prot->max_header);
738 rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
742 skb_queue_tail(&sk->sk_write_queue, skb);
743 dccp_write_xmit(sk,0);
752 EXPORT_SYMBOL_GPL(dccp_sendmsg);
/*
 * dccp_recvmsg - receive one datagram from @sk.
 *
 * Visible logic: a listening socket takes an early path; the receive
 * timeout is derived from @nonblock; the head of sk_receive_queue is peeked
 * and, when a packet is present, its DCCP header type is switched on.
 * On the CLOSEREQ path dccp_finish_passive_close() runs unless MSG_PEEK is
 * set.  Packets reaching the second debug message are dropped with
 * sk_eat_skb().
 *
 * With nothing to deliver, the code checks SOCK_DONE, a pending socket
 * error, RCV_SHUTDOWN, the CLOSED state and pending signals before sleeping
 * in sk_wait_data().
 *
 * On delivery MSG_TRUNC is set when @len is smaller than the packet, the
 * data is copied with skb_copy_datagram_iovec() and the skb is consumed
 * unless MSG_PEEK is set.
 *
 * NOTE(review): many lines are missing from this extract (loop construct,
 * several case labels, locking, labels other than verify_sock_status and
 * the return), so the exact control flow cannot be confirmed here.
 * @addr_len and @iocb are not used by the visible code.
 */
754 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
755 size_t len, int nonblock, int flags, int *addr_len)
757 const struct dccp_hdr *dh;
762 if (sk->sk_state == DCCP_LISTEN) {
767 timeo = sock_rcvtimeo(sk, nonblock);
770 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
773 goto verify_sock_status;
777 switch (dh->dccph_type) {
779 case DCCP_PKT_DATAACK:
783 case DCCP_PKT_CLOSEREQ:
784 if (!(flags & MSG_PEEK))
785 dccp_finish_passive_close(sk);
788 dccp_pr_debug("found fin (%s) ok!\n",
789 dccp_packet_name(dh->dccph_type));
793 dccp_pr_debug("packet_type=%s\n",
794 dccp_packet_name(dh->dccph_type));
795 sk_eat_skb(sk, skb, 0);
798 if (sock_flag(sk, SOCK_DONE)) {
804 len = sock_error(sk);
808 if (sk->sk_shutdown & RCV_SHUTDOWN) {
813 if (sk->sk_state == DCCP_CLOSED) {
814 if (!sock_flag(sk, SOCK_DONE)) {
815 /* This occurs when user tries to read
816 * from never connected socket.
830 if (signal_pending(current)) {
831 len = sock_intr_errno(timeo);
835 sk_wait_data(sk, &timeo);
840 else if (len < skb->len)
841 msg->msg_flags |= MSG_TRUNC;
843 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
844 /* Exception. Bailout! */
849 if (!(flags & MSG_PEEK))
850 sk_eat_skb(sk, skb, 0);
858 EXPORT_SYMBOL_GPL(dccp_recvmsg);
/*
 * inet_dccp_listen - listen() implementation for DCCP sockets.
 *
 * Checks that @sock is SS_UNCONNECTED and of type SOCK_DCCP, and that the
 * socket state is CLOSED or LISTEN.  A socket that is not yet listening is
 * started through dccp_listen_start(); the visible code then stores
 * @backlog in sk_max_ack_backlog.
 *
 * NOTE(review): locking, error codes, the error check after
 * dccp_listen_start() and the return are not visible in this extract.
 */
860 int inet_dccp_listen(struct socket *sock, int backlog)
862 struct sock *sk = sock->sk;
863 unsigned char old_state;
869 if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
872 old_state = sk->sk_state;
873 if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
876 /* Really, if the socket is already in listen state
877 * we can only allow the backlog to be adjusted.
879 if (old_state != DCCP_LISTEN) {
881 * FIXME: here it probably should be sk->sk_prot->listen_start
882 * see tcp_listen_start
884 err = dccp_listen_start(sk, backlog);
888 sk->sk_max_ack_backlog = backlog;
896 EXPORT_SYMBOL_GPL(inet_dccp_listen);
/*
 * dccp_terminate_connection - start or finish closing the connection.
 *
 * In the PASSIVE_CLOSE and PASSIVE_CLOSEREQ states the close is completed
 * by dccp_finish_passive_close().  The remaining visible statements clear
 * the ICSK_TIME_DACK timer (the debug text refers to the PARTOPEN timer),
 * send a Close through dccp_send_close() and pick the next state:
 * DCCP_ACTIVE_CLOSEREQ for a server without dccps_server_timewait,
 * DCCP_CLOSING in the other visible assignment.  dccp_set_state() is then
 * called with next_state, which defaults to DCCP_CLOSED.
 *
 * NOTE(review): several case labels, break or fall-through markers and the
 * else keyword are not visible in this extract.
 */
898 static void dccp_terminate_connection(struct sock *sk)
900 u8 next_state = DCCP_CLOSED;
902 switch (sk->sk_state) {
903 case DCCP_PASSIVE_CLOSE:
904 case DCCP_PASSIVE_CLOSEREQ:
905 dccp_finish_passive_close(sk);
908 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
909 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
912 dccp_send_close(sk, 1);
914 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
915 !dccp_sk(sk)->dccps_server_timewait)
916 next_state = DCCP_ACTIVE_CLOSEREQ;
918 next_state = DCCP_CLOSING;
921 dccp_set_state(sk, next_state);
/*
 * dccp_close - close() implementation for DCCP sockets.
 *
 * Sets sk_shutdown to SHUTDOWN_MASK.  A listening socket is moved to
 * DCCP_CLOSED, its listener is stopped and control jumps to
 * adjudge_to_death.  Otherwise the dccps_xmit_timer is stopped and the
 * receive queue is drained while the unread byte count is summed.
 *
 * Then one of the following happens:
 *  - unread data: warn, send a Reset with DCCP_RESET_CODE_ABORTED and enter
 *    DCCP_CLOSED;
 *  - SOCK_LINGER with a zero linger time: call sk_prot->disconnect();
 *  - any other non-closed state: dccp_terminate_connection().
 * sk_stream_wait_close() is called with @timeout.
 *
 * Afterwards the state is sampled, the protocol orphan counter is
 * incremented, and a socket found in DCCP_CLOSED is destroyed with
 * inet_csk_destroy_sock().
 *
 * NOTE(review): locking and unlocking, sock_hold / sock_orphan / sock_put
 * style calls, the freeing of dequeued skbs, the labels and the statement
 * under the softirq-or-backlog test are not visible in this extract.
 */
925 void dccp_close(struct sock *sk, long timeout)
927 struct dccp_sock *dp = dccp_sk(sk);
929 u32 data_was_unread = 0;
934 sk->sk_shutdown = SHUTDOWN_MASK;
936 if (sk->sk_state == DCCP_LISTEN) {
937 dccp_set_state(sk, DCCP_CLOSED);
940 inet_csk_listen_stop(sk);
942 goto adjudge_to_death;
945 sk_stop_timer(sk, &dp->dccps_xmit_timer);
948 * We need to flush the recv. buffs. We do this only on the
949 * descriptor close, not protocol-sourced closes, because the
950 *reader process may not have drained the data yet!
952 while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
953 data_was_unread += skb->len;
957 if (data_was_unread) {
958 /* Unread data was tossed, send an appropriate Reset Code */
959 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
960 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
961 dccp_set_state(sk, DCCP_CLOSED);
962 } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
963 /* Check zero linger _after_ checking for unread data. */
964 sk->sk_prot->disconnect(sk, 0);
965 } else if (sk->sk_state != DCCP_CLOSED) {
966 dccp_terminate_connection(sk);
969 sk_stream_wait_close(sk, timeout);
972 state = sk->sk_state;
975 atomic_inc(sk->sk_prot->orphan_count);
978 * It is the last release_sock in its life. It will remove backlog.
982 * Now socket is owned by kernel and we acquire BH lock
983 * to finish close. No need to check for user refs.
987 BUG_TRAP(!sock_owned_by_user(sk));
989 /* Have we already been destroyed by a softirq or backlog? */
990 if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
993 if (sk->sk_state == DCCP_CLOSED)
994 inet_csk_destroy_sock(sk);
996 /* Otherwise, socket is reprieved until protocol close. */
1004 EXPORT_SYMBOL_GPL(dccp_close);
/*
 * dccp_shutdown - shutdown() hook for DCCP sockets.
 *
 * The only visible action is a debug message reporting @how; no shutdown
 * processing is visible in this extract.
 */
1006 void dccp_shutdown(struct sock *sk, int how)
1008 dccp_pr_debug("called shutdown(%x)\n", how);
1011 EXPORT_SYMBOL_GPL(dccp_shutdown);
/*
 * dccp_mib_init - allocate the two per-CPU dccp_mib blocks.
 *
 * Both entries of dccp_statistics are allocated with alloc_percpu().  The
 * trailing cleanup frees entry 0 and resets it to NULL, presumably on the
 * path taken when the second allocation fails - confirm.
 *
 * NOTE(review): return values, goto statements and labels are not visible
 * in this extract.
 */
1013 static int __init dccp_mib_init(void)
1017 dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1018 if (dccp_statistics[0] == NULL)
1021 dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1022 if (dccp_statistics[1] == NULL)
1029 free_percpu(dccp_statistics[0]);
1030 dccp_statistics[0] = NULL;
/*
 * dccp_mib_exit - free both per-CPU dccp_mib blocks and reset the
 * dccp_statistics pointers to NULL.
 */
1035 static void dccp_mib_exit(void)
1037 free_percpu(dccp_statistics[0]);
1038 free_percpu(dccp_statistics[1]);
1039 dccp_statistics[0] = dccp_statistics[1] = NULL;
/*
 * Module parameters (both read-only to users, mode 0444):
 *  - thash_entries: requested number of ehash buckets, used by dccp_init()
 *    when sizing the established hash table;
 *  - dccp_debug: enables debug messages; only built with
 *    CONFIG_IP_DCCP_DEBUG.
 * NOTE(review): the definition of dccp_debug and the matching endif are
 * not visible in this extract.
 */
1042 static int thash_entries;
1043 module_param(thash_entries, int, 0444);
1044 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1046 #ifdef CONFIG_IP_DCCP_DEBUG
1048 module_param(dccp_debug, bool, 0444);
1049 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1051 EXPORT_SYMBOL_GPL(dccp_debug);
/*
 * dccp_init - module initialisation for the DCCP core.
 *
 * Visible steps, in order:
 *  1. create the dccp_bind_bucket slab cache;
 *  2. derive a page goal from num_physpages (a larger share at or above
 *     128 * 1024 pages), or from thash_entries in the other visible
 *     assignment, and turn it into an allocation order;
 *  3. allocate the established hash with __get_free_pages(), rounding the
 *     bucket count down to a power of two and retrying with smaller orders
 *     on failure, then initialise both chains of every bucket;
 *  4. allocate the ehash locks;
 *  5. allocate the bind hash starting from the ehash order, retrying with
 *     smaller orders, then initialise the lock and chain of every bucket;
 *  6. run dccp_mib_init(), dccp_ackvec_init() and dccp_sysctl_init(), then
 *     dccp_timestamping_init().
 * The labels at the end undo the allocations in reverse order.
 *
 * NOTE(review): the return statements, the rc checks, the do keywords of
 * both loops, the thash_entries condition and the out_free_dccp_mib and
 * out_ackvec_exit labels are not visible in this extract.
 */
1054 static int __init dccp_init(void)
1057 int ehash_order, bhash_order, i;
1060 dccp_hashinfo.bind_bucket_cachep =
1061 kmem_cache_create("dccp_bind_bucket",
1062 sizeof(struct inet_bind_bucket), 0,
1063 SLAB_HWCACHE_ALIGN, NULL);
1064 if (!dccp_hashinfo.bind_bucket_cachep)
1068 * Size and allocate the main established and bind bucket
1071 * The methodology is similar to that of the buffer cache.
1073 if (num_physpages >= (128 * 1024))
1074 goal = num_physpages >> (21 - PAGE_SHIFT);
1076 goal = num_physpages >> (23 - PAGE_SHIFT);
1079 goal = (thash_entries *
1080 sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1081 for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1084 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1085 sizeof(struct inet_ehash_bucket);
1086 while (dccp_hashinfo.ehash_size &
1087 (dccp_hashinfo.ehash_size - 1))
1088 dccp_hashinfo.ehash_size--;
1089 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1090 __get_free_pages(GFP_ATOMIC, ehash_order);
1091 } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1093 if (!dccp_hashinfo.ehash) {
1094 DCCP_CRIT("Failed to allocate DCCP established hash table");
1095 goto out_free_bind_bucket_cachep;
1098 for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1099 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1100 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1103 if (inet_ehash_locks_alloc(&dccp_hashinfo))
1104 goto out_free_dccp_ehash;
1106 bhash_order = ehash_order;
1109 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1110 sizeof(struct inet_bind_hashbucket);
1111 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1114 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1115 __get_free_pages(GFP_ATOMIC, bhash_order);
1116 } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1118 if (!dccp_hashinfo.bhash) {
1119 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1120 goto out_free_dccp_locks;
1123 for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1124 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1125 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1128 rc = dccp_mib_init();
1130 goto out_free_dccp_bhash;
1132 rc = dccp_ackvec_init();
1134 goto out_free_dccp_mib;
1136 rc = dccp_sysctl_init();
1138 goto out_ackvec_exit;
1140 dccp_timestamping_init();
1147 out_free_dccp_bhash:
1148 free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1149 dccp_hashinfo.bhash = NULL;
1150 out_free_dccp_locks:
1151 inet_ehash_locks_free(&dccp_hashinfo);
1152 out_free_dccp_ehash:
1153 free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1154 dccp_hashinfo.ehash = NULL;
1155 out_free_bind_bucket_cachep:
1156 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1157 dccp_hashinfo.bind_bucket_cachep = NULL;
/*
 * dccp_fini - module teardown for the DCCP core.
 *
 * Frees the bind hash and the established hash pages, recomputing each
 * allocation order with get_order() from the stored bucket count, then
 * frees the ehash locks and destroys the bind bucket slab cache.
 *
 * NOTE(review): other teardown calls (for example for the MIB, ack vector
 * or sysctl state set up in dccp_init) are not visible in this extract.
 */
1161 static void __exit dccp_fini(void)
1164 free_pages((unsigned long)dccp_hashinfo.bhash,
1165 get_order(dccp_hashinfo.bhash_size *
1166 sizeof(struct inet_bind_hashbucket)));
1167 free_pages((unsigned long)dccp_hashinfo.ehash,
1168 get_order(dccp_hashinfo.ehash_size *
1169 sizeof(struct inet_ehash_bucket)));
1170 inet_ehash_locks_free(&dccp_hashinfo);
1171 kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
/*
 * Module registration: dccp_init() and dccp_fini() are the load and unload
 * hooks, followed by the licence, author and description metadata.
 */
1176 module_init(dccp_init);
1177 module_exit(dccp_fini);
1179 MODULE_LICENSE("GPL");
1180 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1181 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");