/*
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *	This program is free software; you can redistribute it and/or modify it
 *	under the terms of the GNU General Public License version 2 as
 *	published by the Free Software Foundation.
 */

#include <linux/config.h>
#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_common.h>
#include <net/inet_sock.h>
#include <net/protocol.h>
#include <net/sock.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait	= __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
};

const char *dccp_packet_name(const int type)
{
	static const char *dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]	    = "DATA",
		[DCCP_PKT_ACK]	    = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]	    = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	if (type >= DCCP_NR_PKT_TYPES)
		return "INVALID";
	else
		return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
	static char *dccp_state_names[] = {
		[DCCP_OPEN]	  = "OPEN",
		[DCCP_REQUESTING] = "REQUESTING",
		[DCCP_PARTOPEN]	  = "PARTOPEN",
		[DCCP_LISTEN]	  = "LISTEN",
		[DCCP_RESPOND]	  = "RESPOND",
		[DCCP_CLOSING]	  = "CLOSING",
		[DCCP_TIME_WAIT]  = "TIME_WAIT",
		[DCCP_CLOSED]	  = "CLOSED",
	};

	if (state >= DCCP_MAX_STATES)
		return "INVALID STATE!";

	return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
	inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
	inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

int dccp_init_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	static int dccp_ctl_socket_init = 1;

	dccp_options_init(&dp->dccps_options);
	do_gettimeofday(&dp->dccps_epoch);

	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * let's leave it here; later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(!dccp_ctl_socket_init)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		if (dp->dccps_options.dccpo_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid =
			ccid_hc_rx_new(dp->dccps_options.dccpo_rx_ccid,
				       sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid =
			ccid_hc_tx_new(dp->dccps_options.dccpo_tx_ccid,
				       sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dp->dccps_options.dccpo_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_pending);
		INIT_LIST_HEAD(&dp->dccps_options.dccpo_conf);
		dccp_ctl_socket_init = 0;
	}

	dccp_init_xmit_timers(sk);
	icsk->icsk_rto = DCCP_TIMEOUT_INIT;
	sk->sk_state = DCCP_CLOSED;
	sk->sk_write_space = dccp_write_space;
	icsk->icsk_sync_mss = dccp_sync_mss;
	dp->dccps_mss_cache = 536;
	dp->dccps_role = DCCP_ROLE_UNDEFINED;
	dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
	dp->dccps_l_ack_ratio = dp->dccps_r_ack_ratio = 1;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(&dccp_hashinfo, sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dp->dccps_options.dccpo_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_clean(sk);

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);

	dp->dccps_role = DCCP_ROLE_LISTEN;
	/*
	 * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
	 * before calling listen() (see the sketch below)
	 */
	if (dccp_service_not_initialized(sk))
		return -EPROTO;
	return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}

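/*
 * Illustrative userspace sketch, not part of this file: setting the
 * service code before listen(). The value 42 and the addr variable are
 * arbitrary examples; SOL_DCCP and DCCP_SOCKOPT_SERVICE are the real
 * knobs checked above.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	__be32 service = htonl(42);
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		   &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&addr, sizeof(addr));
 *	listen(fd, 5);	   without a service code this fails (-EPROTO)
 */
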
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 * Wait for a DCCP event.
 *
 * Note that we don't need to lock the socket, as the upper poll layers
 * take care of normal races (between the test and the event) and we don't
 * go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	 * by poll logic and correct handling of state changes
	 * made by other threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else { /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}

	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}

/* byte 1 is feature.  the rest is the preference list */
static int dccp_setsockopt_change(struct sock *sk, int type,
				  struct dccp_so_feat __user *optval)
{
	struct dccp_so_feat opt;
	u8 *val;
	int rc;

	if (copy_from_user(&opt, optval, sizeof(opt)))
		return -EFAULT;

	val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
	if (!val)
		return -ENOMEM;

	if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
		rc = -EFAULT;
		goto out_free_val;
	}

	rc = dccp_feat_change(sk, type, opt.dccpsf_feat, val, opt.dccpsf_len,
			      GFP_KERNEL);
	if (rc)
		goto out_free_val;

out:
	return rc;

out_free_val:
	kfree(val);
	goto out;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int optlen)
{
	struct dccp_sock *dp;
	int err;
	int val;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
							     optname, optval,
							     optlen);
	if (optlen < sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	dp = dccp_sk(sk);
	err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		dp->dccps_packet_size = val;
		break;
	case DCCP_SOCKOPT_CHANGE_L:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	case DCCP_SOCKOPT_CHANGE_R:
		if (optlen != sizeof(struct dccp_so_feat))
			err = -EINVAL;
		else
			err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
						     (struct dccp_so_feat __user *)
						     optval);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}

	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

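/*
 * Illustrative userspace sketch, not part of this file: the "byte 1 is
 * feature, rest is preference list" layout noted before
 * dccp_setsockopt_change() maps to struct dccp_so_feat. The CCID values
 * below are hypothetical preferences; DCCPF_CCID and
 * DCCP_SOCKOPT_CHANGE_L are real identifiers from <linux/dccp.h>.
 *
 *	unsigned char ccids[] = { 2, 3 };	preference order
 *	struct dccp_so_feat opt = {
 *		.dccpsf_feat = DCCPF_CCID,	feature number (byte 1)
 *		.dccpsf_val  = ccids,		preference list
 *		.dccpsf_len  = sizeof(ccids),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &opt, sizeof(opt));
 */
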
static int dccp_getsockopt_service(struct sock *sk, int len,
				   __be32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (level != SOL_DCCP)
		return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
							     optname, optval,
							     optlen);
	if (get_user(len, optlen))
		return -EFAULT;

	if (len < sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		val = dp->dccps_packet_size;
		len = sizeof(dp->dccps_packet_size);
		break;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

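/*
 * Illustrative userspace sketch, not part of this file: reading the
 * service code(s) back. Per the copy_to_user() layout in
 * dccp_getsockopt_service() above, the first __be32 is dccps_service,
 * followed by the optional service list.
 *
 *	__be32 codes[DCCP_SERVICE_LIST_MAX_LEN + 1];
 *	socklen_t len = sizeof(codes);
 *
 *	getsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, codes, &len);
 */
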
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 *     Current plan however is to _use_ sk_write_queue with
	 *     an algorithm similar to tcp_sendmsg, where the main difference
	 *     is that in DCCP we have to respect packet boundaries, so
	 *     no coalescing of skbs.
	 *
	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
	 *     generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}
		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}
		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}
		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}
		if (!timeo) {
			len = -EAGAIN;
			break;
		}
		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

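/*
 * Illustrative userspace sketch, not part of this file: DCCP preserves
 * packet boundaries, so each send() becomes one DCCP-Data packet and
 * each recv() consumes at most one packet. Oversized writes are
 * rejected with EMSGSIZE in dccp_sendmsg() above instead of being
 * fragmented, and a short read sets MSG_TRUNC as in dccp_recvmsg().
 *
 *	char buf[1500];
 *	ssize_t n;
 *
 *	if (send(fd, buf, sizeof(buf), 0) < 0 && errno == EMSGSIZE)
 *		;			the write exceeded the current MSS
 *	n = recv(fd, buf, 128, 0);	one packet, truncated if > 128 bytes
 */
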
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

static const unsigned char dccp_new_state[] = {
	/* current state:	 new state:	   action:	*/
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
	const int next = dccp_new_state[sk->sk_state];
	const int ns = next & DCCP_STATE_MASK;

	if (ns != sk->sk_state)
		dccp_set_state(sk, ns);

	return next & DCCP_ACTION_FIN;
}

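/*
 * Worked example of the table above: for a socket in DCCP_OPEN,
 * dccp_new_state[DCCP_OPEN] == (DCCP_CLOSING | DCCP_ACTION_FIN), so
 * dccp_close_state() moves the socket to DCCP_CLOSING (next &
 * DCCP_STATE_MASK) and returns a non-zero DCCP_ACTION_FIN, telling
 * dccp_close() below to send a CLOSE packet. For DCCP_LISTEN the entry
 * is plain DCCP_CLOSED: the state changes and no CLOSE is sent.
 */
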
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs. We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static const struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};

extern struct net_proto_family inet_family_ops;

static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= INET_PROTOSW_ICSK,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at initialization time.
 */
struct socket *dccp_ctl_socket;

static char dccp_ctl_socket_err_msg[] __initdata =
	KERN_ERR "DCCP: Failed to create the control socket.\n";

static int __init dccp_ctl_sock_init(void)
{
	int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
				  &dccp_ctl_socket);
	if (rc < 0)
		printk(dccp_ctl_socket_err_msg);
	else {
		dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
		inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;

		/* Unhash it so that IP input processing does not even
		 * see it, we do not wish this socket to see incoming
		 * packets.
		 */
		dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
	}

	return rc;
}

#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket != NULL) {
		sock_release(dccp_ctl_socket);
		dccp_ctl_socket = NULL;
	}
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif

static int __init init_dccp_v4_mibs(void)
{
	int rc = -ENOMEM;

	dccp_statistics[0] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[0] == NULL)
		goto out;

	dccp_statistics[1] = alloc_percpu(struct dccp_mib);
	if (dccp_statistics[1] == NULL)
		goto out_free_one;

	rc = 0;
out:
	return rc;
out_free_one:
	free_percpu(dccp_statistics[0]);
	dccp_statistics[0] = NULL;
	goto out;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = proto_register(&dccp_prot, 1);

	if (rc)
		goto out;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_proto_unregister;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
				 "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

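	/*
	 * Worked example of the sizing above (illustrative numbers,
	 * assuming 4KB pages, i.e. PAGE_SHIFT == 12, and no thash_entries
	 * override): with num_physpages == 256K pages (1GB of RAM),
	 * goal = 256K >> (21 - 12) = 512 pages, so ehash_order becomes 9.
	 * The entry count is then rounded down to a power of two, and the
	 * order is decremented until __get_free_pages() succeeds.
	 */
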
	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = init_dccp_v4_mibs();
	if (rc)
		goto out_free_dccp_bhash;

	rc = -EAGAIN;
	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
		goto out_free_dccp_v4_mibs;

	inet_register_protosw(&dccp_v4_protosw);

	rc = dccp_ackvec_init();
	if (rc)
		goto out_unregister_protosw;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	rc = dccp_ctl_sock_init();
	if (rc)
		goto out_sysctl_exit;
out:
	return rc;
out_sysctl_exit:
	dccp_sysctl_exit();
out_ackvec_exit:
	dccp_ackvec_exit();
out_unregister_protosw:
	inet_unregister_protosw(&dccp_v4_protosw);
	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
	proto_unregister(&dccp_prot);
	goto out;
}

static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_prot);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly. Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");