Merge git://git.kernel.org/pub/scm/linux/kernel/git/bunk/trivial
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/ip.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37 #include <linux/dccp.h>
38
39 #include "ccid.h"
40 #include "dccp.h"
41
/* Per-CPU SNMP counters for the DCCP MIB (exported via /proc statistics). */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

/* Count of DCCP sockets closed by their owner but still completing the
 * protocol shutdown sequence. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

/* Hooks DCCP into IPv4 input processing and ICMP error delivery. */
static struct net_protocol dccp_protocol = {
	.handler	= dccp_v4_rcv,
	.err_handler	= dccp_v4_err,
};
50
51 const char *dccp_packet_name(const int type)
52 {
53         static const char *dccp_packet_names[] = {
54                 [DCCP_PKT_REQUEST]  = "REQUEST",
55                 [DCCP_PKT_RESPONSE] = "RESPONSE",
56                 [DCCP_PKT_DATA]     = "DATA",
57                 [DCCP_PKT_ACK]      = "ACK",
58                 [DCCP_PKT_DATAACK]  = "DATAACK",
59                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60                 [DCCP_PKT_CLOSE]    = "CLOSE",
61                 [DCCP_PKT_RESET]    = "RESET",
62                 [DCCP_PKT_SYNC]     = "SYNC",
63                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
64         };
65
66         if (type >= DCCP_NR_PKT_TYPES)
67                 return "INVALID";
68         else
69                 return dccp_packet_names[type];
70 }
71
72 EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74 const char *dccp_state_name(const int state)
75 {
76         static char *dccp_state_names[] = {
77         [DCCP_OPEN]       = "OPEN",
78         [DCCP_REQUESTING] = "REQUESTING",
79         [DCCP_PARTOPEN]   = "PARTOPEN",
80         [DCCP_LISTEN]     = "LISTEN",
81         [DCCP_RESPOND]    = "RESPOND",
82         [DCCP_CLOSING]    = "CLOSING",
83         [DCCP_TIME_WAIT]  = "TIME_WAIT",
84         [DCCP_CLOSED]     = "CLOSED",
85         };
86
87         if (state >= DCCP_MAX_STATES)
88                 return "INVALID STATE!";
89         else
90                 return dccp_state_names[state];
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_state_name);
94
95 static inline int dccp_listen_start(struct sock *sk)
96 {
97         struct dccp_sock *dp = dccp_sk(sk);
98
99         dp->dccps_role = DCCP_ROLE_LISTEN;
100         /*
101          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
102          * before calling listen()
103          */
104         if (dccp_service_not_initialized(sk))
105                 return -EPROTO;
106         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
107 }
108
/*
 * Abort the connection and return the socket to a clean CLOSED state so
 * it can be reused.  This is the ABORT function of RFC 793 adapted to
 * DCCP: timers are stopped, all queued packets dropped and addressing
 * state reset.  Always returns 0 (err is never set here).
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/* ABORT function of RFC793 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	/* FIXME: do the active reset thing */
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	/* Stop retransmit/delack timers and drop everything queued. */
	dccp_clear_xmit_timers(sk);
	__skb_queue_purge(&sk->sk_receive_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Forget the source address unless the user explicitly bound it. */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A socket with a local port must still hold its bind bucket. */
	BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}
150
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 */
static unsigned int dccp_poll(struct file *file, struct socket *sock,
			      poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM;

	/* Connected? (i.e. past the REQUESTING/RESPOND handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}
206
/* No DCCP-specific ioctls are implemented yet; only logs entry. */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	dccp_pr_debug("entry\n");
	return -ENOIOCTLCMD;
}
212
/*
 * Install the service code(s) for this socket.  optval holds one u32
 * service code (already copied into @service by the caller) optionally
 * followed by further u32 codes, which are stored in a freshly allocated
 * dccp_service_list replacing any previous one.
 *
 * NOTE(review): an invalid service code inside the list (found by
 * dccp_list_has_service()) is reported as -EFAULT, the same errno as a
 * failed copy_from_user() - arguably it should be -EINVAL, but changing
 * it is user-visible; confirm before touching.
 */
static int dccp_setsockopt_service(struct sock *sk, const u32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		/* More than one code supplied: copy in the remainder. */
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Drop any previously installed list before swapping in the new. */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
247
248 int dccp_setsockopt(struct sock *sk, int level, int optname,
249                     char __user *optval, int optlen)
250 {
251         struct dccp_sock *dp;
252         int err;
253         int val;
254
255         if (level != SOL_DCCP)
256                 return ip_setsockopt(sk, level, optname, optval, optlen);
257
258         if (optlen < sizeof(int))
259                 return -EINVAL;
260
261         if (get_user(val, (int __user *)optval))
262                 return -EFAULT;
263
264         if (optname == DCCP_SOCKOPT_SERVICE)
265                 return dccp_setsockopt_service(sk, val, optval, optlen);
266
267         lock_sock(sk);
268         dp = dccp_sk(sk);
269         err = 0;
270
271         switch (optname) {
272         case DCCP_SOCKOPT_PACKET_SIZE:
273                 dp->dccps_packet_size = val;
274                 break;
275         default:
276                 err = -ENOPROTOOPT;
277                 break;
278         }
279         
280         release_sock(sk);
281         return err;
282 }
283
/*
 * Copy the socket's service code (plus any additional service list) out
 * to userland.  On success optval holds the primary u32 service code
 * followed by dccpsl_nr extra codes, and *optlen is set to the total
 * byte count.  Returns -ENOENT if no service was ever configured,
 * -EINVAL if the user buffer is too small, -EFAULT on copy failure.
 */
static int dccp_getsockopt_service(struct sock *sk, int len,
				   u32 __user *optval,
				   int __user *optlen)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const struct dccp_service_list *sl;
	int err = -ENOENT, slen = 0, total_len = sizeof(u32);

	lock_sock(sk);
	if (dccp_service_not_initialized(sk))
		goto out;

	if ((sl = dp->dccps_service_list) != NULL) {
		slen = sl->dccpsl_nr * sizeof(u32);
		total_len += slen;
	}

	err = -EINVAL;
	if (total_len > len)
		goto out;

	err = 0;
	if (put_user(total_len, optlen) ||
	    put_user(dp->dccps_service, optval) ||
	    (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
		err = -EFAULT;
out:
	release_sock(sk);
	return err;
}
314
315 int dccp_getsockopt(struct sock *sk, int level, int optname,
316                     char __user *optval, int __user *optlen)
317 {
318         struct dccp_sock *dp;
319         int val, len;
320
321         if (level != SOL_DCCP)
322                 return ip_getsockopt(sk, level, optname, optval, optlen);
323
324         if (get_user(len, optlen))
325                 return -EFAULT;
326
327         if (len < sizeof(int))
328                 return -EINVAL;
329
330         dp = dccp_sk(sk);
331
332         switch (optname) {
333         case DCCP_SOCKOPT_PACKET_SIZE:
334                 val = dp->dccps_packet_size;
335                 len = sizeof(dp->dccps_packet_size);
336                 break;
337         case DCCP_SOCKOPT_SERVICE:
338                 return dccp_getsockopt_service(sk, len,
339                                                (u32 __user *)optval, optlen);
340         case 128 ... 191:
341                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
342                                              len, (u32 __user *)optval, optlen);
343         case 192 ... 255:
344                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
345                                              len, (u32 __user *)optval, optlen);
346         default:
347                 return -ENOPROTOOPT;
348         }
349
350         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
351                 return -EFAULT;
352
353         return 0;
354 }
355
/*
 * Send a single DCCP packet.  DCCP preserves packet boundaries, so the
 * whole message must fit in one packet: anything larger than the cached
 * MSS is rejected with -EMSGSIZE rather than fragmented.
 * Returns the number of bytes sent, or a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);
	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/*
	 * NOTE(review): the socket lock is dropped around the (possibly
	 * blocking) allocation; socket state may change while unlocked -
	 * confirm the subsequent path tolerates that.
	 */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	rc = dccp_write_xmit(sk, skb, &timeo);
	/*
	 * XXX we don't use sk_write_queue, so just discard the packet.
	 *     Current plan however is to _use_ sk_write_queue with
	 *     an algorithm similar to tcp_sendmsg, where the main difference
	 *     is that in DCCP we have to respect packet boundaries, so
	 *     no coalescing of skbs.
	 *
	 *     This bug was _quickly_ found & fixed by just looking at an OSTRA
	 *     generated callgraph 8) -acme
	 */
out_release:
	release_sock(sk);
	return rc ? : len;	/* on success report the full message length */
out_discard:
	kfree_skb(skb);
	goto out_release;
}
411
/*
 * Receive one DCCP packet.  Only DATA/DATAACK packets carry user data;
 * RESET and CLOSE are treated as end-of-stream markers (return 0) and
 * every other packet type is discarded.  Returns the number of bytes
 * copied, 0 at end of stream, or a negative errno.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		if (dh->dccph_type == DCCP_PKT_DATA ||
		    dh->dccph_type == DCCP_PKT_DATAACK)
			goto found_ok_skb;

		if (dh->dccph_type == DCCP_PKT_RESET ||
		    dh->dccph_type == DCCP_PKT_CLOSE) {
			dccp_pr_debug("found fin ok!\n");
			len = 0;
			goto found_fin_ok;
		}
		dccp_pr_debug("packet_type=%s\n",
			      dccp_packet_name(dh->dccph_type));
		/* NOTE(review): non-data packets are consumed here even when
		 * MSG_PEEK is set - confirm this is intentional. */
		sk_eat_skb(sk, skb);
verify_sock_status:
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Block (up to timeo) until data arrives, then retry. */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		/* Truncate to the caller's buffer; flag if data was lost. */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}
508
509 static int inet_dccp_listen(struct socket *sock, int backlog)
510 {
511         struct sock *sk = sock->sk;
512         unsigned char old_state;
513         int err;
514
515         lock_sock(sk);
516
517         err = -EINVAL;
518         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
519                 goto out;
520
521         old_state = sk->sk_state;
522         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
523                 goto out;
524
525         /* Really, if the socket is already in listen state
526          * we can only allow the backlog to be adjusted.
527          */
528         if (old_state != DCCP_LISTEN) {
529                 /*
530                  * FIXME: here it probably should be sk->sk_prot->listen_start
531                  * see tcp_listen_start
532                  */
533                 err = dccp_listen_start(sk);
534                 if (err)
535                         goto out;
536         }
537         sk->sk_max_ack_backlog = backlog;
538         err = 0;
539
540 out:
541         release_sock(sk);
542         return err;
543 }
544
/*
 * Close transition table: maps the current socket state to the state
 * entered on close().  An entry with DCCP_ACTION_FIN or'ed in means a
 * CLOSE/CLOSEREQ packet must be sent - see dccp_close_state().
 */
static const unsigned char dccp_new_state[] = {
	/* current state:   new state:      action:     */
	[0]		  = DCCP_CLOSED,
	[DCCP_OPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_REQUESTING] = DCCP_CLOSED,
	[DCCP_PARTOPEN]	  = DCCP_CLOSING | DCCP_ACTION_FIN,
	[DCCP_LISTEN]	  = DCCP_CLOSED,
	[DCCP_RESPOND]	  = DCCP_CLOSED,
	[DCCP_CLOSING]	  = DCCP_CLOSED,
	[DCCP_TIME_WAIT]  = DCCP_CLOSED,
	[DCCP_CLOSED]	  = DCCP_CLOSED,
};
557
558 static int dccp_close_state(struct sock *sk)
559 {
560         const int next = dccp_new_state[sk->sk_state];
561         const int ns = next & DCCP_STATE_MASK;
562
563         if (ns != sk->sk_state)
564                 dccp_set_state(sk, ns);
565
566         return next & DCCP_ACTION_FIN;
567 }
568
/*
 * close() entry point: flush the receive queue, run the close state
 * machine (possibly sending CLOSE/CLOSEREQ via dccp_close_state()),
 * then orphan the socket.  If the state machine already reached CLOSED
 * the socket is destroyed here; otherwise the retransmit timer and
 * incoming packets finish the termination (RFC "8.3. Termination").
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct sk_buff *skb;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	/* FIXME: check for unread data */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		__kfree_skb(skb);
	}

	if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (dccp_close_state(sk)) {
		dccp_send_close(sk, 1);
	}

	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	BUG_TRAP(!sock_owned_by_user(sk));

	sock_hold(sk);
	sock_orphan(sk);

	/*
	 * The last release_sock may have processed the CLOSE or RESET
	 * packet moving sock to CLOSED state, if not we have to fire
	 * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
	 * in draft-ietf-dccp-spec-11. -acme
	 */
	if (sk->sk_state == DCCP_CLOSING) {
		/* FIXME: should start at 2 * RTT */
		/* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
					  inet_csk(sk)->icsk_rto,
					  DCCP_RTO_MAX);
#if 0
		/* Yeah, we should use sk->sk_prot->orphan_count, etc */
		dccp_set_state(sk, DCCP_CLOSED);
#endif
	}

	atomic_inc(sk->sk_prot->orphan_count);
	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}
649
/* FIXME: shutdown() is not implemented yet - this only logs entry. */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
654
/*
 * proto_ops for DCCP over IPv4: mostly the generic inet/sock_common
 * helpers, with DCCP-specific poll and listen implementations.
 */
static struct proto_ops inet_dccp_ops = {
	.family		= PF_INET,
	.owner		= THIS_MODULE,
	.release	= inet_release,
	.bind		= inet_bind,
	.connect	= inet_stream_connect,
	.socketpair	= sock_no_socketpair,
	.accept		= inet_accept,
	.getname	= inet_getname,
	/* FIXME: work on tcp_poll to rename it to inet_csk_poll */
	.poll		= dccp_poll,
	.ioctl		= inet_ioctl,
	/* FIXME: work on inet_listen to rename it to sock_common_listen */
	.listen		= inet_dccp_listen,
	.shutdown	= inet_shutdown,
	.setsockopt	= sock_common_setsockopt,
	.getsockopt	= sock_common_getsockopt,
	.sendmsg	= inet_sendmsg,
	.recvmsg	= sock_common_recvmsg,
	.mmap		= sock_no_mmap,
	.sendpage	= sock_no_sendpage,
};
677
extern struct net_proto_family inet_family_ops;

/* Registers the SOCK_DCCP/IPPROTO_DCCP pair with the inet socket layer. */
static struct inet_protosw dccp_v4_protosw = {
	.type		= SOCK_DCCP,
	.protocol	= IPPROTO_DCCP,
	.prot		= &dccp_v4_prot,
	.ops		= &inet_dccp_ops,
	.capability	= -1,
	.no_check	= 0,
	.flags		= 0,
};

/*
 * This is the global socket data structure used for responding to
 * the Out-of-the-blue (OOTB) packets. A control sock will be created
 * for this socket at the initialization time.
 */
struct socket *dccp_ctl_socket;
696
697 static char dccp_ctl_socket_err_msg[] __initdata =
698         KERN_ERR "DCCP: Failed to create the control socket.\n";
699
700 static int __init dccp_ctl_sock_init(void)
701 {
702         int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
703                                   &dccp_ctl_socket);
704         if (rc < 0)
705                 printk(dccp_ctl_socket_err_msg);
706         else {
707                 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
708                 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
709
710                 /* Unhash it so that IP input processing does not even
711                  * see it, we do not wish this socket to see incoming
712                  * packets.
713                  */
714                 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
715         }
716
717         return rc;
718 }
719
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/* Release the OOTB control socket on module unload (unload hack only). */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket == NULL)
		return;

	sock_release(dccp_ctl_socket);
	dccp_ctl_socket = NULL;
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
731
732 static int __init init_dccp_v4_mibs(void)
733 {
734         int rc = -ENOMEM;
735
736         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
737         if (dccp_statistics[0] == NULL)
738                 goto out;
739
740         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
741         if (dccp_statistics[1] == NULL)
742                 goto out_free_one;
743
744         rc = 0;
745 out:
746         return rc;
747 out_free_one:
748         free_percpu(dccp_statistics[0]);
749         dccp_statistics[0] = NULL;
750         goto out;
751
752 }
753
/* Requested number of established-hash buckets at module load; 0 means
 * size by available memory (see dccp_init()). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Non-zero enables dccp_pr_debug() diagnostics. */
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
#endif
763
/*
 * Module init: register the DCCP proto, size and allocate the
 * established (ehash) and bind (bhash) hash tables, set up MIB counters,
 * hook DCCP into IPv4 input and create the OOTB control socket.
 * Every error path unwinds in reverse order via the labels at the end.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = proto_register(&dccp_v4_prot, 1);

	if (rc)
		goto out;

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out_proto_unregister;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	do {
		/* ehash_size is forced down to a power of two. */
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		dccp_hashinfo.ehash_size >>= 1;
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		printk(KERN_CRIT "Failed to allocate DCCP "
				 "established hash table\n");
		goto out_free_bind_bucket_cachep;
	}

	/* Initialize 2 * ehash_size buckets (second half presumably for
	 * TIME_WAIT sockets, matching TCP's layout - confirm). */
	for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
		rwlock_init(&dccp_hashinfo.ehash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
	}

	bhash_order = ehash_order;

	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		/* Cap the bind hash at 64K entries; retry a smaller order. */
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
		goto out_free_dccp_ehash;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	if (init_dccp_v4_mibs())
		goto out_free_dccp_bhash;

	rc = -EAGAIN;
	if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
		goto out_free_dccp_v4_mibs;

	inet_register_protosw(&dccp_v4_protosw);

	rc = dccp_ctl_sock_init();
	if (rc)
		goto out_unregister_protosw;
out:
	return rc;
out_unregister_protosw:
	inet_unregister_protosw(&dccp_v4_protosw);
	inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
out_free_dccp_v4_mibs:
	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	dccp_statistics[0] = dccp_statistics[1] = NULL;
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
out_proto_unregister:
	proto_unregister(&dccp_v4_prot);
	goto out;
}
874
static const char dccp_del_proto_err_msg[] __exitdata =
	KERN_ERR "can't remove dccp net_protocol\n";

/*
 * Module unload: tear everything down in reverse order of dccp_init().
 * NOTE(review): the page orders passed to free_pages() are recomputed
 * here from the final table sizes with get_order(), while dccp_init()
 * allocated using the orders it iterated to - verify the two always
 * agree for every table size the init loop can produce.
 */
static void __exit dccp_fini(void)
{
	inet_unregister_protosw(&dccp_v4_protosw);

	if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
		printk(dccp_del_proto_err_msg);

	free_percpu(dccp_statistics[0]);
	free_percpu(dccp_statistics[1]);
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	proto_unregister(&dccp_v4_prot);
}
896
module_init(dccp_init);
module_exit(dccp_fini);

/*
 * __stringify doesn't like enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
 * values directly. Also cover the case where the protocol is not specified,
 * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
 */
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");