Merge /spare/repo/linux-2.6/
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/ip.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37 #include <linux/dccp.h>
38
39 #include "ccid.h"
40 #include "dccp.h"
41
/* DCCP SNMP counters; both slots are allocated per-CPU by init_dccp_v4_mibs(). */
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43
/*
 * Counter that starts at zero.
 * NOTE(review): presumably the object sk_prot->orphan_count points at for
 * DCCP sockets (see dccp_close()) - confirm against dccp_v4_prot.
 */
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
/*
 * Receive and error handlers for DCCP; dccp_init() registers this with
 * inet_add_protocol() under IPPROTO_DCCP.
 */
46 static struct net_protocol dccp_protocol = {
47         .handler        = dccp_v4_rcv,
48         .err_handler    = dccp_v4_err,
49 };
50
51 const char *dccp_packet_name(const int type)
52 {
53         static const char *dccp_packet_names[] = {
54                 [DCCP_PKT_REQUEST]  = "REQUEST",
55                 [DCCP_PKT_RESPONSE] = "RESPONSE",
56                 [DCCP_PKT_DATA]     = "DATA",
57                 [DCCP_PKT_ACK]      = "ACK",
58                 [DCCP_PKT_DATAACK]  = "DATAACK",
59                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60                 [DCCP_PKT_CLOSE]    = "CLOSE",
61                 [DCCP_PKT_RESET]    = "RESET",
62                 [DCCP_PKT_SYNC]     = "SYNC",
63                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
64         };
65
66         if (type >= DCCP_NR_PKT_TYPES)
67                 return "INVALID";
68         else
69                 return dccp_packet_names[type];
70 }
71
72 EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74 const char *dccp_state_name(const int state)
75 {
76         static char *dccp_state_names[] = {
77         [DCCP_OPEN]       = "OPEN",
78         [DCCP_REQUESTING] = "REQUESTING",
79         [DCCP_PARTOPEN]   = "PARTOPEN",
80         [DCCP_LISTEN]     = "LISTEN",
81         [DCCP_RESPOND]    = "RESPOND",
82         [DCCP_CLOSING]    = "CLOSING",
83         [DCCP_TIME_WAIT]  = "TIME_WAIT",
84         [DCCP_CLOSED]     = "CLOSED",
85         };
86
87         if (state >= DCCP_MAX_STATES)
88                 return "INVALID STATE!";
89         else
90                 return dccp_state_names[state];
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_state_name);
94
95 static inline int dccp_listen_start(struct sock *sk)
96 {
97         struct dccp_sock *dp = dccp_sk(sk);
98
99         dp->dccps_role = DCCP_ROLE_LISTEN;
100         /*
101          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
102          * before calling listen()
103          */
104         if (dccp_service_not_initialized(sk))
105                 return -EPROTO;
106         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
107 }
108
109 int dccp_disconnect(struct sock *sk, int flags)
110 {
111         struct inet_connection_sock *icsk = inet_csk(sk);
112         struct inet_sock *inet = inet_sk(sk);
113         int err = 0;
114         const int old_state = sk->sk_state;
115
116         if (old_state != DCCP_CLOSED)
117                 dccp_set_state(sk, DCCP_CLOSED);
118
119         /* ABORT function of RFC793 */
120         if (old_state == DCCP_LISTEN) {
121                 inet_csk_listen_stop(sk);
122         /* FIXME: do the active reset thing */
123         } else if (old_state == DCCP_REQUESTING)
124                 sk->sk_err = ECONNRESET;
125
126         dccp_clear_xmit_timers(sk);
127         __skb_queue_purge(&sk->sk_receive_queue);
128         if (sk->sk_send_head != NULL) {
129                 __kfree_skb(sk->sk_send_head);
130                 sk->sk_send_head = NULL;
131         }
132
133         inet->dport = 0;
134
135         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
136                 inet_reset_saddr(sk);
137
138         sk->sk_shutdown = 0;
139         sock_reset_flag(sk, SOCK_DONE);
140
141         icsk->icsk_backoff = 0;
142         inet_csk_delack_init(sk);
143         __sk_dst_reset(sk);
144
145         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
146
147         sk->sk_error_report(sk);
148         return err;
149 }
150
151 /*
152  *      Wait for a DCCP event.
153  *
154  *      Note that we don't need to lock the socket, as the upper poll layers
155  *      take care of normal races (between the test and the event) and we don't
156  *      go look at any of the socket buffers directly.
157  */
158 static unsigned int dccp_poll(struct file *file, struct socket *sock,
159                               poll_table *wait)
160 {
161         unsigned int mask;
162         struct sock *sk = sock->sk;
163
164         poll_wait(file, sk->sk_sleep, wait);
165         if (sk->sk_state == DCCP_LISTEN)
166                 return inet_csk_listen_poll(sk);
167
168         /* Socket is not locked. We are protected from async events
169            by poll logic and correct handling of state changes
170            made by another threads is impossible in any case.
171          */
172
173         mask = 0;
174         if (sk->sk_err)
175                 mask = POLLERR;
176
177         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
178                 mask |= POLLHUP;
179         if (sk->sk_shutdown & RCV_SHUTDOWN)
180                 mask |= POLLIN | POLLRDNORM;
181
182         /* Connected? */
183         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
184                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
185                         mask |= POLLIN | POLLRDNORM;
186
187                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
188                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
189                                 mask |= POLLOUT | POLLWRNORM;
190                         } else {  /* send SIGIO later */
191                                 set_bit(SOCK_ASYNC_NOSPACE,
192                                         &sk->sk_socket->flags);
193                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
194
195                                 /* Race breaker. If space is freed after
196                                  * wspace test but before the flags are set,
197                                  * IO signal will be lost.
198                                  */
199                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
200                                         mask |= POLLOUT | POLLWRNORM;
201                         }
202                 }
203         }
204         return mask;
205 }
206
/*
 * Protocol ioctl hook.  No command is handled here: the arguments are
 * ignored, a debug message is emitted and -ENOIOCTLCMD is always returned.
 */
207 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
208 {
209         dccp_pr_debug("entry\n");
210         return -ENOIOCTLCMD;
211 }
212
213 static int dccp_setsockopt_service(struct sock *sk, const u32 service,
214                                    char __user *optval, int optlen)
215 {
216         struct dccp_sock *dp = dccp_sk(sk);
217         struct dccp_service_list *sl = NULL;
218
219         if (service == DCCP_SERVICE_INVALID_VALUE || 
220             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
221                 return -EINVAL;
222
223         if (optlen > sizeof(service)) {
224                 sl = kmalloc(optlen, GFP_KERNEL);
225                 if (sl == NULL)
226                         return -ENOMEM;
227
228                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
229                 if (copy_from_user(sl->dccpsl_list,
230                                    optval + sizeof(service),
231                                    optlen - sizeof(service)) ||
232                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
233                         kfree(sl);
234                         return -EFAULT;
235                 }
236         }
237
238         lock_sock(sk);
239         dp->dccps_service = service;
240
241         if (dp->dccps_service_list != NULL)
242                 kfree(dp->dccps_service_list);
243
244         dp->dccps_service_list = sl;
245         release_sock(sk);
246         return 0;
247 }
248
249 int dccp_setsockopt(struct sock *sk, int level, int optname,
250                     char __user *optval, int optlen)
251 {
252         struct dccp_sock *dp;
253         int err;
254         int val;
255
256         if (level != SOL_DCCP)
257                 return ip_setsockopt(sk, level, optname, optval, optlen);
258
259         if (optlen < sizeof(int))
260                 return -EINVAL;
261
262         if (get_user(val, (int __user *)optval))
263                 return -EFAULT;
264
265         if (optname == DCCP_SOCKOPT_SERVICE)
266                 return dccp_setsockopt_service(sk, val, optval, optlen);
267
268         lock_sock(sk);
269         dp = dccp_sk(sk);
270         err = 0;
271
272         switch (optname) {
273         case DCCP_SOCKOPT_PACKET_SIZE:
274                 dp->dccps_packet_size = val;
275                 break;
276         default:
277                 err = -ENOPROTOOPT;
278                 break;
279         }
280         
281         release_sock(sk);
282         return err;
283 }
284
285 static int dccp_getsockopt_service(struct sock *sk, int len,
286                                    u32 __user *optval,
287                                    int __user *optlen)
288 {
289         const struct dccp_sock *dp = dccp_sk(sk);
290         const struct dccp_service_list *sl;
291         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
292
293         lock_sock(sk);
294         if (dccp_service_not_initialized(sk))
295                 goto out;
296
297         if ((sl = dp->dccps_service_list) != NULL) {
298                 slen = sl->dccpsl_nr * sizeof(u32);
299                 total_len += slen;
300         }
301
302         err = -EINVAL;
303         if (total_len > len)
304                 goto out;
305
306         err = 0;
307         if (put_user(total_len, optlen) ||
308             put_user(dp->dccps_service, optval) ||
309             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
310                 err = -EFAULT;
311 out:
312         release_sock(sk);
313         return err;
314 }
315
316 int dccp_getsockopt(struct sock *sk, int level, int optname,
317                     char __user *optval, int __user *optlen)
318 {
319         struct dccp_sock *dp;
320         int val, len;
321
322         if (level != SOL_DCCP)
323                 return ip_getsockopt(sk, level, optname, optval, optlen);
324
325         if (get_user(len, optlen))
326                 return -EFAULT;
327
328         if (len < sizeof(int))
329                 return -EINVAL;
330
331         dp = dccp_sk(sk);
332
333         switch (optname) {
334         case DCCP_SOCKOPT_PACKET_SIZE:
335                 val = dp->dccps_packet_size;
336                 len = sizeof(dp->dccps_packet_size);
337                 break;
338         case DCCP_SOCKOPT_SERVICE:
339                 return dccp_getsockopt_service(sk, len,
340                                                (u32 __user *)optval, optlen);
341         case 128 ... 191:
342                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
343                                              len, (u32 __user *)optval, optlen);
344         case 192 ... 255:
345                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
346                                              len, (u32 __user *)optval, optlen);
347         default:
348                 return -ENOPROTOOPT;
349         }
350
351         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
352                 return -EFAULT;
353
354         return 0;
355 }
356
357 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
358                  size_t len)
359 {
360         const struct dccp_sock *dp = dccp_sk(sk);
361         const int flags = msg->msg_flags;
362         const int noblock = flags & MSG_DONTWAIT;
363         struct sk_buff *skb;
364         int rc, size;
365         long timeo;
366
367         if (len > dp->dccps_mss_cache)
368                 return -EMSGSIZE;
369
370         lock_sock(sk);
371         timeo = sock_sndtimeo(sk, noblock);
372
373         /*
374          * We have to use sk_stream_wait_connect here to set sk_write_pending,
375          * so that the trick in dccp_rcv_request_sent_state_process.
376          */
377         /* Wait for a connection to finish. */
378         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
379                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
380                         goto out_release;
381
382         size = sk->sk_prot->max_header + len;
383         release_sock(sk);
384         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
385         lock_sock(sk);
386         if (skb == NULL)
387                 goto out_release;
388
389         skb_reserve(skb, sk->sk_prot->max_header);
390         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
391         if (rc != 0)
392                 goto out_discard;
393
394         rc = dccp_write_xmit(sk, skb, &timeo);
395         /*
396          * XXX we don't use sk_write_queue, so just discard the packet.
397          *     Current plan however is to _use_ sk_write_queue with
398          *     an algorith similar to tcp_sendmsg, where the main difference
399          *     is that in DCCP we have to respect packet boundaries, so
400          *     no coalescing of skbs.
401          *
402          *     This bug was _quickly_ found & fixed by just looking at an OSTRA
403          *     generated callgraph 8) -acme
404          */
405         if (rc != 0)
406                 goto out_discard;
407 out_release:
408         release_sock(sk);
409         return rc ? : len;
410 out_discard:
411         kfree_skb(skb);
412         goto out_release;
413 }
414
/*
 * Receive at most one packet's payload into msg.
 *
 * Runs with the socket locked.  The head of sk_receive_queue is examined:
 * DATA and DATAACK packets are copied to the caller, RESET and CLOSE end
 * the read with a return value of 0, every other packet type is dropped
 * and the socket status is checked before waiting for more data.
 *
 * Returns the number of bytes copied, 0 at the end of the connection, or
 * a negative errno.  Note that len (a size_t) doubles as the return value:
 * error codes are stored in it and come back through the int return type.
 */
415 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
416                  size_t len, int nonblock, int flags, int *addr_len)
417 {
418         const struct dccp_hdr *dh;
419         long timeo;
420
421         lock_sock(sk);
422
423         if (sk->sk_state == DCCP_LISTEN) {
424                 len = -ENOTCONN;
425                 goto out;
426         }
427
428         timeo = sock_rcvtimeo(sk, nonblock);
429
430         do {
                    /* Only peek here; the skb is unlinked by sk_eat_skb() below. */
431                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
432
433                 if (skb == NULL)
434                         goto verify_sock_status;
435
436                 dh = dccp_hdr(skb);
437
438                 if (dh->dccph_type == DCCP_PKT_DATA ||
439                     dh->dccph_type == DCCP_PKT_DATAACK)
440                         goto found_ok_skb;
441
442                 if (dh->dccph_type == DCCP_PKT_RESET ||
443                     dh->dccph_type == DCCP_PKT_CLOSE) {
444                         dccp_pr_debug("found fin ok!\n");
445                         len = 0;
446                         goto found_fin_ok;
447                 }
                    /*
                     * Any other packet type carries nothing for the caller:
                     * drop it and fall through to the status checks.
                     */
448                 dccp_pr_debug("packet_type=%s\n",
449                               dccp_packet_name(dh->dccph_type));
450                 sk_eat_skb(sk, skb);
451 verify_sock_status:
452                 if (sock_flag(sk, SOCK_DONE)) {
453                         len = 0;
454                         break;
455                 }
456
457                 if (sk->sk_err) {
458                         len = sock_error(sk);
459                         break;
460                 }
461
462                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
463                         len = 0;
464                         break;
465                 }
466
                    /*
                     * SOCK_DONE was already handled above, so only the
                     * -ENOTCONN branch can be taken here.
                     */
467                 if (sk->sk_state == DCCP_CLOSED) {
468                         if (!sock_flag(sk, SOCK_DONE)) {
469                                 /* This occurs when user tries to read
470                                  * from never connected socket.
471                                  */
472                                 len = -ENOTCONN;
473                                 break;
474                         }
475                         len = 0;
476                         break;
477                 }
478
                    /* A zero timeout means the caller does not want to block. */
479                 if (!timeo) {
480                         len = -EAGAIN;
481                         break;
482                 }
483
484                 if (signal_pending(current)) {
485                         len = sock_intr_errno(timeo);
486                         break;
487                 }
488
489                 sk_wait_data(sk, &timeo);
490                 continue;
491         found_ok_skb:
                    /*
                     * Deliver at most this one packet.  If the buffer is
                     * smaller than the packet, MSG_TRUNC is set and the
                     * rest of the packet is not handed to the caller.
                     */
492                 if (len > skb->len)
493                         len = skb->len;
494                 else if (len < skb->len)
495                         msg->msg_flags |= MSG_TRUNC;
496
497                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
498                         /* Exception. Bailout! */
499                         len = -EFAULT;
500                         break;
501                 }
502         found_fin_ok:
                    /* With MSG_PEEK the packet stays at the head of the queue. */
503                 if (!(flags & MSG_PEEK))
504                         sk_eat_skb(sk, skb);
505                 break;
506         } while (1);
507 out:
508         release_sock(sk);
509         return len;
510 }
511
512 static int inet_dccp_listen(struct socket *sock, int backlog)
513 {
514         struct sock *sk = sock->sk;
515         unsigned char old_state;
516         int err;
517
518         lock_sock(sk);
519
520         err = -EINVAL;
521         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
522                 goto out;
523
524         old_state = sk->sk_state;
525         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
526                 goto out;
527
528         /* Really, if the socket is already in listen state
529          * we can only allow the backlog to be adjusted.
530          */
531         if (old_state != DCCP_LISTEN) {
532                 /*
533                  * FIXME: here it probably should be sk->sk_prot->listen_start
534                  * see tcp_listen_start
535                  */
536                 err = dccp_listen_start(sk);
537                 if (err)
538                         goto out;
539         }
540         sk->sk_max_ack_backlog = backlog;
541         err = 0;
542
543 out:
544         release_sock(sk);
545         return err;
546 }
547
/*
 * Close-time state transitions, indexed by the current socket state.
 * Each entry holds the state to move to, optionally OR-ed with
 * DCCP_ACTION_FIN; dccp_close_state() separates the two parts with
 * DCCP_STATE_MASK.
 */
548 static const unsigned char dccp_new_state[] = {
549         /* current state:   new state:      action:     */
550         [0]               = DCCP_CLOSED,
551         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
552         [DCCP_REQUESTING] = DCCP_CLOSED,
553         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
554         [DCCP_LISTEN]     = DCCP_CLOSED,
555         [DCCP_RESPOND]    = DCCP_CLOSED,
556         [DCCP_CLOSING]    = DCCP_CLOSED,
557         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
558         [DCCP_CLOSED]     = DCCP_CLOSED,
559 };
560
561 static int dccp_close_state(struct sock *sk)
562 {
563         const int next = dccp_new_state[sk->sk_state];
564         const int ns = next & DCCP_STATE_MASK;
565
566         if (ns != sk->sk_state)
567                 dccp_set_state(sk, ns);
568
569         return next & DCCP_ACTION_FIN;
570 }
571
/*
 * Close sk on behalf of its owner.
 *
 * Both directions are marked as shut down.  A listening socket is moved
 * to CLOSED and its listen state is torn down.  Any other socket has its
 * receive queue flushed and is then either disconnected at once (SO_LINGER
 * with a zero linger time) or run through dccp_close_state(), sending a
 * close packet when that asks for it; sk_stream_wait_close() is then
 * called with timeout.
 *
 * Finally the socket is orphaned and, if it already reached CLOSED,
 * destroyed; otherwise it stays around until the protocol closes it.
 */
572 void dccp_close(struct sock *sk, long timeout)
573 {
574         struct sk_buff *skb;
575
576         lock_sock(sk);
577
578         sk->sk_shutdown = SHUTDOWN_MASK;
579
580         if (sk->sk_state == DCCP_LISTEN) {
581                 dccp_set_state(sk, DCCP_CLOSED);
582
583                 /* Special case. */
584                 inet_csk_listen_stop(sk);
585
586                 goto adjudge_to_death;
587         }
588
589         /*
590          * We need to flush the recv. buffs.  We do this only on the
591          * descriptor close, not protocol-sourced closes, because the
592          * reader process may not have drained the data yet!
593          */
594         /* FIXME: check for unread data */
595         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
596                 __kfree_skb(skb);
597         }
598
599         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
600                 /* Check zero linger _after_ checking for unread data. */
601                 sk->sk_prot->disconnect(sk, 0);
602         } else if (dccp_close_state(sk)) {
603                 dccp_send_close(sk, 1);
604         }
605
606         sk_stream_wait_close(sk, timeout);
607
608 adjudge_to_death:
609         /*
610          * It is the last release_sock in its life. It will remove backlog.
611          */
612         release_sock(sk);
613         /*
614          * Now socket is owned by kernel and we acquire BH lock
615          * to finish close. No need to check for user refs.
616          */
617         local_bh_disable();
618         bh_lock_sock(sk);
619         BUG_TRAP(!sock_owned_by_user(sk));
620
            /* Reference taken here is dropped by the sock_put() at the end. */
621         sock_hold(sk);
622         sock_orphan(sk);
623
624         /*
625          * The last release_sock may have processed the CLOSE or RESET
626          * packet moving sock to CLOSED state, if not we have to fire
627          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
628          * in draft-ietf-dccp-spec-11. -acme
629          */
630         if (sk->sk_state == DCCP_CLOSING) {
631                 /* FIXME: should start at 2 * RTT */
632                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
633                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
634                                           inet_csk(sk)->icsk_rto,
635                                           DCCP_RTO_MAX);
636 #if 0
637                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
638                 dccp_set_state(sk, DCCP_CLOSED);
639 #endif
640         }
641
642         atomic_inc(sk->sk_prot->orphan_count);
643         if (sk->sk_state == DCCP_CLOSED)
644                 inet_csk_destroy_sock(sk);
645
646         /* Otherwise, socket is reprieved until protocol close. */
647
648         bh_unlock_sock(sk);
649         local_bh_enable();
650         sock_put(sk);
651 }
652
/*
 * Protocol shutdown hook.  Nothing is implemented yet: sk and how are
 * ignored and only a debug message is emitted.
 */
653 void dccp_shutdown(struct sock *sk, int how)
654 {
655         dccp_pr_debug("entry\n");
656 }
657
/*
 * Socket-layer operations for PF_INET DCCP sockets.  Only poll and listen
 * are DCCP specific (dccp_poll, inet_dccp_listen); every other entry points
 * at a generic inet_*, sock_common_* or sock_no_* helper.  Referenced from
 * dccp_v4_protosw.
 */
658 static struct proto_ops inet_dccp_ops = {
659         .family         = PF_INET,
660         .owner          = THIS_MODULE,
661         .release        = inet_release,
662         .bind           = inet_bind,
663         .connect        = inet_stream_connect,
664         .socketpair     = sock_no_socketpair,
665         .accept         = inet_accept,
666         .getname        = inet_getname,
667         /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
668         .poll           = dccp_poll,
669         .ioctl          = inet_ioctl,
670         /* FIXME: work on inet_listen to rename it to sock_common_listen */
671         .listen         = inet_dccp_listen,
672         .shutdown       = inet_shutdown,
673         .setsockopt     = sock_common_setsockopt,
674         .getsockopt     = sock_common_getsockopt,
675         .sendmsg        = inet_sendmsg,
676         .recvmsg        = sock_common_recvmsg,
677         .mmap           = sock_no_mmap,
678         .sendpage       = sock_no_sendpage,
679 };
680
681 extern struct net_proto_family inet_family_ops;
682
/*
 * Ties the (SOCK_DCCP, IPPROTO_DCCP) socket type to dccp_v4_prot and
 * inet_dccp_ops.  Registered by dccp_init() and unregistered by dccp_fini().
 * NOTE(review): capability = -1 presumably means "no capability required" -
 * confirm against the inet_protosw users.
 */
683 static struct inet_protosw dccp_v4_protosw = {
684         .type           = SOCK_DCCP,
685         .protocol       = IPPROTO_DCCP,
686         .prot           = &dccp_v4_prot,
687         .ops            = &inet_dccp_ops,
688         .capability     = -1,
689         .no_check       = 0,
690         .flags          = 0,
691 };
692
693 /*
694  * This is the global socket data structure used for responding to
695  * the Out-of-the-blue (OOTB) packets. A control sock will be created
696  * for this socket at the initialization time.
697  */
698 struct socket *dccp_ctl_socket;
699
/* Printed by dccp_ctl_sock_init() when the control socket cannot be created. */
700 static char dccp_ctl_socket_err_msg[] __initdata =
701         KERN_ERR "DCCP: Failed to create the control socket.\n";
702
703 static int __init dccp_ctl_sock_init(void)
704 {
705         int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
706                                   &dccp_ctl_socket);
707         if (rc < 0)
708                 printk(dccp_ctl_socket_err_msg);
709         else {
710                 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
711                 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
712
713                 /* Unhash it so that IP input processing does not even
714                  * see it, we do not wish this socket to see incoming
715                  * packets.
716                  */
717                 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
718         }
719
720         return rc;
721 }
722
723 #ifdef CONFIG_IP_DCCP_UNLOAD_HACK
724 void dccp_ctl_sock_exit(void)
725 {
726         if (dccp_ctl_socket != NULL) {
727                 sock_release(dccp_ctl_socket);
728                 dccp_ctl_socket = NULL;
729         }
730 }
731
732 EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
733 #endif
734
735 static int __init init_dccp_v4_mibs(void)
736 {
737         int rc = -ENOMEM;
738
739         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
740         if (dccp_statistics[0] == NULL)
741                 goto out;
742
743         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
744         if (dccp_statistics[1] == NULL)
745                 goto out_free_one;
746
747         rc = 0;
748 out:
749         return rc;
750 out_free_one:
751         free_percpu(dccp_statistics[0]);
752         dccp_statistics[0] = NULL;
753         goto out;
754
755 }
756
/*
 * Read-only (0444) module parameter.  When non-zero, dccp_init() uses it
 * instead of the memory-based heuristic to size the established hash.
 */
757 static int thash_entries;
758 module_param(thash_entries, int, 0444);
759 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
760
761 #ifdef CONFIG_IP_DCCP_DEBUG
/* Read-only (0444) module parameter, only built with CONFIG_IP_DCCP_DEBUG. */
762 int dccp_debug;
763 module_param(dccp_debug, int, 0444);
764 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
765 #endif
766
767 static int __init dccp_init(void)
768 {
769         unsigned long goal;
770         int ehash_order, bhash_order, i;
771         int rc = proto_register(&dccp_v4_prot, 1);
772
773         if (rc)
774                 goto out;
775
776         dccp_hashinfo.bind_bucket_cachep =
777                 kmem_cache_create("dccp_bind_bucket",
778                                   sizeof(struct inet_bind_bucket), 0,
779                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
780         if (!dccp_hashinfo.bind_bucket_cachep)
781                 goto out_proto_unregister;
782
783         /*
784          * Size and allocate the main established and bind bucket
785          * hash tables.
786          *
787          * The methodology is similar to that of the buffer cache.
788          */
789         if (num_physpages >= (128 * 1024))
790                 goal = num_physpages >> (21 - PAGE_SHIFT);
791         else
792                 goal = num_physpages >> (23 - PAGE_SHIFT);
793
794         if (thash_entries)
795                 goal = (thash_entries *
796                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
797         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
798                 ;
799         do {
800                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
801                                         sizeof(struct inet_ehash_bucket);
802                 dccp_hashinfo.ehash_size >>= 1;
803                 while (dccp_hashinfo.ehash_size &
804                        (dccp_hashinfo.ehash_size - 1))
805                         dccp_hashinfo.ehash_size--;
806                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
807                         __get_free_pages(GFP_ATOMIC, ehash_order);
808         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
809
810         if (!dccp_hashinfo.ehash) {
811                 printk(KERN_CRIT "Failed to allocate DCCP "
812                                  "established hash table\n");
813                 goto out_free_bind_bucket_cachep;
814         }
815
816         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
817                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
818                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
819         }
820
821         bhash_order = ehash_order;
822
823         do {
824                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
825                                         sizeof(struct inet_bind_hashbucket);
826                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
827                     bhash_order > 0)
828                         continue;
829                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
830                         __get_free_pages(GFP_ATOMIC, bhash_order);
831         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
832
833         if (!dccp_hashinfo.bhash) {
834                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
835                 goto out_free_dccp_ehash;
836         }
837
838         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
839                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
840                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
841         }
842
843         if (init_dccp_v4_mibs())
844                 goto out_free_dccp_bhash;
845
846         rc = -EAGAIN;
847         if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
848                 goto out_free_dccp_v4_mibs;
849
850         inet_register_protosw(&dccp_v4_protosw);
851
852         rc = dccp_ctl_sock_init();
853         if (rc)
854                 goto out_unregister_protosw;
855 out:
856         return rc;
857 out_unregister_protosw:
858         inet_unregister_protosw(&dccp_v4_protosw);
859         inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
860 out_free_dccp_v4_mibs:
861         free_percpu(dccp_statistics[0]);
862         free_percpu(dccp_statistics[1]);
863         dccp_statistics[0] = dccp_statistics[1] = NULL;
864 out_free_dccp_bhash:
865         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
866         dccp_hashinfo.bhash = NULL;
867 out_free_dccp_ehash:
868         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
869         dccp_hashinfo.ehash = NULL;
870 out_free_bind_bucket_cachep:
871         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
872         dccp_hashinfo.bind_bucket_cachep = NULL;
873 out_proto_unregister:
874         proto_unregister(&dccp_v4_prot);
875         goto out;
876 }
877
878 static const char dccp_del_proto_err_msg[] __exitdata =
879         KERN_ERR "can't remove dccp net_protocol\n";
880
881 static void __exit dccp_fini(void)
882 {
883         inet_unregister_protosw(&dccp_v4_protosw);
884
885         if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
886                 printk(dccp_del_proto_err_msg);
887
888         free_percpu(dccp_statistics[0]);
889         free_percpu(dccp_statistics[1]);
890         free_pages((unsigned long)dccp_hashinfo.bhash,
891                    get_order(dccp_hashinfo.bhash_size *
892                              sizeof(struct inet_bind_hashbucket)));
893         free_pages((unsigned long)dccp_hashinfo.ehash,
894                    get_order(dccp_hashinfo.ehash_size *
895                              sizeof(struct inet_ehash_bucket)));
896         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
897         proto_unregister(&dccp_v4_prot);
898 }
899
900 module_init(dccp_init);
901 module_exit(dccp_fini);
902
903 /*
904  * __stringify doesn't like enums, so use the SOCK_DCCP (6) and IPPROTO_DCCP (33)
905  * values directly. Also cover the case where the protocol is not specified,
906  * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
907  */
908 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
909 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
910 MODULE_LICENSE("GPL");
911 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
912 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");