Merge branch 'master'
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/config.h>
13 #include <linux/dccp.h>
14 #include <linux/module.h>
15 #include <linux/types.h>
16 #include <linux/sched.h>
17 #include <linux/kernel.h>
18 #include <linux/skbuff.h>
19 #include <linux/netdevice.h>
20 #include <linux/in.h>
21 #include <linux/if_arp.h>
22 #include <linux/init.h>
23 #include <linux/random.h>
24 #include <net/checksum.h>
25
26 #include <net/inet_common.h>
27 #include <net/ip.h>
28 #include <net/protocol.h>
29 #include <net/sock.h>
30 #include <net/xfrm.h>
31
32 #include <asm/semaphore.h>
33 #include <linux/spinlock.h>
34 #include <linux/timer.h>
35 #include <linux/delay.h>
36 #include <linux/poll.h>
37 #include <linux/dccp.h>
38
39 #include "ccid.h"
40 #include "dccp.h"
41
42 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
43
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46 static struct net_protocol dccp_protocol = {
47         .handler        = dccp_v4_rcv,
48         .err_handler    = dccp_v4_err,
49 };
50
51 const char *dccp_packet_name(const int type)
52 {
53         static const char *dccp_packet_names[] = {
54                 [DCCP_PKT_REQUEST]  = "REQUEST",
55                 [DCCP_PKT_RESPONSE] = "RESPONSE",
56                 [DCCP_PKT_DATA]     = "DATA",
57                 [DCCP_PKT_ACK]      = "ACK",
58                 [DCCP_PKT_DATAACK]  = "DATAACK",
59                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
60                 [DCCP_PKT_CLOSE]    = "CLOSE",
61                 [DCCP_PKT_RESET]    = "RESET",
62                 [DCCP_PKT_SYNC]     = "SYNC",
63                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
64         };
65
66         if (type >= DCCP_NR_PKT_TYPES)
67                 return "INVALID";
68         else
69                 return dccp_packet_names[type];
70 }
71
72 EXPORT_SYMBOL_GPL(dccp_packet_name);
73
74 const char *dccp_state_name(const int state)
75 {
76         static char *dccp_state_names[] = {
77         [DCCP_OPEN]       = "OPEN",
78         [DCCP_REQUESTING] = "REQUESTING",
79         [DCCP_PARTOPEN]   = "PARTOPEN",
80         [DCCP_LISTEN]     = "LISTEN",
81         [DCCP_RESPOND]    = "RESPOND",
82         [DCCP_CLOSING]    = "CLOSING",
83         [DCCP_TIME_WAIT]  = "TIME_WAIT",
84         [DCCP_CLOSED]     = "CLOSED",
85         };
86
87         if (state >= DCCP_MAX_STATES)
88                 return "INVALID STATE!";
89         else
90                 return dccp_state_names[state];
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_state_name);
94
95 static inline int dccp_listen_start(struct sock *sk)
96 {
97         struct dccp_sock *dp = dccp_sk(sk);
98
99         dp->dccps_role = DCCP_ROLE_LISTEN;
100         /*
101          * Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
102          * before calling listen()
103          */
104         if (dccp_service_not_initialized(sk))
105                 return -EPROTO;
106         return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
107 }
108
109 int dccp_disconnect(struct sock *sk, int flags)
110 {
111         struct inet_connection_sock *icsk = inet_csk(sk);
112         struct inet_sock *inet = inet_sk(sk);
113         int err = 0;
114         const int old_state = sk->sk_state;
115
116         if (old_state != DCCP_CLOSED)
117                 dccp_set_state(sk, DCCP_CLOSED);
118
119         /* ABORT function of RFC793 */
120         if (old_state == DCCP_LISTEN) {
121                 inet_csk_listen_stop(sk);
122         /* FIXME: do the active reset thing */
123         } else if (old_state == DCCP_REQUESTING)
124                 sk->sk_err = ECONNRESET;
125
126         dccp_clear_xmit_timers(sk);
127         __skb_queue_purge(&sk->sk_receive_queue);
128         if (sk->sk_send_head != NULL) {
129                 __kfree_skb(sk->sk_send_head);
130                 sk->sk_send_head = NULL;
131         }
132
133         inet->dport = 0;
134
135         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
136                 inet_reset_saddr(sk);
137
138         sk->sk_shutdown = 0;
139         sock_reset_flag(sk, SOCK_DONE);
140
141         icsk->icsk_backoff = 0;
142         inet_csk_delack_init(sk);
143         __sk_dst_reset(sk);
144
145         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
146
147         sk->sk_error_report(sk);
148         return err;
149 }
150
151 /*
152  *      Wait for a DCCP event.
153  *
154  *      Note that we don't need to lock the socket, as the upper poll layers
155  *      take care of normal races (between the test and the event) and we don't
156  *      go look at any of the socket buffers directly.
157  */
158 static unsigned int dccp_poll(struct file *file, struct socket *sock,
159                               poll_table *wait)
160 {
161         unsigned int mask;
162         struct sock *sk = sock->sk;
163
164         poll_wait(file, sk->sk_sleep, wait);
165         if (sk->sk_state == DCCP_LISTEN)
166                 return inet_csk_listen_poll(sk);
167
168         /* Socket is not locked. We are protected from async events
169            by poll logic and correct handling of state changes
170            made by another threads is impossible in any case.
171          */
172
173         mask = 0;
174         if (sk->sk_err)
175                 mask = POLLERR;
176
177         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
178                 mask |= POLLHUP;
179         if (sk->sk_shutdown & RCV_SHUTDOWN)
180                 mask |= POLLIN | POLLRDNORM;
181
182         /* Connected? */
183         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
184                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
185                         mask |= POLLIN | POLLRDNORM;
186
187                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
188                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
189                                 mask |= POLLOUT | POLLWRNORM;
190                         } else {  /* send SIGIO later */
191                                 set_bit(SOCK_ASYNC_NOSPACE,
192                                         &sk->sk_socket->flags);
193                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
194
195                                 /* Race breaker. If space is freed after
196                                  * wspace test but before the flags are set,
197                                  * IO signal will be lost.
198                                  */
199                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
200                                         mask |= POLLOUT | POLLWRNORM;
201                         }
202                 }
203         }
204         return mask;
205 }
206
207 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
208 {
209         dccp_pr_debug("entry\n");
210         return -ENOIOCTLCMD;
211 }
212
213 static int dccp_setsockopt_service(struct sock *sk, const u32 service,
214                                    char __user *optval, int optlen)
215 {
216         struct dccp_sock *dp = dccp_sk(sk);
217         struct dccp_service_list *sl = NULL;
218
219         if (service == DCCP_SERVICE_INVALID_VALUE || 
220             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
221                 return -EINVAL;
222
223         if (optlen > sizeof(service)) {
224                 sl = kmalloc(optlen, GFP_KERNEL);
225                 if (sl == NULL)
226                         return -ENOMEM;
227
228                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
229                 if (copy_from_user(sl->dccpsl_list,
230                                    optval + sizeof(service),
231                                    optlen - sizeof(service)) ||
232                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
233                         kfree(sl);
234                         return -EFAULT;
235                 }
236         }
237
238         lock_sock(sk);
239         dp->dccps_service = service;
240
241         if (dp->dccps_service_list != NULL)
242                 kfree(dp->dccps_service_list);
243
244         dp->dccps_service_list = sl;
245         release_sock(sk);
246         return 0;
247 }
248
249 int dccp_setsockopt(struct sock *sk, int level, int optname,
250                     char __user *optval, int optlen)
251 {
252         struct dccp_sock *dp;
253         int err;
254         int val;
255
256         if (level != SOL_DCCP)
257                 return ip_setsockopt(sk, level, optname, optval, optlen);
258
259         if (optlen < sizeof(int))
260                 return -EINVAL;
261
262         if (get_user(val, (int __user *)optval))
263                 return -EFAULT;
264
265         if (optname == DCCP_SOCKOPT_SERVICE)
266                 return dccp_setsockopt_service(sk, val, optval, optlen);
267
268         lock_sock(sk);
269         dp = dccp_sk(sk);
270         err = 0;
271
272         switch (optname) {
273         case DCCP_SOCKOPT_PACKET_SIZE:
274                 dp->dccps_packet_size = val;
275                 break;
276         default:
277                 err = -ENOPROTOOPT;
278                 break;
279         }
280         
281         release_sock(sk);
282         return err;
283 }
284
285 static int dccp_getsockopt_service(struct sock *sk, int len,
286                                    u32 __user *optval,
287                                    int __user *optlen)
288 {
289         const struct dccp_sock *dp = dccp_sk(sk);
290         const struct dccp_service_list *sl;
291         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
292
293         lock_sock(sk);
294         if (dccp_service_not_initialized(sk))
295                 goto out;
296
297         if ((sl = dp->dccps_service_list) != NULL) {
298                 slen = sl->dccpsl_nr * sizeof(u32);
299                 total_len += slen;
300         }
301
302         err = -EINVAL;
303         if (total_len > len)
304                 goto out;
305
306         err = 0;
307         if (put_user(total_len, optlen) ||
308             put_user(dp->dccps_service, optval) ||
309             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
310                 err = -EFAULT;
311 out:
312         release_sock(sk);
313         return err;
314 }
315
316 int dccp_getsockopt(struct sock *sk, int level, int optname,
317                     char __user *optval, int __user *optlen)
318 {
319         struct dccp_sock *dp;
320         int val, len;
321
322         if (level != SOL_DCCP)
323                 return ip_getsockopt(sk, level, optname, optval, optlen);
324
325         if (get_user(len, optlen))
326                 return -EFAULT;
327
328         if (len < sizeof(int))
329                 return -EINVAL;
330
331         dp = dccp_sk(sk);
332
333         switch (optname) {
334         case DCCP_SOCKOPT_PACKET_SIZE:
335                 val = dp->dccps_packet_size;
336                 len = sizeof(dp->dccps_packet_size);
337                 break;
338         case DCCP_SOCKOPT_SERVICE:
339                 return dccp_getsockopt_service(sk, len,
340                                                (u32 __user *)optval, optlen);
341         case 128 ... 191:
342                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
343                                              len, (u32 __user *)optval, optlen);
344         case 192 ... 255:
345                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
346                                              len, (u32 __user *)optval, optlen);
347         default:
348                 return -ENOPROTOOPT;
349         }
350
351         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
352                 return -EFAULT;
353
354         return 0;
355 }
356
357 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
358                  size_t len)
359 {
360         const struct dccp_sock *dp = dccp_sk(sk);
361         const int flags = msg->msg_flags;
362         const int noblock = flags & MSG_DONTWAIT;
363         struct sk_buff *skb;
364         int rc, size;
365         long timeo;
366
367         if (len > dp->dccps_mss_cache)
368                 return -EMSGSIZE;
369
370         lock_sock(sk);
371         timeo = sock_sndtimeo(sk, noblock);
372
373         /*
374          * We have to use sk_stream_wait_connect here to set sk_write_pending,
375          * so that the trick in dccp_rcv_request_sent_state_process.
376          */
377         /* Wait for a connection to finish. */
378         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
379                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
380                         goto out_release;
381
382         size = sk->sk_prot->max_header + len;
383         release_sock(sk);
384         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
385         lock_sock(sk);
386         if (skb == NULL)
387                 goto out_release;
388
389         skb_reserve(skb, sk->sk_prot->max_header);
390         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
391         if (rc != 0)
392                 goto out_discard;
393
394         rc = dccp_write_xmit(sk, skb, &timeo);
395         /*
396          * XXX we don't use sk_write_queue, so just discard the packet.
397          *     Current plan however is to _use_ sk_write_queue with
398          *     an algorith similar to tcp_sendmsg, where the main difference
399          *     is that in DCCP we have to respect packet boundaries, so
400          *     no coalescing of skbs.
401          *
402          *     This bug was _quickly_ found & fixed by just looking at an OSTRA
403          *     generated callgraph 8) -acme
404          */
405 out_release:
406         release_sock(sk);
407         return rc ? : len;
408 out_discard:
409         kfree_skb(skb);
410         goto out_release;
411 }
412
413 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
414                  size_t len, int nonblock, int flags, int *addr_len)
415 {
416         const struct dccp_hdr *dh;
417         long timeo;
418
419         lock_sock(sk);
420
421         if (sk->sk_state == DCCP_LISTEN) {
422                 len = -ENOTCONN;
423                 goto out;
424         }
425
426         timeo = sock_rcvtimeo(sk, nonblock);
427
428         do {
429                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
430
431                 if (skb == NULL)
432                         goto verify_sock_status;
433
434                 dh = dccp_hdr(skb);
435
436                 if (dh->dccph_type == DCCP_PKT_DATA ||
437                     dh->dccph_type == DCCP_PKT_DATAACK)
438                         goto found_ok_skb;
439
440                 if (dh->dccph_type == DCCP_PKT_RESET ||
441                     dh->dccph_type == DCCP_PKT_CLOSE) {
442                         dccp_pr_debug("found fin ok!\n");
443                         len = 0;
444                         goto found_fin_ok;
445                 }
446                 dccp_pr_debug("packet_type=%s\n",
447                               dccp_packet_name(dh->dccph_type));
448                 sk_eat_skb(sk, skb);
449 verify_sock_status:
450                 if (sock_flag(sk, SOCK_DONE)) {
451                         len = 0;
452                         break;
453                 }
454
455                 if (sk->sk_err) {
456                         len = sock_error(sk);
457                         break;
458                 }
459
460                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
461                         len = 0;
462                         break;
463                 }
464
465                 if (sk->sk_state == DCCP_CLOSED) {
466                         if (!sock_flag(sk, SOCK_DONE)) {
467                                 /* This occurs when user tries to read
468                                  * from never connected socket.
469                                  */
470                                 len = -ENOTCONN;
471                                 break;
472                         }
473                         len = 0;
474                         break;
475                 }
476
477                 if (!timeo) {
478                         len = -EAGAIN;
479                         break;
480                 }
481
482                 if (signal_pending(current)) {
483                         len = sock_intr_errno(timeo);
484                         break;
485                 }
486
487                 sk_wait_data(sk, &timeo);
488                 continue;
489         found_ok_skb:
490                 if (len > skb->len)
491                         len = skb->len;
492                 else if (len < skb->len)
493                         msg->msg_flags |= MSG_TRUNC;
494
495                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
496                         /* Exception. Bailout! */
497                         len = -EFAULT;
498                         break;
499                 }
500         found_fin_ok:
501                 if (!(flags & MSG_PEEK))
502                         sk_eat_skb(sk, skb);
503                 break;
504         } while (1);
505 out:
506         release_sock(sk);
507         return len;
508 }
509
510 static int inet_dccp_listen(struct socket *sock, int backlog)
511 {
512         struct sock *sk = sock->sk;
513         unsigned char old_state;
514         int err;
515
516         lock_sock(sk);
517
518         err = -EINVAL;
519         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
520                 goto out;
521
522         old_state = sk->sk_state;
523         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
524                 goto out;
525
526         /* Really, if the socket is already in listen state
527          * we can only allow the backlog to be adjusted.
528          */
529         if (old_state != DCCP_LISTEN) {
530                 /*
531                  * FIXME: here it probably should be sk->sk_prot->listen_start
532                  * see tcp_listen_start
533                  */
534                 err = dccp_listen_start(sk);
535                 if (err)
536                         goto out;
537         }
538         sk->sk_max_ack_backlog = backlog;
539         err = 0;
540
541 out:
542         release_sock(sk);
543         return err;
544 }
545
546 static const unsigned char dccp_new_state[] = {
547         /* current state:   new state:      action:     */
548         [0]               = DCCP_CLOSED,
549         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
550         [DCCP_REQUESTING] = DCCP_CLOSED,
551         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
552         [DCCP_LISTEN]     = DCCP_CLOSED,
553         [DCCP_RESPOND]    = DCCP_CLOSED,
554         [DCCP_CLOSING]    = DCCP_CLOSED,
555         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
556         [DCCP_CLOSED]     = DCCP_CLOSED,
557 };
558
559 static int dccp_close_state(struct sock *sk)
560 {
561         const int next = dccp_new_state[sk->sk_state];
562         const int ns = next & DCCP_STATE_MASK;
563
564         if (ns != sk->sk_state)
565                 dccp_set_state(sk, ns);
566
567         return next & DCCP_ACTION_FIN;
568 }
569
570 void dccp_close(struct sock *sk, long timeout)
571 {
572         struct sk_buff *skb;
573
574         lock_sock(sk);
575
576         sk->sk_shutdown = SHUTDOWN_MASK;
577
578         if (sk->sk_state == DCCP_LISTEN) {
579                 dccp_set_state(sk, DCCP_CLOSED);
580
581                 /* Special case. */
582                 inet_csk_listen_stop(sk);
583
584                 goto adjudge_to_death;
585         }
586
587         /*
588          * We need to flush the recv. buffs.  We do this only on the
589          * descriptor close, not protocol-sourced closes, because the
590           *reader process may not have drained the data yet!
591          */
592         /* FIXME: check for unread data */
593         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
594                 __kfree_skb(skb);
595         }
596
597         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
598                 /* Check zero linger _after_ checking for unread data. */
599                 sk->sk_prot->disconnect(sk, 0);
600         } else if (dccp_close_state(sk)) {
601                 dccp_send_close(sk, 1);
602         }
603
604         sk_stream_wait_close(sk, timeout);
605
606 adjudge_to_death:
607         /*
608          * It is the last release_sock in its life. It will remove backlog.
609          */
610         release_sock(sk);
611         /*
612          * Now socket is owned by kernel and we acquire BH lock
613          * to finish close. No need to check for user refs.
614          */
615         local_bh_disable();
616         bh_lock_sock(sk);
617         BUG_TRAP(!sock_owned_by_user(sk));
618
619         sock_hold(sk);
620         sock_orphan(sk);
621
622         /*
623          * The last release_sock may have processed the CLOSE or RESET
624          * packet moving sock to CLOSED state, if not we have to fire
625          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
626          * in draft-ietf-dccp-spec-11. -acme
627          */
628         if (sk->sk_state == DCCP_CLOSING) {
629                 /* FIXME: should start at 2 * RTT */
630                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
631                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
632                                           inet_csk(sk)->icsk_rto,
633                                           DCCP_RTO_MAX);
634 #if 0
635                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
636                 dccp_set_state(sk, DCCP_CLOSED);
637 #endif
638         }
639
640         atomic_inc(sk->sk_prot->orphan_count);
641         if (sk->sk_state == DCCP_CLOSED)
642                 inet_csk_destroy_sock(sk);
643
644         /* Otherwise, socket is reprieved until protocol close. */
645
646         bh_unlock_sock(sk);
647         local_bh_enable();
648         sock_put(sk);
649 }
650
/*
 *	shutdown() hook for DCCP.  Currently a stub: it only logs the call
 *	through dccp_pr_debug() and ignores @how.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("entry\n");
}
655
656 static struct proto_ops inet_dccp_ops = {
657         .family         = PF_INET,
658         .owner          = THIS_MODULE,
659         .release        = inet_release,
660         .bind           = inet_bind,
661         .connect        = inet_stream_connect,
662         .socketpair     = sock_no_socketpair,
663         .accept         = inet_accept,
664         .getname        = inet_getname,
665         /* FIXME: work on tcp_poll to rename it to inet_csk_poll */
666         .poll           = dccp_poll,
667         .ioctl          = inet_ioctl,
668         /* FIXME: work on inet_listen to rename it to sock_common_listen */
669         .listen         = inet_dccp_listen,
670         .shutdown       = inet_shutdown,
671         .setsockopt     = sock_common_setsockopt,
672         .getsockopt     = sock_common_getsockopt,
673         .sendmsg        = inet_sendmsg,
674         .recvmsg        = sock_common_recvmsg,
675         .mmap           = sock_no_mmap,
676         .sendpage       = sock_no_sendpage,
677 };
678
679 extern struct net_proto_family inet_family_ops;
680
681 static struct inet_protosw dccp_v4_protosw = {
682         .type           = SOCK_DCCP,
683         .protocol       = IPPROTO_DCCP,
684         .prot           = &dccp_v4_prot,
685         .ops            = &inet_dccp_ops,
686         .capability     = -1,
687         .no_check       = 0,
688         .flags          = 0,
689 };
690
/*
 * Global control socket, used for answering Out-of-the-blue (OOTB)
 * packets.  It is created once at initialisation time by
 * dccp_ctl_sock_init().
 */
struct socket *dccp_ctl_socket;
697
698 static char dccp_ctl_socket_err_msg[] __initdata =
699         KERN_ERR "DCCP: Failed to create the control socket.\n";
700
701 static int __init dccp_ctl_sock_init(void)
702 {
703         int rc = sock_create_kern(PF_INET, SOCK_DCCP, IPPROTO_DCCP,
704                                   &dccp_ctl_socket);
705         if (rc < 0)
706                 printk(dccp_ctl_socket_err_msg);
707         else {
708                 dccp_ctl_socket->sk->sk_allocation = GFP_ATOMIC;
709                 inet_sk(dccp_ctl_socket->sk)->uc_ttl = -1;
710
711                 /* Unhash it so that IP input processing does not even
712                  * see it, we do not wish this socket to see incoming
713                  * packets.
714                  */
715                 dccp_ctl_socket->sk->sk_prot->unhash(dccp_ctl_socket->sk);
716         }
717
718         return rc;
719 }
720
#ifdef CONFIG_IP_DCCP_UNLOAD_HACK
/*
 *	Release the control socket if one exists and clear the global
 *	pointer, so that a second call is a no-op.
 */
void dccp_ctl_sock_exit(void)
{
	if (dccp_ctl_socket == NULL)
		return;

	sock_release(dccp_ctl_socket);
	dccp_ctl_socket = NULL;
}

EXPORT_SYMBOL_GPL(dccp_ctl_sock_exit);
#endif
732
733 static int __init init_dccp_v4_mibs(void)
734 {
735         int rc = -ENOMEM;
736
737         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
738         if (dccp_statistics[0] == NULL)
739                 goto out;
740
741         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
742         if (dccp_statistics[1] == NULL)
743                 goto out_free_one;
744
745         rc = 0;
746 out:
747         return rc;
748 out_free_one:
749         free_percpu(dccp_statistics[0]);
750         dccp_statistics[0] = NULL;
751         goto out;
752
753 }
754
755 static int thash_entries;
756 module_param(thash_entries, int, 0444);
757 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
758
759 #ifdef CONFIG_IP_DCCP_DEBUG
760 int dccp_debug;
761 module_param(dccp_debug, int, 0444);
762 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
763 #endif
764
765 static int __init dccp_init(void)
766 {
767         unsigned long goal;
768         int ehash_order, bhash_order, i;
769         int rc = proto_register(&dccp_v4_prot, 1);
770
771         if (rc)
772                 goto out;
773
774         dccp_hashinfo.bind_bucket_cachep =
775                 kmem_cache_create("dccp_bind_bucket",
776                                   sizeof(struct inet_bind_bucket), 0,
777                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
778         if (!dccp_hashinfo.bind_bucket_cachep)
779                 goto out_proto_unregister;
780
781         /*
782          * Size and allocate the main established and bind bucket
783          * hash tables.
784          *
785          * The methodology is similar to that of the buffer cache.
786          */
787         if (num_physpages >= (128 * 1024))
788                 goal = num_physpages >> (21 - PAGE_SHIFT);
789         else
790                 goal = num_physpages >> (23 - PAGE_SHIFT);
791
792         if (thash_entries)
793                 goal = (thash_entries *
794                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
795         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
796                 ;
797         do {
798                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
799                                         sizeof(struct inet_ehash_bucket);
800                 dccp_hashinfo.ehash_size >>= 1;
801                 while (dccp_hashinfo.ehash_size &
802                        (dccp_hashinfo.ehash_size - 1))
803                         dccp_hashinfo.ehash_size--;
804                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
805                         __get_free_pages(GFP_ATOMIC, ehash_order);
806         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
807
808         if (!dccp_hashinfo.ehash) {
809                 printk(KERN_CRIT "Failed to allocate DCCP "
810                                  "established hash table\n");
811                 goto out_free_bind_bucket_cachep;
812         }
813
814         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
815                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
816                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
817         }
818
819         bhash_order = ehash_order;
820
821         do {
822                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
823                                         sizeof(struct inet_bind_hashbucket);
824                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
825                     bhash_order > 0)
826                         continue;
827                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
828                         __get_free_pages(GFP_ATOMIC, bhash_order);
829         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
830
831         if (!dccp_hashinfo.bhash) {
832                 printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
833                 goto out_free_dccp_ehash;
834         }
835
836         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
837                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
838                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
839         }
840
841         if (init_dccp_v4_mibs())
842                 goto out_free_dccp_bhash;
843
844         rc = -EAGAIN;
845         if (inet_add_protocol(&dccp_protocol, IPPROTO_DCCP))
846                 goto out_free_dccp_v4_mibs;
847
848         inet_register_protosw(&dccp_v4_protosw);
849
850         rc = dccp_ctl_sock_init();
851         if (rc)
852                 goto out_unregister_protosw;
853 out:
854         return rc;
855 out_unregister_protosw:
856         inet_unregister_protosw(&dccp_v4_protosw);
857         inet_del_protocol(&dccp_protocol, IPPROTO_DCCP);
858 out_free_dccp_v4_mibs:
859         free_percpu(dccp_statistics[0]);
860         free_percpu(dccp_statistics[1]);
861         dccp_statistics[0] = dccp_statistics[1] = NULL;
862 out_free_dccp_bhash:
863         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
864         dccp_hashinfo.bhash = NULL;
865 out_free_dccp_ehash:
866         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
867         dccp_hashinfo.ehash = NULL;
868 out_free_bind_bucket_cachep:
869         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
870         dccp_hashinfo.bind_bucket_cachep = NULL;
871 out_proto_unregister:
872         proto_unregister(&dccp_v4_prot);
873         goto out;
874 }
875
876 static const char dccp_del_proto_err_msg[] __exitdata =
877         KERN_ERR "can't remove dccp net_protocol\n";
878
879 static void __exit dccp_fini(void)
880 {
881         inet_unregister_protosw(&dccp_v4_protosw);
882
883         if (inet_del_protocol(&dccp_protocol, IPPROTO_DCCP) < 0)
884                 printk(dccp_del_proto_err_msg);
885
886         free_percpu(dccp_statistics[0]);
887         free_percpu(dccp_statistics[1]);
888         free_pages((unsigned long)dccp_hashinfo.bhash,
889                    get_order(dccp_hashinfo.bhash_size *
890                              sizeof(struct inet_bind_hashbucket)));
891         free_pages((unsigned long)dccp_hashinfo.ehash,
892                    get_order(dccp_hashinfo.ehash_size *
893                              sizeof(struct inet_ehash_bucket)));
894         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
895         proto_unregister(&dccp_v4_prot);
896 }
897
898 module_init(dccp_init);
899 module_exit(dccp_fini);
900
901 /*
902  * __stringify doesn't likes enums, so use SOCK_DCCP (6) and IPPROTO_DCCP (33)
903  * values directly, Also cover the case where the protocol is not specified,
904  * i.e. net-pf-PF_INET-proto-0-type-SOCK_DCCP
905  */
906 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-33-type-6");
907 MODULE_ALIAS("net-pf-" __stringify(PF_INET) "-proto-0-type-6");
908 MODULE_LICENSE("GPL");
909 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
910 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");