/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *      This program is free software; you can redistribute it and/or modify it
 *      under the terms of the GNU General Public License version 2 as
 *      published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

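/*
 * dccp_set_state  -  move a socket to a new protocol state
 *
 * Maintains the CURRESTAB/ESTABRESETS MIB counters and, on the
 * transition to DCCP_CLOSED, unhashes the socket and releases a local
 * port that was not explicitly locked by the user.  The state field
 * itself is written last so that a closed socket is never visible in
 * the hash tables.
 */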
void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
                      dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(&dccp_hashinfo, sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static const char *dccp_state_names[] = {
        [DCCP_OPEN]       = "OPEN",
        [DCCP_REQUESTING] = "REQUESTING",
        [DCCP_PARTOPEN]   = "PARTOPEN",
        [DCCP_LISTEN]     = "LISTEN",
        [DCCP_RESPOND]    = "RESPOND",
        [DCCP_CLOSING]    = "CLOSING",
        [DCCP_TIME_WAIT]  = "TIME_WAIT",
        [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

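/*
 * dccp_init_sock  -  per-socket initialisation
 *
 * For regular sockets (@ctl_sock_initialized) this sets up feature
 * negotiation state and instantiates the RX/TX CCID control blocks;
 * the internal control socket skips both and only needs the empty
 * feature-negotiation lists.
 */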
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);
        do_gettimeofday(&dp->dccps_epoch);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * let's leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        dccp_init_xmit_timers(sk);
        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

228
229 int dccp_destroy_sock(struct sock *sk)
230 {
231         struct dccp_sock *dp = dccp_sk(sk);
232         struct dccp_minisock *dmsk = dccp_msk(sk);
233
234         /*
235          * DCCP doesn't use sk_write_queue, just sk_send_head
236          * for retransmissions
237          */
238         if (sk->sk_send_head != NULL) {
239                 kfree_skb(sk->sk_send_head);
240                 sk->sk_send_head = NULL;
241         }
242
243         /* Clean up a referenced DCCP bind bucket. */
244         if (inet_csk(sk)->icsk_bind_hash != NULL)
245                 inet_put_port(&dccp_hashinfo, sk);
246
247         kfree(dp->dccps_service_list);
248         dp->dccps_service_list = NULL;
249
250         if (dmsk->dccpms_send_ack_vector) {
251                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
252                 dp->dccps_hc_rx_ackvec = NULL;
253         }
254         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
255         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
256         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
257
258         /* clean up feature negotiation state */
259         dccp_feat_clean(dmsk);
260
261         return 0;
262 }
263
264 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
265
266 static inline int dccp_listen_start(struct sock *sk, int backlog)
267 {
268         struct dccp_sock *dp = dccp_sk(sk);
269
270         dp->dccps_role = DCCP_ROLE_LISTEN;
271         return inet_csk_listen_start(sk, backlog);
272 }
273
274 int dccp_disconnect(struct sock *sk, int flags)
275 {
276         struct inet_connection_sock *icsk = inet_csk(sk);
277         struct inet_sock *inet = inet_sk(sk);
278         int err = 0;
279         const int old_state = sk->sk_state;
280
281         if (old_state != DCCP_CLOSED)
282                 dccp_set_state(sk, DCCP_CLOSED);
283
284         /* ABORT function of RFC793 */
285         if (old_state == DCCP_LISTEN) {
286                 inet_csk_listen_stop(sk);
287         /* FIXME: do the active reset thing */
288         } else if (old_state == DCCP_REQUESTING)
289                 sk->sk_err = ECONNRESET;
290
291         dccp_clear_xmit_timers(sk);
292         __skb_queue_purge(&sk->sk_receive_queue);
293         if (sk->sk_send_head != NULL) {
294                 __kfree_skb(sk->sk_send_head);
295                 sk->sk_send_head = NULL;
296         }
297
298         inet->dport = 0;
299
300         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
301                 inet_reset_saddr(sk);
302
303         sk->sk_shutdown = 0;
304         sock_reset_flag(sk, SOCK_DONE);
305
306         icsk->icsk_backoff = 0;
307         inet_csk_delack_init(sk);
308         __sk_dst_reset(sk);
309
310         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
311
312         sk->sk_error_report(sk);
313         return err;
314 }
315
316 EXPORT_SYMBOL_GPL(dccp_disconnect);
317
318 /*
319  *      Wait for a DCCP event.
320  *
321  *      Note that we don't need to lock the socket, as the upper poll layers
322  *      take care of normal races (between the test and the event) and we don't
323  *      go look at any of the socket buffers directly.
324  */
325 unsigned int dccp_poll(struct file *file, struct socket *sock,
326                        poll_table *wait)
327 {
328         unsigned int mask;
329         struct sock *sk = sock->sk;
330
331         poll_wait(file, sk->sk_sleep, wait);
332         if (sk->sk_state == DCCP_LISTEN)
333                 return inet_csk_listen_poll(sk);
334
335         /* Socket is not locked. We are protected from async events
336            by poll logic and correct handling of state changes
337            made by another threads is impossible in any case.
338          */
339
340         mask = 0;
341         if (sk->sk_err)
342                 mask = POLLERR;
343
344         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
345                 mask |= POLLHUP;
346         if (sk->sk_shutdown & RCV_SHUTDOWN)
347                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
348
349         /* Connected? */
350         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
351                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
352                         mask |= POLLIN | POLLRDNORM;
353
354                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
355                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
356                                 mask |= POLLOUT | POLLWRNORM;
357                         } else {  /* send SIGIO later */
358                                 set_bit(SOCK_ASYNC_NOSPACE,
359                                         &sk->sk_socket->flags);
360                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
361
362                                 /* Race breaker. If space is freed after
363                                  * wspace test but before the flags are set,
364                                  * IO signal will be lost.
365                                  */
366                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
367                                         mask |= POLLOUT | POLLWRNORM;
368                         }
369                 }
370         }
371         return mask;
372 }
373
374 EXPORT_SYMBOL_GPL(dccp_poll);
375
376 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
377 {
378         dccp_pr_debug("entry\n");
379         return -ENOIOCTLCMD;
380 }
381
382 EXPORT_SYMBOL_GPL(dccp_ioctl);
383
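/*
 * Attach a 32-bit service code (and, optionally, a list of further
 * acceptable service codes) to the socket.  Illustrative userspace
 * usage, as a sketch only:
 *
 *      __be32 service = htonl(42);
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *                 &service, sizeof(service));
 */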
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

/* byte 1 is feature.  The rest is the preference list. */
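/*
 * Illustrative userspace usage (sketch, assuming the dccp_so_feat
 * layout and the DCCPF_CCID feature number from <linux/dccp.h>): ask
 * for CCID 3 on the local half-connection.
 *
 *      __u8 ccid = 3;
 *      struct dccp_so_feat f = {
 *              .dccpsf_feat = DCCPF_CCID,
 *              .dccpsf_len  = 1,
 *              .dccpsf_val  = &ccid,
 *      };
 *
 *      setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_CHANGE_L, &f, sizeof(f));
 */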
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

out:
        return rc;

out_free_val:
        kfree(val);
        goto out;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                dp->dccps_packet_size = val;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340, sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                val = dp->dccps_packet_size;
                len = sizeof(dp->dccps_packet_size);
                break;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                break;
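        /* Socket options 128..191 are handed to the RX CCID and
         * 192..255 to the TX CCID, mirroring the CCID-specific option
         * ranges of RFC 4340. */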
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

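/*
 * DCCP preserves datagram boundaries: each sendmsg() call produces at
 * most one packet, so anything larger than the current maximum packet
 * size estimate (dccps_mss_cache) is rejected with -EMSGSIZE rather
 * than segmented.
 */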
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);
        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

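/*
 * dccp_recvmsg delivers the payload of at most one DATA/DATAACK packet
 * per call, setting MSG_TRUNC if the user buffer is too small.  RESET
 * and CLOSE packets are treated as end of connection and produce a
 * zero-length read; other packet types are discarded here.
 */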
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb, 0);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

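/*
 * State-transition table used by dccp_close(): the low bits encode the
 * state to move to, and DCCP_ACTION_FIN indicates that a CLOSE (or
 * CLOSEREQ, depending on the role) has to be sent before the socket
 * can be torn down.
 */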
static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in RFC 4340. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

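/*
 * The MIB counters follow the usual SNMP pattern of two per-CPU copies
 * (softirq and process context); both halves have to be allocated
 * before the statistics can be updated.
 */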
static int __init dccp_mib_init(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;
}

static void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

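/*
 * Module / built-in initialisation: size the established ("ehash") and
 * bind ("bhash") hash tables from available memory (or from the
 * thash_entries parameter), then bring up the MIB, ack-vector and
 * sysctl state, unwinding in reverse order on failure.
 */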
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                dccp_hashinfo.ehash_size >>= 1;
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                printk(KERN_CRIT "Failed to allocate DCCP "
                                 "established hash table\n");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
        }

        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                printk(KERN_CRIT "Failed to allocate DCCP bind hash table\n");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");