1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 /* The maximum length of the transmit queue, in packets. 0 means no limit. */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
63                       dccp_role(sk), sk,
64                       dccp_state_name(oldstate), dccp_state_name(state));
65         WARN_ON(state == oldstate);
66
67         switch (state) {
68         case DCCP_OPEN:
69                 if (oldstate != DCCP_OPEN)
70                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71                 break;
72
73         case DCCP_CLOSED:
74                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(&dccp_hashinfo, sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change the state AFTER the socket is unhashed, to avoid a closed
88          * socket sitting in the hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
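/*
 * Illustrative only (not part of the original file): with CONFIG_IP_DCCP_DEBUG
 * enabled, the dccp_pr_debug() call in dccp_set_state() above logs transitions
 * along the lines of
 *
 *	client(<socket pointer>) OPEN       -> CLOSING
 *
 * where the role and state names come from dccp_role() and dccp_state_name();
 * the exact message prefix depends on how dccp_pr_debug() is defined in dccp.h.
 */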
94
95 void dccp_done(struct sock *sk)
96 {
97         dccp_set_state(sk, DCCP_CLOSED);
98         dccp_clear_xmit_timers(sk);
99
100         sk->sk_shutdown = SHUTDOWN_MASK;
101
102         if (!sock_flag(sk, SOCK_DEAD))
103                 sk->sk_state_change(sk);
104         else
105                 inet_csk_destroy_sock(sk);
106 }
107
108 EXPORT_SYMBOL_GPL(dccp_done);
109
110 const char *dccp_packet_name(const int type)
111 {
112         static const char *dccp_packet_names[] = {
113                 [DCCP_PKT_REQUEST]  = "REQUEST",
114                 [DCCP_PKT_RESPONSE] = "RESPONSE",
115                 [DCCP_PKT_DATA]     = "DATA",
116                 [DCCP_PKT_ACK]      = "ACK",
117                 [DCCP_PKT_DATAACK]  = "DATAACK",
118                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
119                 [DCCP_PKT_CLOSE]    = "CLOSE",
120                 [DCCP_PKT_RESET]    = "RESET",
121                 [DCCP_PKT_SYNC]     = "SYNC",
122                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
123         };
124
125         if (type >= DCCP_NR_PKT_TYPES)
126                 return "INVALID";
127         else
128                 return dccp_packet_names[type];
129 }
130
131 EXPORT_SYMBOL_GPL(dccp_packet_name);
132
133 const char *dccp_state_name(const int state)
134 {
135         static char *dccp_state_names[] = {
136         [DCCP_OPEN]       = "OPEN",
137         [DCCP_REQUESTING] = "REQUESTING",
138         [DCCP_PARTOPEN]   = "PARTOPEN",
139         [DCCP_LISTEN]     = "LISTEN",
140         [DCCP_RESPOND]    = "RESPOND",
141         [DCCP_CLOSING]    = "CLOSING",
142         [DCCP_TIME_WAIT]  = "TIME_WAIT",
143         [DCCP_CLOSED]     = "CLOSED",
144         };
145
146         if (state >= DCCP_MAX_STATES)
147                 return "INVALID STATE!";
148         else
149                 return dccp_state_names[state];
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_state_name);
153
154 void dccp_hash(struct sock *sk)
155 {
156         inet_hash(&dccp_hashinfo, sk);
157 }
158
159 EXPORT_SYMBOL_GPL(dccp_hash);
160
161 void dccp_unhash(struct sock *sk)
162 {
163         inet_unhash(&dccp_hashinfo, sk);
164 }
165
166 EXPORT_SYMBOL_GPL(dccp_unhash);
167
168 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
169 {
170         struct dccp_sock *dp = dccp_sk(sk);
171         struct dccp_minisock *dmsk = dccp_msk(sk);
172         struct inet_connection_sock *icsk = inet_csk(sk);
173
174         dccp_minisock_init(&dp->dccps_minisock);
175         do_gettimeofday(&dp->dccps_epoch);
176
177         /*
178          * FIXME: We're hardcoding the CCID, and doing it at this point makes
179          * the listening (master) sock get CCID control blocks, which is not
180          * necessary, but for now, so as not to break the test userspace apps,
181          * let's leave it here; the real solution later is to do this via
182          * setsockopt(CCIDs-I-want/accept). -acme
183          */
184         if (likely(ctl_sock_initialized)) {
185                 int rc = dccp_feat_init(dmsk);
186
187                 if (rc)
188                         return rc;
189
190                 if (dmsk->dccpms_send_ack_vector) {
191                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
192                         if (dp->dccps_hc_rx_ackvec == NULL)
193                                 return -ENOMEM;
194                 }
195                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
196                                                       sk, GFP_KERNEL);
197                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
198                                                       sk, GFP_KERNEL);
199                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
200                              dp->dccps_hc_tx_ccid == NULL)) {
201                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
202                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
203                         if (dmsk->dccpms_send_ack_vector) {
204                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
205                                 dp->dccps_hc_rx_ackvec = NULL;
206                         }
207                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
208                         return -ENOMEM;
209                 }
210         } else {
211                 /* The control socket does not need feature negotiation. */
212                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
213                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
214         }
215
216         dccp_init_xmit_timers(sk);
217         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
218         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
219         sk->sk_state            = DCCP_CLOSED;
220         sk->sk_write_space      = dccp_write_space;
221         icsk->icsk_sync_mss     = dccp_sync_mss;
222         dp->dccps_mss_cache     = 536;
223         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
224         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
225         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
226
227         return 0;
228 }
229
230 EXPORT_SYMBOL_GPL(dccp_init_sock);
231
232 int dccp_destroy_sock(struct sock *sk)
233 {
234         struct dccp_sock *dp = dccp_sk(sk);
235         struct dccp_minisock *dmsk = dccp_msk(sk);
236
237         /*
238          * DCCP doesn't use sk_write_queue, just sk_send_head
239          * for retransmissions
240          */
241         if (sk->sk_send_head != NULL) {
242                 kfree_skb(sk->sk_send_head);
243                 sk->sk_send_head = NULL;
244         }
245
246         /* Clean up a referenced DCCP bind bucket. */
247         if (inet_csk(sk)->icsk_bind_hash != NULL)
248                 inet_put_port(&dccp_hashinfo, sk);
249
250         kfree(dp->dccps_service_list);
251         dp->dccps_service_list = NULL;
252
253         if (dmsk->dccpms_send_ack_vector) {
254                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
255                 dp->dccps_hc_rx_ackvec = NULL;
256         }
257         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
258         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
259         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
260
261         /* clean up feature negotiation state */
262         dccp_feat_clean(dmsk);
263
264         return 0;
265 }
266
267 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
268
269 static inline int dccp_listen_start(struct sock *sk, int backlog)
270 {
271         struct dccp_sock *dp = dccp_sk(sk);
272
273         dp->dccps_role = DCCP_ROLE_LISTEN;
274         return inet_csk_listen_start(sk, backlog);
275 }
276
277 int dccp_disconnect(struct sock *sk, int flags)
278 {
279         struct inet_connection_sock *icsk = inet_csk(sk);
280         struct inet_sock *inet = inet_sk(sk);
281         int err = 0;
282         const int old_state = sk->sk_state;
283
284         if (old_state != DCCP_CLOSED)
285                 dccp_set_state(sk, DCCP_CLOSED);
286
287         /* ABORT function of RFC 793 */
288         if (old_state == DCCP_LISTEN) {
289                 inet_csk_listen_stop(sk);
290         /* FIXME: do the active reset thing */
291         } else if (old_state == DCCP_REQUESTING)
292                 sk->sk_err = ECONNRESET;
293
294         dccp_clear_xmit_timers(sk);
295         __skb_queue_purge(&sk->sk_receive_queue);
296         if (sk->sk_send_head != NULL) {
297                 __kfree_skb(sk->sk_send_head);
298                 sk->sk_send_head = NULL;
299         }
300
301         inet->dport = 0;
302
303         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
304                 inet_reset_saddr(sk);
305
306         sk->sk_shutdown = 0;
307         sock_reset_flag(sk, SOCK_DONE);
308
309         icsk->icsk_backoff = 0;
310         inet_csk_delack_init(sk);
311         __sk_dst_reset(sk);
312
313         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
314
315         sk->sk_error_report(sk);
316         return err;
317 }
318
319 EXPORT_SYMBOL_GPL(dccp_disconnect);
320
321 /*
322  *      Wait for a DCCP event.
323  *
324  *      Note that we don't need to lock the socket, as the upper poll layers
325  *      take care of normal races (between the test and the event) and we don't
326  *      go look at any of the socket buffers directly.
327  */
328 unsigned int dccp_poll(struct file *file, struct socket *sock,
329                        poll_table *wait)
330 {
331         unsigned int mask;
332         struct sock *sk = sock->sk;
333
334         poll_wait(file, sk->sk_sleep, wait);
335         if (sk->sk_state == DCCP_LISTEN)
336                 return inet_csk_listen_poll(sk);
337
338         /* The socket is not locked. We are protected from async events
339            by the poll logic, and correct handling of state changes
340            made by other threads is impossible in any case.
341          */
342
343         mask = 0;
344         if (sk->sk_err)
345                 mask = POLLERR;
346
347         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
348                 mask |= POLLHUP;
349         if (sk->sk_shutdown & RCV_SHUTDOWN)
350                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
351
352         /* Connected? */
353         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
354                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
355                         mask |= POLLIN | POLLRDNORM;
356
357                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
358                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
359                                 mask |= POLLOUT | POLLWRNORM;
360                         } else {  /* send SIGIO later */
361                                 set_bit(SOCK_ASYNC_NOSPACE,
362                                         &sk->sk_socket->flags);
363                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
364
365                                 /* Race breaker. If space is freed after
366                                  * wspace test but before the flags are set,
367                                  * IO signal will be lost.
368                                  */
369                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
370                                         mask |= POLLOUT | POLLWRNORM;
371                         }
372                 }
373         }
374         return mask;
375 }
376
377 EXPORT_SYMBOL_GPL(dccp_poll);
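/*
 * Illustrative userspace sketch (not part of the original file): waiting for
 * the events reported by dccp_poll() above, assuming 'fd' is a connected
 * SOCK_DCCP socket.
 *
 *	struct pollfd pfd = { .fd = fd, .events = POLLIN | POLLOUT };
 *
 *	if (poll(&pfd, 1, -1) > 0) {
 *		if (pfd.revents & (POLLHUP | POLLERR))
 *			;	// peer closed/reset the connection or an error is pending
 *		if (pfd.revents & POLLIN)
 *			;	// at least one datagram is queued for reading
 *		if (pfd.revents & POLLOUT)
 *			;	// enough write space for another datagram
 *	}
 */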
378
379 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
380 {
381         dccp_pr_debug("entry\n");
382         return -ENOIOCTLCMD;
383 }
384
385 EXPORT_SYMBOL_GPL(dccp_ioctl);
386
387 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
388                                    char __user *optval, int optlen)
389 {
390         struct dccp_sock *dp = dccp_sk(sk);
391         struct dccp_service_list *sl = NULL;
392
393         if (service == DCCP_SERVICE_INVALID_VALUE ||
394             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
395                 return -EINVAL;
396
397         if (optlen > sizeof(service)) {
398                 sl = kmalloc(optlen, GFP_KERNEL);
399                 if (sl == NULL)
400                         return -ENOMEM;
401
402                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
403                 if (copy_from_user(sl->dccpsl_list,
404                                    optval + sizeof(service),
405                                    optlen - sizeof(service)) ||
406                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
407                         kfree(sl);
408                         return -EFAULT;
409                 }
410         }
411
412         lock_sock(sk);
413         dp->dccps_service = service;
414
415         kfree(dp->dccps_service_list);
416
417         dp->dccps_service_list = sl;
418         release_sock(sk);
419         return 0;
420 }
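/*
 * Illustrative userspace sketch (not part of the original file): setting the
 * service code(s) handled by dccp_setsockopt_service() above. The option value
 * is an array of __be32 service codes in network byte order; the first entry
 * becomes dccps_service, any further entries form dccps_service_list. The
 * code values used here are made up.
 *
 *	__be32 codes[2] = { htonl(42), htonl(43) };
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
 *		       codes, sizeof(codes)) < 0)
 *		perror("setsockopt(DCCP_SOCKOPT_SERVICE)");
 */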
421
422 /* Byte 1 is the feature to change; the rest is the preference list. */
423 static int dccp_setsockopt_change(struct sock *sk, int type,
424                                   struct dccp_so_feat __user *optval)
425 {
426         struct dccp_so_feat opt;
427         u8 *val;
428         int rc;
429
430         if (copy_from_user(&opt, optval, sizeof(opt)))
431                 return -EFAULT;
432
433         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
434         if (!val)
435                 return -ENOMEM;
436
437         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
438                 rc = -EFAULT;
439                 goto out_free_val;
440         }
441
442         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
443                               val, opt.dccpsf_len, GFP_KERNEL);
444         if (rc)
445                 goto out_free_val;
446
447 out:
448         return rc;
449
450 out_free_val:
451         kfree(val);
452         goto out;
453 }
454
455 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
456                 char __user *optval, int optlen)
457 {
458         struct dccp_sock *dp = dccp_sk(sk);
459         int val, err = 0;
460
461         if (optlen < sizeof(int))
462                 return -EINVAL;
463
464         if (get_user(val, (int __user *)optval))
465                 return -EFAULT;
466
467         if (optname == DCCP_SOCKOPT_SERVICE)
468                 return dccp_setsockopt_service(sk, val, optval, optlen);
469
470         lock_sock(sk);
471         switch (optname) {
472         case DCCP_SOCKOPT_PACKET_SIZE:
473                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
474                 err = 0;
475                 break;
476         case DCCP_SOCKOPT_CHANGE_L:
477                 if (optlen != sizeof(struct dccp_so_feat))
478                         err = -EINVAL;
479                 else
480                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
481                                                      (struct dccp_so_feat __user *)
482                                                      optval);
483                 break;
484         case DCCP_SOCKOPT_CHANGE_R:
485                 if (optlen != sizeof(struct dccp_so_feat))
486                         err = -EINVAL;
487                 else
488                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
489                                                      (struct dccp_so_feat __user *)
490                                                      optval);
491                 break;
492         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
493                 if (val < 0 || val > 15)
494                         err = -EINVAL;
495                 else
496                         dp->dccps_pcslen = val;
497                 break;
498         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
499                 if (val < 0 || val > 15)
500                         err = -EINVAL;
501                 else {
502                         dp->dccps_pcrlen = val;
503                         /* FIXME: add feature negotiation,
504                          * ChangeL(MinimumChecksumCoverage, val) */
505                 }
506                 break;
507         default:
508                 err = -ENOPROTOOPT;
509                 break;
510         }
511
512         release_sock(sk);
513         return err;
514 }
515
516 int dccp_setsockopt(struct sock *sk, int level, int optname,
517                     char __user *optval, int optlen)
518 {
519         if (level != SOL_DCCP)
520                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
521                                                              optname, optval,
522                                                              optlen);
523         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
524 }
525
526 EXPORT_SYMBOL_GPL(dccp_setsockopt);
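/*
 * Illustrative userspace sketch (not part of the original file): requesting
 * partial checksum coverage as handled by the DCCP_SOCKOPT_SEND_CSCOV case
 * above (sender side, RFC 4340, sec. 9.2). Values 1..15 restrict coverage to
 * the headers plus an initial portion of the payload; 0 (the default) keeps
 * full coverage.
 *
 *	int cscov = 4;
 *
 *	if (setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SEND_CSCOV,
 *		       &cscov, sizeof(cscov)) < 0)
 *		perror("setsockopt(DCCP_SOCKOPT_SEND_CSCOV)");
 *
 * The value can be read back via getsockopt() with DCCP_SOCKOPT_SEND_CSCOV
 * (or DCCP_SOCKOPT_RECV_CSCOV for the receiver side), see
 * do_dccp_getsockopt() below.
 */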
527
528 #ifdef CONFIG_COMPAT
529 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
530                            char __user *optval, int optlen)
531 {
532         if (level != SOL_DCCP)
533                 return inet_csk_compat_setsockopt(sk, level, optname,
534                                                   optval, optlen);
535         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
536 }
537
538 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
539 #endif
540
541 static int dccp_getsockopt_service(struct sock *sk, int len,
542                                    __be32 __user *optval,
543                                    int __user *optlen)
544 {
545         const struct dccp_sock *dp = dccp_sk(sk);
546         const struct dccp_service_list *sl;
547         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
548
549         lock_sock(sk);
550         if ((sl = dp->dccps_service_list) != NULL) {
551                 slen = sl->dccpsl_nr * sizeof(u32);
552                 total_len += slen;
553         }
554
555         err = -EINVAL;
556         if (total_len > len)
557                 goto out;
558
559         err = 0;
560         if (put_user(total_len, optlen) ||
561             put_user(dp->dccps_service, optval) ||
562             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
563                 err = -EFAULT;
564 out:
565         release_sock(sk);
566         return err;
567 }
568
569 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
570                     char __user *optval, int __user *optlen)
571 {
572         struct dccp_sock *dp;
573         int val, len;
574
575         if (get_user(len, optlen))
576                 return -EFAULT;
577
578         if (len < (int)sizeof(int))
579                 return -EINVAL;
580
581         dp = dccp_sk(sk);
582
583         switch (optname) {
584         case DCCP_SOCKOPT_PACKET_SIZE:
585                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
586                 return 0;
587         case DCCP_SOCKOPT_SERVICE:
588                 return dccp_getsockopt_service(sk, len,
589                                                (__be32 __user *)optval, optlen);
590         case DCCP_SOCKOPT_SEND_CSCOV:
591                 val = dp->dccps_pcslen;
592                 len = sizeof(val);
593                 break;
594         case DCCP_SOCKOPT_RECV_CSCOV:
595                 val = dp->dccps_pcrlen;
596                 len = sizeof(val);
597                 break;
598         case 128 ... 191:
599                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
600                                              len, (u32 __user *)optval, optlen);
601         case 192 ... 255:
602                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
603                                              len, (u32 __user *)optval, optlen);
604         default:
605                 return -ENOPROTOOPT;
606         }
607
608         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
609                 return -EFAULT;
610
611         return 0;
612 }
613
614 int dccp_getsockopt(struct sock *sk, int level, int optname,
615                     char __user *optval, int __user *optlen)
616 {
617         if (level != SOL_DCCP)
618                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
619                                                              optname, optval,
620                                                              optlen);
621         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
622 }
623
624 EXPORT_SYMBOL_GPL(dccp_getsockopt);
625
626 #ifdef CONFIG_COMPAT
627 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
628                            char __user *optval, int __user *optlen)
629 {
630         if (level != SOL_DCCP)
631                 return inet_csk_compat_getsockopt(sk, level, optname,
632                                                   optval, optlen);
633         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
634 }
635
636 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
637 #endif
638
639 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
640                  size_t len)
641 {
642         const struct dccp_sock *dp = dccp_sk(sk);
643         const int flags = msg->msg_flags;
644         const int noblock = flags & MSG_DONTWAIT;
645         struct sk_buff *skb;
646         int rc, size;
647         long timeo;
648
649         if (len > dp->dccps_mss_cache)
650                 return -EMSGSIZE;
651
652         lock_sock(sk);
653
654         if (sysctl_dccp_tx_qlen &&
655             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
656                 rc = -EAGAIN;
657                 goto out_release;
658         }
659
660         timeo = sock_sndtimeo(sk, noblock);
661
662         /*
663          * We have to use sk_stream_wait_connect here to set sk_write_pending,
664          * so that the trick in dccp_rcv_request_sent_state_process works.
665          */
666         /* Wait for a connection to finish. */
667         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
668                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
669                         goto out_release;
670
671         size = sk->sk_prot->max_header + len;
672         release_sock(sk);
673         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
674         lock_sock(sk);
675         if (skb == NULL)
676                 goto out_release;
677
678         skb_reserve(skb, sk->sk_prot->max_header);
679         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
680         if (rc != 0)
681                 goto out_discard;
682
683         skb_queue_tail(&sk->sk_write_queue, skb);
684         dccp_write_xmit(sk, 0);
685 out_release:
686         release_sock(sk);
687         return rc ? : len;
688 out_discard:
689         kfree_skb(skb);
690         goto out_release;
691 }
692
693 EXPORT_SYMBOL_GPL(dccp_sendmsg);
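/*
 * Illustrative userspace sketch (not part of the original file): each send()
 * on a DCCP socket produces exactly one datagram. As dccp_sendmsg() above
 * shows, a message larger than the current MSS (dccps_mss_cache) fails with
 * EMSGSIZE, and a transmit queue already holding sysctl_dccp_tx_qlen packets
 * yields EAGAIN.
 *
 *	ssize_t n = send(fd, buf, len, 0);
 *
 *	if (n < 0 && errno == EMSGSIZE)
 *		;	// message does not fit into a single DCCP datagram
 *	else if (n < 0 && errno == EAGAIN)
 *		;	// transmit queue is full (or the call would block)
 */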
694
695 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696                  size_t len, int nonblock, int flags, int *addr_len)
697 {
698         const struct dccp_hdr *dh;
699         long timeo;
700
701         lock_sock(sk);
702
703         if (sk->sk_state == DCCP_LISTEN) {
704                 len = -ENOTCONN;
705                 goto out;
706         }
707
708         timeo = sock_rcvtimeo(sk, nonblock);
709
710         do {
711                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
712
713                 if (skb == NULL)
714                         goto verify_sock_status;
715
716                 dh = dccp_hdr(skb);
717
718                 if (dh->dccph_type == DCCP_PKT_DATA ||
719                     dh->dccph_type == DCCP_PKT_DATAACK)
720                         goto found_ok_skb;
721
722                 if (dh->dccph_type == DCCP_PKT_RESET ||
723                     dh->dccph_type == DCCP_PKT_CLOSE) {
724                         dccp_pr_debug("found fin ok!\n");
725                         len = 0;
726                         goto found_fin_ok;
727                 }
728                 dccp_pr_debug("packet_type=%s\n",
729                               dccp_packet_name(dh->dccph_type));
730                 sk_eat_skb(sk, skb, 0);
731 verify_sock_status:
732                 if (sock_flag(sk, SOCK_DONE)) {
733                         len = 0;
734                         break;
735                 }
736
737                 if (sk->sk_err) {
738                         len = sock_error(sk);
739                         break;
740                 }
741
742                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
743                         len = 0;
744                         break;
745                 }
746
747                 if (sk->sk_state == DCCP_CLOSED) {
748                         if (!sock_flag(sk, SOCK_DONE)) {
749                                 /* This occurs when the user tries to read
750                                  * from a never-connected socket.
751                                  */
752                                 len = -ENOTCONN;
753                                 break;
754                         }
755                         len = 0;
756                         break;
757                 }
758
759                 if (!timeo) {
760                         len = -EAGAIN;
761                         break;
762                 }
763
764                 if (signal_pending(current)) {
765                         len = sock_intr_errno(timeo);
766                         break;
767                 }
768
769                 sk_wait_data(sk, &timeo);
770                 continue;
771         found_ok_skb:
772                 if (len > skb->len)
773                         len = skb->len;
774                 else if (len < skb->len)
775                         msg->msg_flags |= MSG_TRUNC;
776
777                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
778                         /* Exception. Bailout! */
779                         len = -EFAULT;
780                         break;
781                 }
782         found_fin_ok:
783                 if (!(flags & MSG_PEEK))
784                         sk_eat_skb(sk, skb, 0);
785                 break;
786         } while (1);
787 out:
788         release_sock(sk);
789         return len;
790 }
791
792 EXPORT_SYMBOL_GPL(dccp_recvmsg);
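/*
 * Illustrative userspace sketch (not part of the original file): each
 * recvmsg() returns at most one datagram. Per dccp_recvmsg() above, a buffer
 * smaller than the datagram sets MSG_TRUNC and discards the excess (unless
 * MSG_PEEK is used), while a Close or Reset from the peer is reported as a
 * return value of 0.
 *
 *	char buf[1500];
 *	struct iovec iov = { .iov_base = buf, .iov_len = sizeof(buf) };
 *	struct msghdr msg = { .msg_iov = &iov, .msg_iovlen = 1 };
 *	ssize_t n = recvmsg(fd, &msg, 0);
 *
 *	if (n == 0)
 *		;	// connection terminated by the peer
 *	else if (n > 0 && (msg.msg_flags & MSG_TRUNC))
 *		;	// datagram was larger than the supplied buffer
 */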
793
794 int inet_dccp_listen(struct socket *sock, int backlog)
795 {
796         struct sock *sk = sock->sk;
797         unsigned char old_state;
798         int err;
799
800         lock_sock(sk);
801
802         err = -EINVAL;
803         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
804                 goto out;
805
806         old_state = sk->sk_state;
807         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
808                 goto out;
809
810         /* Really, if the socket is already in listen state
811          * we can only allow the backlog to be adjusted.
812          */
813         if (old_state != DCCP_LISTEN) {
814                 /*
815                  * FIXME: here it probably should be sk->sk_prot->listen_start
816                  * see tcp_listen_start
817                  */
818                 err = dccp_listen_start(sk, backlog);
819                 if (err)
820                         goto out;
821         }
822         sk->sk_max_ack_backlog = backlog;
823         err = 0;
824
825 out:
826         release_sock(sk);
827         return err;
828 }
829
830 EXPORT_SYMBOL_GPL(inet_dccp_listen);
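/*
 * Illustrative userspace sketch (not part of the original file): a minimal
 * passive DCCP endpoint using the listen path above. Incoming Requests carry
 * a service code that is matched against the listener's service (list), so a
 * service code is normally set first; the port and code used here are made up.
 *
 *	int fd = socket(AF_INET, SOCK_DCCP, IPPROTO_DCCP);
 *	__be32 service = htonl(42);
 *	struct sockaddr_in sin = {
 *		.sin_family = AF_INET,
 *		.sin_port   = htons(1234),
 *	};
 *
 *	setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE, &service, sizeof(service));
 *	bind(fd, (struct sockaddr *)&sin, sizeof(sin));
 *	listen(fd, 5);
 *	accept(fd, NULL, NULL);
 */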
831
832 static const unsigned char dccp_new_state[] = {
833         /* current state:   new state:      action:     */
834         [0]               = DCCP_CLOSED,
835         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
836         [DCCP_REQUESTING] = DCCP_CLOSED,
837         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
838         [DCCP_LISTEN]     = DCCP_CLOSED,
839         [DCCP_RESPOND]    = DCCP_CLOSED,
840         [DCCP_CLOSING]    = DCCP_CLOSED,
841         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
842         [DCCP_CLOSED]     = DCCP_CLOSED,
843 };
844
845 static int dccp_close_state(struct sock *sk)
846 {
847         const int next = dccp_new_state[sk->sk_state];
848         const int ns = next & DCCP_STATE_MASK;
849
850         if (ns != sk->sk_state)
851                 dccp_set_state(sk, ns);
852
853         return next & DCCP_ACTION_FIN;
854 }
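/*
 * Example of the mapping above: a socket in DCCP_OPEN maps to
 * DCCP_CLOSING | DCCP_ACTION_FIN, so dccp_close_state() moves it to
 * DCCP_CLOSING and returns non-zero, which makes dccp_close() below send a
 * Close/CloseReq packet. A socket in DCCP_REQUESTING maps straight to
 * DCCP_CLOSED and no Close is sent.
 */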
855
856 void dccp_close(struct sock *sk, long timeout)
857 {
858         struct dccp_sock *dp = dccp_sk(sk);
859         struct sk_buff *skb;
860         int state;
861
862         lock_sock(sk);
863
864         sk->sk_shutdown = SHUTDOWN_MASK;
865
866         if (sk->sk_state == DCCP_LISTEN) {
867                 dccp_set_state(sk, DCCP_CLOSED);
868
869                 /* Special case. */
870                 inet_csk_listen_stop(sk);
871
872                 goto adjudge_to_death;
873         }
874
875         sk_stop_timer(sk, &dp->dccps_xmit_timer);
876
877         /*
878          * We need to flush the receive buffers.  We do this only on the
879          * descriptor close, not protocol-sourced closes, because the
880          * reader process may not have drained the data yet!
881          */
882         /* FIXME: check for unread data */
883         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
884                 __kfree_skb(skb);
885         }
886
887         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
888                 /* Check zero linger _after_ checking for unread data. */
889                 sk->sk_prot->disconnect(sk, 0);
890         } else if (dccp_close_state(sk)) {
891                 dccp_send_close(sk, 1);
892         }
893
894         sk_stream_wait_close(sk, timeout);
895
896 adjudge_to_death:
897         state = sk->sk_state;
898         sock_hold(sk);
899         sock_orphan(sk);
900         atomic_inc(sk->sk_prot->orphan_count);
901
902         /*
903          * The last release_sock in this socket's life; it processes the backlog.
904          */
905         release_sock(sk);
906         /*
907          * Now the socket is owned by the kernel and we acquire the BH lock
908          * to finish the close. No need to check for user references.
909          */
910         local_bh_disable();
911         bh_lock_sock(sk);
912         BUG_TRAP(!sock_owned_by_user(sk));
913
914         /* Have we already been destroyed by a softirq or backlog? */
915         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
916                 goto out;
917
918         /*
919          * The last release_sock may have processed the CLOSE or RESET
920          * packet moving sock to CLOSED state, if not we have to fire
921          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
922          * in draft-ietf-dccp-spec-11. -acme
923          */
924         if (sk->sk_state == DCCP_CLOSING) {
925                 /* FIXME: should start at 2 * RTT */
926                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
927                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
928                                           inet_csk(sk)->icsk_rto,
929                                           DCCP_RTO_MAX);
930 #if 0
931                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
932                 dccp_set_state(sk, DCCP_CLOSED);
933 #endif
934         }
935
936         if (sk->sk_state == DCCP_CLOSED)
937                 inet_csk_destroy_sock(sk);
938
939         /* Otherwise, socket is reprieved until protocol close. */
940
941 out:
942         bh_unlock_sock(sk);
943         local_bh_enable();
944         sock_put(sk);
945 }
946
947 EXPORT_SYMBOL_GPL(dccp_close);
948
949 void dccp_shutdown(struct sock *sk, int how)
950 {
951         dccp_pr_debug("entry\n");
952 }
953
954 EXPORT_SYMBOL_GPL(dccp_shutdown);
955
956 static int __init dccp_mib_init(void)
957 {
958         int rc = -ENOMEM;
959
960         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
961         if (dccp_statistics[0] == NULL)
962                 goto out;
963
964         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
965         if (dccp_statistics[1] == NULL)
966                 goto out_free_one;
967
968         rc = 0;
969 out:
970         return rc;
971 out_free_one:
972         free_percpu(dccp_statistics[0]);
973         dccp_statistics[0] = NULL;
974         goto out;
975
976 }
977
978 static void dccp_mib_exit(void)
979 {
980         free_percpu(dccp_statistics[0]);
981         free_percpu(dccp_statistics[1]);
982         dccp_statistics[0] = dccp_statistics[1] = NULL;
983 }
984
985 static int thash_entries;
986 module_param(thash_entries, int, 0444);
987 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
988
989 #ifdef CONFIG_IP_DCCP_DEBUG
990 int dccp_debug;
991 module_param(dccp_debug, int, 0444);
992 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
993
994 EXPORT_SYMBOL_GPL(dccp_debug);
995 #endif
996
997 static int __init dccp_init(void)
998 {
999         unsigned long goal;
1000         int ehash_order, bhash_order, i;
1001         int rc = -ENOBUFS;
1002
1003         dccp_hashinfo.bind_bucket_cachep =
1004                 kmem_cache_create("dccp_bind_bucket",
1005                                   sizeof(struct inet_bind_bucket), 0,
1006                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
1007         if (!dccp_hashinfo.bind_bucket_cachep)
1008                 goto out;
1009
1010         /*
1011          * Size and allocate the main established and bind bucket
1012          * hash tables.
1013          *
1014          * The methodology is similar to that of the buffer cache.
1015          */
1016         if (num_physpages >= (128 * 1024))
1017                 goal = num_physpages >> (21 - PAGE_SHIFT);
1018         else
1019                 goal = num_physpages >> (23 - PAGE_SHIFT);
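        /*
         * Worked example (illustrative, assuming 4 KiB pages): with 512 MiB
         * of memory, num_physpages == 131072 == 128 * 1024, so the first
         * branch applies and goal = 131072 >> (21 - 12) = 256 pages; the
         * loop below then looks for an order-8 allocation for the
         * established hash table, unless thash_entries overrides the goal.
         */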
1020
1021         if (thash_entries)
1022                 goal = (thash_entries *
1023                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1024         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1025                 ;
1026         do {
1027                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1028                                         sizeof(struct inet_ehash_bucket);
1029                 while (dccp_hashinfo.ehash_size &
1030                        (dccp_hashinfo.ehash_size - 1))
1031                         dccp_hashinfo.ehash_size--;
1032                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1033                         __get_free_pages(GFP_ATOMIC, ehash_order);
1034         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1035
1036         if (!dccp_hashinfo.ehash) {
1037                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1038                 goto out_free_bind_bucket_cachep;
1039         }
1040
1041         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1042                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1043                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1044                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1045         }
1046
1047         bhash_order = ehash_order;
1048
1049         do {
1050                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1051                                         sizeof(struct inet_bind_hashbucket);
1052                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1053                     bhash_order > 0)
1054                         continue;
1055                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1056                         __get_free_pages(GFP_ATOMIC, bhash_order);
1057         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1058
1059         if (!dccp_hashinfo.bhash) {
1060                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1061                 goto out_free_dccp_ehash;
1062         }
1063
1064         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1065                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1066                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1067         }
1068
1069         rc = dccp_mib_init();
1070         if (rc)
1071                 goto out_free_dccp_bhash;
1072
1073         rc = dccp_ackvec_init();
1074         if (rc)
1075                 goto out_free_dccp_mib;
1076
1077         rc = dccp_sysctl_init();
1078         if (rc)
1079                 goto out_ackvec_exit;
1080 out:
1081         return rc;
1082 out_ackvec_exit:
1083         dccp_ackvec_exit();
1084 out_free_dccp_mib:
1085         dccp_mib_exit();
1086 out_free_dccp_bhash:
1087         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1088         dccp_hashinfo.bhash = NULL;
1089 out_free_dccp_ehash:
1090         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1091         dccp_hashinfo.ehash = NULL;
1092 out_free_bind_bucket_cachep:
1093         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1094         dccp_hashinfo.bind_bucket_cachep = NULL;
1095         goto out;
1096 }
1097
1098 static void __exit dccp_fini(void)
1099 {
1100         dccp_mib_exit();
1101         free_pages((unsigned long)dccp_hashinfo.bhash,
1102                    get_order(dccp_hashinfo.bhash_size *
1103                              sizeof(struct inet_bind_hashbucket)));
1104         free_pages((unsigned long)dccp_hashinfo.ehash,
1105                    get_order(dccp_hashinfo.ehash_size *
1106                              sizeof(struct inet_ehash_bucket)));
1107         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1108         dccp_ackvec_exit();
1109         dccp_sysctl_exit();
1110 }
1111
1112 module_init(dccp_init);
1113 module_exit(dccp_fini);
1114
1115 MODULE_LICENSE("GPL");
1116 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1117 MODULE_DESCRIPTION("DCCP - Datagram Congestion Control Protocol");