Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/semaphore.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/*
 * Protocol-wide MIB counters of type struct dccp_mib.
 * NOTE(review): presumably the storage behind DCCP_INC_STATS() and
 * DCCP_DEC_STATS() used further down - confirm against their definitions.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);
42
/*
 * Atomic counter, starting at zero.
 * NOTE(review): presumably what sk->sk_prot->orphan_count points at for DCCP
 * sockets (dccp_close() increments through that pointer) - confirm against
 * the struct proto definitions.
 */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
/*
 * Hash bookkeeping handed to inet_hash(), inet_unhash() and inet_put_port()
 * by the functions in this file.  Only the listening-hash lock, its user
 * count and its wait queue are initialised statically here; every other
 * member starts out zeroed.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
/*
 * Maximum length of the transmit queue, in packets; 0 means no limit.
 * dccp_sendmsg() fails with -EAGAIN once sk_write_queue holds this many
 * packets.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
63                       dccp_role(sk), sk,
64                       dccp_state_name(oldstate), dccp_state_name(state));
65         WARN_ON(state == oldstate);
66
67         switch (state) {
68         case DCCP_OPEN:
69                 if (oldstate != DCCP_OPEN)
70                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71                 break;
72
73         case DCCP_CLOSED:
74                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(&dccp_hashinfo, sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 void dccp_done(struct sock *sk)
96 {
97         dccp_set_state(sk, DCCP_CLOSED);
98         dccp_clear_xmit_timers(sk);
99
100         sk->sk_shutdown = SHUTDOWN_MASK;
101
102         if (!sock_flag(sk, SOCK_DEAD))
103                 sk->sk_state_change(sk);
104         else
105                 inet_csk_destroy_sock(sk);
106 }
107
108 EXPORT_SYMBOL_GPL(dccp_done);
109
110 const char *dccp_packet_name(const int type)
111 {
112         static const char *dccp_packet_names[] = {
113                 [DCCP_PKT_REQUEST]  = "REQUEST",
114                 [DCCP_PKT_RESPONSE] = "RESPONSE",
115                 [DCCP_PKT_DATA]     = "DATA",
116                 [DCCP_PKT_ACK]      = "ACK",
117                 [DCCP_PKT_DATAACK]  = "DATAACK",
118                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
119                 [DCCP_PKT_CLOSE]    = "CLOSE",
120                 [DCCP_PKT_RESET]    = "RESET",
121                 [DCCP_PKT_SYNC]     = "SYNC",
122                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
123         };
124
125         if (type >= DCCP_NR_PKT_TYPES)
126                 return "INVALID";
127         else
128                 return dccp_packet_names[type];
129 }
130
131 EXPORT_SYMBOL_GPL(dccp_packet_name);
132
133 const char *dccp_state_name(const int state)
134 {
135         static char *dccp_state_names[] = {
136         [DCCP_OPEN]       = "OPEN",
137         [DCCP_REQUESTING] = "REQUESTING",
138         [DCCP_PARTOPEN]   = "PARTOPEN",
139         [DCCP_LISTEN]     = "LISTEN",
140         [DCCP_RESPOND]    = "RESPOND",
141         [DCCP_CLOSING]    = "CLOSING",
142         [DCCP_TIME_WAIT]  = "TIME_WAIT",
143         [DCCP_CLOSED]     = "CLOSED",
144         };
145
146         if (state >= DCCP_MAX_STATES)
147                 return "INVALID STATE!";
148         else
149                 return dccp_state_names[state];
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_state_name);
153
/* Hash @sk via inet_hash(), using the DCCP hash tables (dccp_hashinfo). */
void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);
160
/* Unhash @sk via inet_unhash(), using the DCCP hash tables (dccp_hashinfo). */
void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);
167
/*
 * dccp_init_sock - set up the DCCP-specific state of a socket
 * @sk:                   socket to initialise
 * @ctl_sock_initialized: non-zero takes the full path (feature defaults,
 *                        optional ack vector, RX and TX CCID blocks); zero -
 *                        per the comment in the else branch, the control
 *                        socket - only initialises the two feature lists
 *
 * Returns 0 on success, the error from dccp_feat_init(), or -ENOMEM when
 * the ack vector or either CCID block cannot be allocated.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);
        do_gettimeofday(&dp->dccps_epoch);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * lets leave it here, later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                /* The ack vector is only allocated when the feature is on. */
                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                /*
                 * Both allocations are attempted before checking; if either
                 * failed, undo whatever this function allocated above.
                 */
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        /* Defaults shared by both paths. */
        dccp_init_xmit_timers(sk);
        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
231
232 int dccp_destroy_sock(struct sock *sk)
233 {
234         struct dccp_sock *dp = dccp_sk(sk);
235         struct dccp_minisock *dmsk = dccp_msk(sk);
236
237         /*
238          * DCCP doesn't use sk_write_queue, just sk_send_head
239          * for retransmissions
240          */
241         if (sk->sk_send_head != NULL) {
242                 kfree_skb(sk->sk_send_head);
243                 sk->sk_send_head = NULL;
244         }
245
246         /* Clean up a referenced DCCP bind bucket. */
247         if (inet_csk(sk)->icsk_bind_hash != NULL)
248                 inet_put_port(&dccp_hashinfo, sk);
249
250         kfree(dp->dccps_service_list);
251         dp->dccps_service_list = NULL;
252
253         if (dmsk->dccpms_send_ack_vector) {
254                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
255                 dp->dccps_hc_rx_ackvec = NULL;
256         }
257         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
258         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
259         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
260
261         /* clean up feature negotiation state */
262         dccp_feat_clean(dmsk);
263
264         return 0;
265 }
266
267 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
268
269 static inline int dccp_listen_start(struct sock *sk, int backlog)
270 {
271         struct dccp_sock *dp = dccp_sk(sk);
272
273         dp->dccps_role = DCCP_ROLE_LISTEN;
274         return inet_csk_listen_start(sk, backlog);
275 }
276
277 int dccp_disconnect(struct sock *sk, int flags)
278 {
279         struct inet_connection_sock *icsk = inet_csk(sk);
280         struct inet_sock *inet = inet_sk(sk);
281         int err = 0;
282         const int old_state = sk->sk_state;
283
284         if (old_state != DCCP_CLOSED)
285                 dccp_set_state(sk, DCCP_CLOSED);
286
287         /* ABORT function of RFC793 */
288         if (old_state == DCCP_LISTEN) {
289                 inet_csk_listen_stop(sk);
290         /* FIXME: do the active reset thing */
291         } else if (old_state == DCCP_REQUESTING)
292                 sk->sk_err = ECONNRESET;
293
294         dccp_clear_xmit_timers(sk);
295         __skb_queue_purge(&sk->sk_receive_queue);
296         if (sk->sk_send_head != NULL) {
297                 __kfree_skb(sk->sk_send_head);
298                 sk->sk_send_head = NULL;
299         }
300
301         inet->dport = 0;
302
303         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
304                 inet_reset_saddr(sk);
305
306         sk->sk_shutdown = 0;
307         sock_reset_flag(sk, SOCK_DONE);
308
309         icsk->icsk_backoff = 0;
310         inet_csk_delack_init(sk);
311         __sk_dst_reset(sk);
312
313         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
314
315         sk->sk_error_report(sk);
316         return err;
317 }
318
319 EXPORT_SYMBOL_GPL(dccp_disconnect);
320
/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 *
 *      Returns the POLL* event mask for the socket; a listening socket is
 *      handed to inet_csk_listen_poll() instead.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by the poll logic, and correct handling of state changes
           made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        /* Hang-up: both directions shut down, or the socket is closed. */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        /* Receive side shut down: report readable as well as POLLRDHUP. */
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                /* Readable as soon as receive memory is charged to the socket. */
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
378
/*
 * No ioctl is handled here: the call is logged at debug level and
 * -ENOIOCTLCMD is returned for every @cmd.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
386
387 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
388                                    char __user *optval, int optlen)
389 {
390         struct dccp_sock *dp = dccp_sk(sk);
391         struct dccp_service_list *sl = NULL;
392
393         if (service == DCCP_SERVICE_INVALID_VALUE || 
394             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
395                 return -EINVAL;
396
397         if (optlen > sizeof(service)) {
398                 sl = kmalloc(optlen, GFP_KERNEL);
399                 if (sl == NULL)
400                         return -ENOMEM;
401
402                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
403                 if (copy_from_user(sl->dccpsl_list,
404                                    optval + sizeof(service),
405                                    optlen - sizeof(service)) ||
406                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
407                         kfree(sl);
408                         return -EFAULT;
409                 }
410         }
411
412         lock_sock(sk);
413         dp->dccps_service = service;
414
415         kfree(dp->dccps_service_list);
416
417         dp->dccps_service_list = sl;
418         release_sock(sk);
419         return 0;
420 }
421
422 /* byte 1 is feature.  the rest is the preference list */
423 static int dccp_setsockopt_change(struct sock *sk, int type,
424                                   struct dccp_so_feat __user *optval)
425 {
426         struct dccp_so_feat opt;
427         u8 *val;
428         int rc;
429
430         if (copy_from_user(&opt, optval, sizeof(opt)))
431                 return -EFAULT;
432
433         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
434         if (!val)
435                 return -ENOMEM;
436
437         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
438                 rc = -EFAULT;
439                 goto out_free_val;
440         }
441
442         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
443                               val, opt.dccpsf_len, GFP_KERNEL);
444         if (rc)
445                 goto out_free_val;
446
447 out:
448         return rc;
449
450 out_free_val:
451         kfree(val);
452         goto out;
453 }
454
455 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
456                 char __user *optval, int optlen)
457 {
458         struct dccp_sock *dp = dccp_sk(sk);
459         int val, err = 0;
460
461         if (optlen < sizeof(int))
462                 return -EINVAL;
463
464         if (get_user(val, (int __user *)optval))
465                 return -EFAULT;
466
467         if (optname == DCCP_SOCKOPT_SERVICE)
468                 return dccp_setsockopt_service(sk, val, optval, optlen);
469
470         lock_sock(sk);
471         switch (optname) {
472         case DCCP_SOCKOPT_PACKET_SIZE:
473                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
474                 err = 0;
475                 break;
476         case DCCP_SOCKOPT_CHANGE_L:
477                 if (optlen != sizeof(struct dccp_so_feat))
478                         err = -EINVAL;
479                 else
480                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
481                                                      (struct dccp_so_feat __user *)
482                                                      optval);
483                 break;
484         case DCCP_SOCKOPT_CHANGE_R:
485                 if (optlen != sizeof(struct dccp_so_feat))
486                         err = -EINVAL;
487                 else
488                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
489                                                      (struct dccp_so_feat __user *)
490                                                      optval);
491                 break;
492         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
493                 if (val < 0 || val > 15)
494                         err = -EINVAL;
495                 else
496                         dp->dccps_pcslen = val;
497                 break;
498         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
499                 if (val < 0 || val > 15)
500                         err = -EINVAL;
501                 else {
502                         dp->dccps_pcrlen = val;
503                         /* FIXME: add feature negotiation,
504                          * ChangeL(MinimumChecksumCoverage, val) */
505                 }
506                 break;
507         default:
508                 err = -ENOPROTOOPT;
509                 break;
510         }
511
512         release_sock(sk);
513         return err;
514 }
515
516 int dccp_setsockopt(struct sock *sk, int level, int optname,
517                     char __user *optval, int optlen)
518 {
519         if (level != SOL_DCCP)
520                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
521                                                              optname, optval,
522                                                              optlen);
523         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
524 }
525
526 EXPORT_SYMBOL_GPL(dccp_setsockopt);
527
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt - compat setsockopt() entry point for DCCP sockets
 *
 * SOL_DCCP options are handled by do_dccp_setsockopt(); every other level
 * goes to inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_setsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
540
541 static int dccp_getsockopt_service(struct sock *sk, int len,
542                                    __be32 __user *optval,
543                                    int __user *optlen)
544 {
545         const struct dccp_sock *dp = dccp_sk(sk);
546         const struct dccp_service_list *sl;
547         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
548
549         lock_sock(sk);
550         if ((sl = dp->dccps_service_list) != NULL) {
551                 slen = sl->dccpsl_nr * sizeof(u32);
552                 total_len += slen;
553         }
554
555         err = -EINVAL;
556         if (total_len > len)
557                 goto out;
558
559         err = 0;
560         if (put_user(total_len, optlen) ||
561             put_user(dp->dccps_service, optval) ||
562             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
563                 err = -EFAULT;
564 out:
565         release_sock(sk);
566         return err;
567 }
568
569 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
570                     char __user *optval, int __user *optlen)
571 {
572         struct dccp_sock *dp;
573         int val, len;
574
575         if (get_user(len, optlen))
576                 return -EFAULT;
577
578         if (len < sizeof(int))
579                 return -EINVAL;
580
581         dp = dccp_sk(sk);
582
583         switch (optname) {
584         case DCCP_SOCKOPT_PACKET_SIZE:
585                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
586                 return 0;
587         case DCCP_SOCKOPT_SERVICE:
588                 return dccp_getsockopt_service(sk, len,
589                                                (__be32 __user *)optval, optlen);
590         case DCCP_SOCKOPT_SEND_CSCOV:
591                 val = dp->dccps_pcslen;
592                 break;
593         case DCCP_SOCKOPT_RECV_CSCOV:
594                 val = dp->dccps_pcrlen;
595                 break;
596         case 128 ... 191:
597                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
598                                              len, (u32 __user *)optval, optlen);
599         case 192 ... 255:
600                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
601                                              len, (u32 __user *)optval, optlen);
602         default:
603                 return -ENOPROTOOPT;
604         }
605
606         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
607                 return -EFAULT;
608
609         return 0;
610 }
611
612 int dccp_getsockopt(struct sock *sk, int level, int optname,
613                     char __user *optval, int __user *optlen)
614 {
615         if (level != SOL_DCCP)
616                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
617                                                              optname, optval,
618                                                              optlen);
619         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
620 }
621
622 EXPORT_SYMBOL_GPL(dccp_getsockopt);
623
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt - compat getsockopt() entry point for DCCP sockets
 *
 * SOL_DCCP options are handled by do_dccp_getsockopt(); every other level
 * goes to inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_getsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
636
/*
 * dccp_sendmsg - queue one datagram for transmission
 * @iocb: not used by this function
 * @sk:   sending socket
 * @msg:  message header; msg_flags and msg_iov are read
 * @len:  payload length in bytes
 *
 * The payload must fit in one packet (at most dccps_mss_cache bytes).
 * Returns @len on success; otherwise -EMSGSIZE for an oversized payload,
 * -EAGAIN when the transmit queue already holds sysctl_dccp_tx_qlen
 * packets, or the error reported by sk_stream_wait_connect(),
 * sock_alloc_send_skb() or memcpy_fromiovec().
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        /* Enforce the queue limit; a limit of 0 disables the check. */
        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process.
         * NOTE(review): the sentence above is unfinished in the original;
         * presumably it ends in "works" - confirm.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /* The socket lock is dropped around the skb allocation. */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        /* Leave headroom for the headers, then copy the payload in. */
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk,0);
out_release:
        release_sock(sk);
        /* rc is 0 on the success path, so this yields len. */
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
692
/*
 * dccp_recvmsg - receive one datagram
 * @iocb:     not used by this function
 * @sk:       receiving socket
 * @msg:      message header; msg_iov receives the data and MSG_TRUNC is
 *            set in msg_flags when the packet did not fit
 * @len:      size of the user buffer in bytes
 * @nonblock: non-zero for a non-blocking call
 * @flags:    receive flags; only MSG_PEEK is examined here
 * @addr_len: not used by this function
 *
 * Returns the number of bytes copied, 0 when a RESET/CLOSE packet is at
 * the head of the queue or the socket is done or shut down for reading,
 * or a negative errno.  @len is a size_t that also carries the negative
 * error codes; they come out as intended when it is converted to the int
 * return value.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                /* Nothing queued: see whether waiting makes sense. */
                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                /*
                 * Any other packet type carries nothing for the reader:
                 * drop it and fall into the status checks below.
                 */
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb, 0);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* Non-blocking call, or the receive timeout has run out. */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                /* Never copy more than the packet; flag a short buffer. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                /* With MSG_PEEK the packet stays queued for the next read. */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
791
792 int inet_dccp_listen(struct socket *sock, int backlog)
793 {
794         struct sock *sk = sock->sk;
795         unsigned char old_state;
796         int err;
797
798         lock_sock(sk);
799
800         err = -EINVAL;
801         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
802                 goto out;
803
804         old_state = sk->sk_state;
805         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
806                 goto out;
807
808         /* Really, if the socket is already in listen state
809          * we can only allow the backlog to be adjusted.
810          */
811         if (old_state != DCCP_LISTEN) {
812                 /*
813                  * FIXME: here it probably should be sk->sk_prot->listen_start
814                  * see tcp_listen_start
815                  */
816                 err = dccp_listen_start(sk, backlog);
817                 if (err)
818                         goto out;
819         }
820         sk->sk_max_ack_backlog = backlog;
821         err = 0;
822
823 out:
824         release_sock(sk);
825         return err;
826 }
827
828 EXPORT_SYMBOL_GPL(inet_dccp_listen);
829
/*
 * Close transition table, indexed by the current socket state (see
 * dccp_close_state()).  Each entry is the state to enter, optionally OR'ed
 * with DCCP_ACTION_FIN; dccp_close_state() separates the two using
 * DCCP_STATE_MASK and returns the DCCP_ACTION_FIN bit to its caller.
 */
static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};
842
843 static int dccp_close_state(struct sock *sk)
844 {
845         const int next = dccp_new_state[sk->sk_state];
846         const int ns = next & DCCP_STATE_MASK;
847
848         if (ns != sk->sk_state)
849                 dccp_set_state(sk, ns);
850
851         return next & DCCP_ACTION_FIN;
852 }
853
/*
 * dccp_close - close a DCCP socket on behalf of its owner
 * @sk:      socket being closed
 * @timeout: passed to sk_stream_wait_close()
 *
 * Shuts the socket down in both directions, discards unread data, starts
 * the closing transition (or disconnects at once for a zero linger time),
 * then orphans the socket and either destroys it or leaves it to the
 * retransmit timer armed below.
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                /* The transition table asked for a CLOSE to be sent. */
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        /* Remember the state so a change during release_sock() shows up. */
        state = sk->sk_state;
        /* Hold a reference across the teardown; dropped by sock_put() below. */
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
946
/*
 * dccp_shutdown  -  shutdown handler (placeholder)
 * @sk:  socket being shut down (unused)
 * @how: shutdown mode requested by the caller (unused)
 *
 * Currently this only emits a debug message; no state is changed.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
953
954 static int __init dccp_mib_init(void)
955 {
956         int rc = -ENOMEM;
957
958         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
959         if (dccp_statistics[0] == NULL)
960                 goto out;
961
962         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
963         if (dccp_statistics[1] == NULL)
964                 goto out_free_one;
965
966         rc = 0;
967 out:
968         return rc;
969 out_free_one:
970         free_percpu(dccp_statistics[0]);
971         dccp_statistics[0] = NULL;
972         goto out;
973
974 }
975
976 static void dccp_mib_exit(void)
977 {
978         free_percpu(dccp_statistics[0]);
979         free_percpu(dccp_statistics[1]);
980         dccp_statistics[0] = dccp_statistics[1] = NULL;
981 }
982
983 static int thash_entries;
984 module_param(thash_entries, int, 0444);
985 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
986
987 #ifdef CONFIG_IP_DCCP_DEBUG
988 int dccp_debug;
989 module_param(dccp_debug, int, 0444);
990 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
991
992 EXPORT_SYMBOL_GPL(dccp_debug);
993 #endif
994
995 static int __init dccp_init(void)
996 {
997         unsigned long goal;
998         int ehash_order, bhash_order, i;
999         int rc = -ENOBUFS;
1000
1001         dccp_hashinfo.bind_bucket_cachep =
1002                 kmem_cache_create("dccp_bind_bucket",
1003                                   sizeof(struct inet_bind_bucket), 0,
1004                                   SLAB_HWCACHE_ALIGN, NULL, NULL);
1005         if (!dccp_hashinfo.bind_bucket_cachep)
1006                 goto out;
1007
1008         /*
1009          * Size and allocate the main established and bind bucket
1010          * hash tables.
1011          *
1012          * The methodology is similar to that of the buffer cache.
1013          */
1014         if (num_physpages >= (128 * 1024))
1015                 goal = num_physpages >> (21 - PAGE_SHIFT);
1016         else
1017                 goal = num_physpages >> (23 - PAGE_SHIFT);
1018
1019         if (thash_entries)
1020                 goal = (thash_entries *
1021                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1022         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1023                 ;
1024         do {
1025                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1026                                         sizeof(struct inet_ehash_bucket);
1027                 dccp_hashinfo.ehash_size >>= 1;
1028                 while (dccp_hashinfo.ehash_size &
1029                        (dccp_hashinfo.ehash_size - 1))
1030                         dccp_hashinfo.ehash_size--;
1031                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1032                         __get_free_pages(GFP_ATOMIC, ehash_order);
1033         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1034
1035         if (!dccp_hashinfo.ehash) {
1036                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1037                 goto out_free_bind_bucket_cachep;
1038         }
1039
1040         for (i = 0; i < (dccp_hashinfo.ehash_size << 1); i++) {
1041                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1042                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1043         }
1044
1045         bhash_order = ehash_order;
1046
1047         do {
1048                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1049                                         sizeof(struct inet_bind_hashbucket);
1050                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1051                     bhash_order > 0)
1052                         continue;
1053                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1054                         __get_free_pages(GFP_ATOMIC, bhash_order);
1055         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1056
1057         if (!dccp_hashinfo.bhash) {
1058                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1059                 goto out_free_dccp_ehash;
1060         }
1061
1062         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1063                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1064                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1065         }
1066
1067         rc = dccp_mib_init();
1068         if (rc)
1069                 goto out_free_dccp_bhash;
1070
1071         rc = dccp_ackvec_init();
1072         if (rc)
1073                 goto out_free_dccp_mib;
1074
1075         rc = dccp_sysctl_init();
1076         if (rc)
1077                 goto out_ackvec_exit;
1078 out:
1079         return rc;
1080 out_ackvec_exit:
1081         dccp_ackvec_exit();
1082 out_free_dccp_mib:
1083         dccp_mib_exit();
1084 out_free_dccp_bhash:
1085         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1086         dccp_hashinfo.bhash = NULL;
1087 out_free_dccp_ehash:
1088         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1089         dccp_hashinfo.ehash = NULL;
1090 out_free_bind_bucket_cachep:
1091         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1092         dccp_hashinfo.bind_bucket_cachep = NULL;
1093         goto out;
1094 }
1095
1096 static void __exit dccp_fini(void)
1097 {
1098         dccp_mib_exit();
1099         free_pages((unsigned long)dccp_hashinfo.bhash,
1100                    get_order(dccp_hashinfo.bhash_size *
1101                              sizeof(struct inet_bind_hashbucket)));
1102         free_pages((unsigned long)dccp_hashinfo.ehash,
1103                    get_order(dccp_hashinfo.ehash_size *
1104                              sizeof(struct inet_ehash_bucket)));
1105         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1106         dccp_ackvec_exit();
1107         dccp_sysctl_exit();
1108 }
1109
1110 module_init(dccp_init);
1111 module_exit(dccp_fini);
1112
1113 MODULE_LICENSE("GPL");
1114 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1115 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");