Merge branch 'next'
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* DCCP MIB statistics object, defined through DEFINE_SNMP_STAT and exported. */
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
/* Exported orphan counter; starts at zero. */
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
/*
 * Exported inet hash table descriptor for DCCP. Only the listening-hash
 * lock, user count and wait queue get explicit static initialisers here.
 */
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
/* Checked by dccp_sendmsg() against sk_write_queue.qlen before queueing. */
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
176 {
177         struct dccp_sock *dp = dccp_sk(sk);
178         struct dccp_minisock *dmsk = dccp_msk(sk);
179         struct inet_connection_sock *icsk = inet_csk(sk);
180
181         dccp_minisock_init(&dp->dccps_minisock);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         /*
197          * FIXME: We're hardcoding the CCID, and doing this at this point makes
198          * the listening (master) sock get CCID control blocks, which is not
199          * necessary, but for now, to not mess with the test userspace apps,
200          * lets leave it here, later the real solution is to do this in a
201          * setsockopt(CCIDs-I-want/accept). -acme
202          */
203         if (likely(ctl_sock_initialized)) {
204                 int rc = dccp_feat_init(dmsk);
205
206                 if (rc)
207                         return rc;
208
209                 if (dmsk->dccpms_send_ack_vector) {
210                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
211                         if (dp->dccps_hc_rx_ackvec == NULL)
212                                 return -ENOMEM;
213                 }
214                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
215                                                       sk, GFP_KERNEL);
216                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
217                                                       sk, GFP_KERNEL);
218                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
219                              dp->dccps_hc_tx_ccid == NULL)) {
220                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
221                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
222                         if (dmsk->dccpms_send_ack_vector) {
223                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
224                                 dp->dccps_hc_rx_ackvec = NULL;
225                         }
226                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
227                         return -ENOMEM;
228                 }
229         } else {
230                 /* control socket doesn't need feat nego */
231                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
232                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
233         }
234
235         return 0;
236 }
237
238 EXPORT_SYMBOL_GPL(dccp_init_sock);
239
240 void dccp_destroy_sock(struct sock *sk)
241 {
242         struct dccp_sock *dp = dccp_sk(sk);
243         struct dccp_minisock *dmsk = dccp_msk(sk);
244
245         /*
246          * DCCP doesn't use sk_write_queue, just sk_send_head
247          * for retransmissions
248          */
249         if (sk->sk_send_head != NULL) {
250                 kfree_skb(sk->sk_send_head);
251                 sk->sk_send_head = NULL;
252         }
253
254         /* Clean up a referenced DCCP bind bucket. */
255         if (inet_csk(sk)->icsk_bind_hash != NULL)
256                 inet_put_port(sk);
257
258         kfree(dp->dccps_service_list);
259         dp->dccps_service_list = NULL;
260
261         if (dmsk->dccpms_send_ack_vector) {
262                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
263                 dp->dccps_hc_rx_ackvec = NULL;
264         }
265         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
266         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
267         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
268
269         /* clean up feature negotiation state */
270         dccp_feat_clean(dmsk);
271 }
272
273 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
274
275 static inline int dccp_listen_start(struct sock *sk, int backlog)
276 {
277         struct dccp_sock *dp = dccp_sk(sk);
278
279         dp->dccps_role = DCCP_ROLE_LISTEN;
280         return inet_csk_listen_start(sk, backlog);
281 }
282
283 static inline int dccp_need_reset(int state)
284 {
285         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
286                state != DCCP_REQUESTING;
287 }
288
289 int dccp_disconnect(struct sock *sk, int flags)
290 {
291         struct inet_connection_sock *icsk = inet_csk(sk);
292         struct inet_sock *inet = inet_sk(sk);
293         int err = 0;
294         const int old_state = sk->sk_state;
295
296         if (old_state != DCCP_CLOSED)
297                 dccp_set_state(sk, DCCP_CLOSED);
298
299         /*
300          * This corresponds to the ABORT function of RFC793, sec. 3.8
301          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
302          */
303         if (old_state == DCCP_LISTEN) {
304                 inet_csk_listen_stop(sk);
305         } else if (dccp_need_reset(old_state)) {
306                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
307                 sk->sk_err = ECONNRESET;
308         } else if (old_state == DCCP_REQUESTING)
309                 sk->sk_err = ECONNRESET;
310
311         dccp_clear_xmit_timers(sk);
312
313         __skb_queue_purge(&sk->sk_receive_queue);
314         __skb_queue_purge(&sk->sk_write_queue);
315         if (sk->sk_send_head != NULL) {
316                 __kfree_skb(sk->sk_send_head);
317                 sk->sk_send_head = NULL;
318         }
319
320         inet->dport = 0;
321
322         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
323                 inet_reset_saddr(sk);
324
325         sk->sk_shutdown = 0;
326         sock_reset_flag(sk, SOCK_DONE);
327
328         icsk->icsk_backoff = 0;
329         inet_csk_delack_init(sk);
330         __sk_dst_reset(sk);
331
332         WARN_ON(inet->num && !icsk->icsk_bind_hash);
333
334         sk->sk_error_report(sk);
335         return err;
336 }
337
338 EXPORT_SYMBOL_GPL(dccp_disconnect);
339
340 /*
341  *      Wait for a DCCP event.
342  *
343  *      Note that we don't need to lock the socket, as the upper poll layers
344  *      take care of normal races (between the test and the event) and we don't
345  *      go look at any of the socket buffers directly.
346  */
347 unsigned int dccp_poll(struct file *file, struct socket *sock,
348                        poll_table *wait)
349 {
350         unsigned int mask;
351         struct sock *sk = sock->sk;
352
353         poll_wait(file, sk->sk_sleep, wait);
354         if (sk->sk_state == DCCP_LISTEN)
355                 return inet_csk_listen_poll(sk);
356
357         /* Socket is not locked. We are protected from async events
358            by poll logic and correct handling of state changes
359            made by another threads is impossible in any case.
360          */
361
362         mask = 0;
363         if (sk->sk_err)
364                 mask = POLLERR;
365
366         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
367                 mask |= POLLHUP;
368         if (sk->sk_shutdown & RCV_SHUTDOWN)
369                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
370
371         /* Connected? */
372         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
373                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
374                         mask |= POLLIN | POLLRDNORM;
375
376                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
377                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
378                                 mask |= POLLOUT | POLLWRNORM;
379                         } else {  /* send SIGIO later */
380                                 set_bit(SOCK_ASYNC_NOSPACE,
381                                         &sk->sk_socket->flags);
382                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
383
384                                 /* Race breaker. If space is freed after
385                                  * wspace test but before the flags are set,
386                                  * IO signal will be lost.
387                                  */
388                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
389                                         mask |= POLLOUT | POLLWRNORM;
390                         }
391                 }
392         }
393         return mask;
394 }
395
396 EXPORT_SYMBOL_GPL(dccp_poll);
397
398 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
399 {
400         int rc = -ENOTCONN;
401
402         lock_sock(sk);
403
404         if (sk->sk_state == DCCP_LISTEN)
405                 goto out;
406
407         switch (cmd) {
408         case SIOCINQ: {
409                 struct sk_buff *skb;
410                 unsigned long amount = 0;
411
412                 skb = skb_peek(&sk->sk_receive_queue);
413                 if (skb != NULL) {
414                         /*
415                          * We will only return the amount of this packet since
416                          * that is all that will be read.
417                          */
418                         amount = skb->len;
419                 }
420                 rc = put_user(amount, (int __user *)arg);
421         }
422                 break;
423         default:
424                 rc = -ENOIOCTLCMD;
425                 break;
426         }
427 out:
428         release_sock(sk);
429         return rc;
430 }
431
432 EXPORT_SYMBOL_GPL(dccp_ioctl);
433
434 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
435                                    char __user *optval, int optlen)
436 {
437         struct dccp_sock *dp = dccp_sk(sk);
438         struct dccp_service_list *sl = NULL;
439
440         if (service == DCCP_SERVICE_INVALID_VALUE ||
441             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
442                 return -EINVAL;
443
444         if (optlen > sizeof(service)) {
445                 sl = kmalloc(optlen, GFP_KERNEL);
446                 if (sl == NULL)
447                         return -ENOMEM;
448
449                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
450                 if (copy_from_user(sl->dccpsl_list,
451                                    optval + sizeof(service),
452                                    optlen - sizeof(service)) ||
453                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
454                         kfree(sl);
455                         return -EFAULT;
456                 }
457         }
458
459         lock_sock(sk);
460         dp->dccps_service = service;
461
462         kfree(dp->dccps_service_list);
463
464         dp->dccps_service_list = sl;
465         release_sock(sk);
466         return 0;
467 }
468
469 /* byte 1 is feature.  the rest is the preference list */
470 static int dccp_setsockopt_change(struct sock *sk, int type,
471                                   struct dccp_so_feat __user *optval)
472 {
473         struct dccp_so_feat opt;
474         u8 *val;
475         int rc;
476
477         if (copy_from_user(&opt, optval, sizeof(opt)))
478                 return -EFAULT;
479         /*
480          * rfc4340: 6.1. Change Options
481          */
482         if (opt.dccpsf_len < 1)
483                 return -EINVAL;
484
485         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
486         if (!val)
487                 return -ENOMEM;
488
489         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
490                 rc = -EFAULT;
491                 goto out_free_val;
492         }
493
494         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
495                               val, opt.dccpsf_len, GFP_KERNEL);
496         if (rc)
497                 goto out_free_val;
498
499 out:
500         return rc;
501
502 out_free_val:
503         kfree(val);
504         goto out;
505 }
506
507 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
508                 char __user *optval, int optlen)
509 {
510         struct dccp_sock *dp = dccp_sk(sk);
511         int val, err = 0;
512
513         if (optlen < sizeof(int))
514                 return -EINVAL;
515
516         if (get_user(val, (int __user *)optval))
517                 return -EFAULT;
518
519         if (optname == DCCP_SOCKOPT_SERVICE)
520                 return dccp_setsockopt_service(sk, val, optval, optlen);
521
522         lock_sock(sk);
523         switch (optname) {
524         case DCCP_SOCKOPT_PACKET_SIZE:
525                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
526                 err = 0;
527                 break;
528         case DCCP_SOCKOPT_CHANGE_L:
529                 if (optlen != sizeof(struct dccp_so_feat))
530                         err = -EINVAL;
531                 else
532                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
533                                                      (struct dccp_so_feat __user *)
534                                                      optval);
535                 break;
536         case DCCP_SOCKOPT_CHANGE_R:
537                 if (optlen != sizeof(struct dccp_so_feat))
538                         err = -EINVAL;
539                 else
540                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
541                                                      (struct dccp_so_feat __user *)
542                                                      optval);
543                 break;
544         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
545                 if (dp->dccps_role != DCCP_ROLE_SERVER)
546                         err = -EOPNOTSUPP;
547                 else
548                         dp->dccps_server_timewait = (val != 0);
549                 break;
550         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
551                 if (val < 0 || val > 15)
552                         err = -EINVAL;
553                 else
554                         dp->dccps_pcslen = val;
555                 break;
556         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
557                 if (val < 0 || val > 15)
558                         err = -EINVAL;
559                 else {
560                         dp->dccps_pcrlen = val;
561                         /* FIXME: add feature negotiation,
562                          * ChangeL(MinimumChecksumCoverage, val) */
563                 }
564                 break;
565         default:
566                 err = -ENOPROTOOPT;
567                 break;
568         }
569
570         release_sock(sk);
571         return err;
572 }
573
574 int dccp_setsockopt(struct sock *sk, int level, int optname,
575                     char __user *optval, int optlen)
576 {
577         if (level != SOL_DCCP)
578                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
579                                                              optname, optval,
580                                                              optlen);
581         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
582 }
583
584 EXPORT_SYMBOL_GPL(dccp_setsockopt);
585
586 #ifdef CONFIG_COMPAT
587 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
588                            char __user *optval, int optlen)
589 {
590         if (level != SOL_DCCP)
591                 return inet_csk_compat_setsockopt(sk, level, optname,
592                                                   optval, optlen);
593         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
594 }
595
596 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
597 #endif
598
599 static int dccp_getsockopt_service(struct sock *sk, int len,
600                                    __be32 __user *optval,
601                                    int __user *optlen)
602 {
603         const struct dccp_sock *dp = dccp_sk(sk);
604         const struct dccp_service_list *sl;
605         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
606
607         lock_sock(sk);
608         if ((sl = dp->dccps_service_list) != NULL) {
609                 slen = sl->dccpsl_nr * sizeof(u32);
610                 total_len += slen;
611         }
612
613         err = -EINVAL;
614         if (total_len > len)
615                 goto out;
616
617         err = 0;
618         if (put_user(total_len, optlen) ||
619             put_user(dp->dccps_service, optval) ||
620             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
621                 err = -EFAULT;
622 out:
623         release_sock(sk);
624         return err;
625 }
626
627 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
628                     char __user *optval, int __user *optlen)
629 {
630         struct dccp_sock *dp;
631         int val, len;
632
633         if (get_user(len, optlen))
634                 return -EFAULT;
635
636         if (len < (int)sizeof(int))
637                 return -EINVAL;
638
639         dp = dccp_sk(sk);
640
641         switch (optname) {
642         case DCCP_SOCKOPT_PACKET_SIZE:
643                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
644                 return 0;
645         case DCCP_SOCKOPT_SERVICE:
646                 return dccp_getsockopt_service(sk, len,
647                                                (__be32 __user *)optval, optlen);
648         case DCCP_SOCKOPT_GET_CUR_MPS:
649                 val = dp->dccps_mss_cache;
650                 break;
651         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
652                 val = dp->dccps_server_timewait;
653                 break;
654         case DCCP_SOCKOPT_SEND_CSCOV:
655                 val = dp->dccps_pcslen;
656                 break;
657         case DCCP_SOCKOPT_RECV_CSCOV:
658                 val = dp->dccps_pcrlen;
659                 break;
660         case 128 ... 191:
661                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
662                                              len, (u32 __user *)optval, optlen);
663         case 192 ... 255:
664                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
665                                              len, (u32 __user *)optval, optlen);
666         default:
667                 return -ENOPROTOOPT;
668         }
669
670         len = sizeof(val);
671         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
672                 return -EFAULT;
673
674         return 0;
675 }
676
677 int dccp_getsockopt(struct sock *sk, int level, int optname,
678                     char __user *optval, int __user *optlen)
679 {
680         if (level != SOL_DCCP)
681                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
682                                                              optname, optval,
683                                                              optlen);
684         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
685 }
686
687 EXPORT_SYMBOL_GPL(dccp_getsockopt);
688
689 #ifdef CONFIG_COMPAT
690 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
691                            char __user *optval, int __user *optlen)
692 {
693         if (level != SOL_DCCP)
694                 return inet_csk_compat_getsockopt(sk, level, optname,
695                                                   optval, optlen);
696         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
697 }
698
699 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
700 #endif
701
702 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
703                  size_t len)
704 {
705         const struct dccp_sock *dp = dccp_sk(sk);
706         const int flags = msg->msg_flags;
707         const int noblock = flags & MSG_DONTWAIT;
708         struct sk_buff *skb;
709         int rc, size;
710         long timeo;
711
712         if (len > dp->dccps_mss_cache)
713                 return -EMSGSIZE;
714
715         lock_sock(sk);
716
717         if (sysctl_dccp_tx_qlen &&
718             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
719                 rc = -EAGAIN;
720                 goto out_release;
721         }
722
723         timeo = sock_sndtimeo(sk, noblock);
724
725         /*
726          * We have to use sk_stream_wait_connect here to set sk_write_pending,
727          * so that the trick in dccp_rcv_request_sent_state_process.
728          */
729         /* Wait for a connection to finish. */
730         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
731                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
732                         goto out_release;
733
734         size = sk->sk_prot->max_header + len;
735         release_sock(sk);
736         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
737         lock_sock(sk);
738         if (skb == NULL)
739                 goto out_release;
740
741         skb_reserve(skb, sk->sk_prot->max_header);
742         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
743         if (rc != 0)
744                 goto out_discard;
745
746         skb_queue_tail(&sk->sk_write_queue, skb);
747         dccp_write_xmit(sk,0);
748 out_release:
749         release_sock(sk);
750         return rc ? : len;
751 out_discard:
752         kfree_skb(skb);
753         goto out_release;
754 }
755
756 EXPORT_SYMBOL_GPL(dccp_sendmsg);
757
758 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
759                  size_t len, int nonblock, int flags, int *addr_len)
760 {
761         const struct dccp_hdr *dh;
762         long timeo;
763
764         lock_sock(sk);
765
766         if (sk->sk_state == DCCP_LISTEN) {
767                 len = -ENOTCONN;
768                 goto out;
769         }
770
771         timeo = sock_rcvtimeo(sk, nonblock);
772
773         do {
774                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
775
776                 if (skb == NULL)
777                         goto verify_sock_status;
778
779                 dh = dccp_hdr(skb);
780
781                 switch (dh->dccph_type) {
782                 case DCCP_PKT_DATA:
783                 case DCCP_PKT_DATAACK:
784                         goto found_ok_skb;
785
786                 case DCCP_PKT_CLOSE:
787                 case DCCP_PKT_CLOSEREQ:
788                         if (!(flags & MSG_PEEK))
789                                 dccp_finish_passive_close(sk);
790                         /* fall through */
791                 case DCCP_PKT_RESET:
792                         dccp_pr_debug("found fin (%s) ok!\n",
793                                       dccp_packet_name(dh->dccph_type));
794                         len = 0;
795                         goto found_fin_ok;
796                 default:
797                         dccp_pr_debug("packet_type=%s\n",
798                                       dccp_packet_name(dh->dccph_type));
799                         sk_eat_skb(sk, skb, 0);
800                 }
801 verify_sock_status:
802                 if (sock_flag(sk, SOCK_DONE)) {
803                         len = 0;
804                         break;
805                 }
806
807                 if (sk->sk_err) {
808                         len = sock_error(sk);
809                         break;
810                 }
811
812                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
813                         len = 0;
814                         break;
815                 }
816
817                 if (sk->sk_state == DCCP_CLOSED) {
818                         if (!sock_flag(sk, SOCK_DONE)) {
819                                 /* This occurs when user tries to read
820                                  * from never connected socket.
821                                  */
822                                 len = -ENOTCONN;
823                                 break;
824                         }
825                         len = 0;
826                         break;
827                 }
828
829                 if (!timeo) {
830                         len = -EAGAIN;
831                         break;
832                 }
833
834                 if (signal_pending(current)) {
835                         len = sock_intr_errno(timeo);
836                         break;
837                 }
838
839                 sk_wait_data(sk, &timeo);
840                 continue;
841         found_ok_skb:
842                 if (len > skb->len)
843                         len = skb->len;
844                 else if (len < skb->len)
845                         msg->msg_flags |= MSG_TRUNC;
846
847                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
848                         /* Exception. Bailout! */
849                         len = -EFAULT;
850                         break;
851                 }
852         found_fin_ok:
853                 if (!(flags & MSG_PEEK))
854                         sk_eat_skb(sk, skb, 0);
855                 break;
856         } while (1);
857 out:
858         release_sock(sk);
859         return len;
860 }
861
862 EXPORT_SYMBOL_GPL(dccp_recvmsg);
863
864 int inet_dccp_listen(struct socket *sock, int backlog)
865 {
866         struct sock *sk = sock->sk;
867         unsigned char old_state;
868         int err;
869
870         lock_sock(sk);
871
872         err = -EINVAL;
873         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
874                 goto out;
875
876         old_state = sk->sk_state;
877         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
878                 goto out;
879
880         /* Really, if the socket is already in listen state
881          * we can only allow the backlog to be adjusted.
882          */
883         if (old_state != DCCP_LISTEN) {
884                 /*
885                  * FIXME: here it probably should be sk->sk_prot->listen_start
886                  * see tcp_listen_start
887                  */
888                 err = dccp_listen_start(sk, backlog);
889                 if (err)
890                         goto out;
891         }
892         sk->sk_max_ack_backlog = backlog;
893         err = 0;
894
895 out:
896         release_sock(sk);
897         return err;
898 }
899
900 EXPORT_SYMBOL_GPL(inet_dccp_listen);
901
902 static void dccp_terminate_connection(struct sock *sk)
903 {
904         u8 next_state = DCCP_CLOSED;
905
906         switch (sk->sk_state) {
907         case DCCP_PASSIVE_CLOSE:
908         case DCCP_PASSIVE_CLOSEREQ:
909                 dccp_finish_passive_close(sk);
910                 break;
911         case DCCP_PARTOPEN:
912                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
913                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
914                 /* fall through */
915         case DCCP_OPEN:
916                 dccp_send_close(sk, 1);
917
918                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
919                     !dccp_sk(sk)->dccps_server_timewait)
920                         next_state = DCCP_ACTIVE_CLOSEREQ;
921                 else
922                         next_state = DCCP_CLOSING;
923                 /* fall through */
924         default:
925                 dccp_set_state(sk, next_state);
926         }
927 }
928
929 void dccp_close(struct sock *sk, long timeout)
930 {
931         struct dccp_sock *dp = dccp_sk(sk);
932         struct sk_buff *skb;
933         u32 data_was_unread = 0;
934         int state;
935
936         lock_sock(sk);
937
938         sk->sk_shutdown = SHUTDOWN_MASK;
939
940         if (sk->sk_state == DCCP_LISTEN) {
941                 dccp_set_state(sk, DCCP_CLOSED);
942
943                 /* Special case. */
944                 inet_csk_listen_stop(sk);
945
946                 goto adjudge_to_death;
947         }
948
949         sk_stop_timer(sk, &dp->dccps_xmit_timer);
950
951         /*
952          * We need to flush the recv. buffs.  We do this only on the
953          * descriptor close, not protocol-sourced closes, because the
954           *reader process may not have drained the data yet!
955          */
956         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
957                 data_was_unread += skb->len;
958                 __kfree_skb(skb);
959         }
960
961         if (data_was_unread) {
962                 /* Unread data was tossed, send an appropriate Reset Code */
963                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
964                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
965                 dccp_set_state(sk, DCCP_CLOSED);
966         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
967                 /* Check zero linger _after_ checking for unread data. */
968                 sk->sk_prot->disconnect(sk, 0);
969         } else if (sk->sk_state != DCCP_CLOSED) {
970                 dccp_terminate_connection(sk);
971         }
972
973         sk_stream_wait_close(sk, timeout);
974
975 adjudge_to_death:
976         state = sk->sk_state;
977         sock_hold(sk);
978         sock_orphan(sk);
979         atomic_inc(sk->sk_prot->orphan_count);
980
981         /*
982          * It is the last release_sock in its life. It will remove backlog.
983          */
984         release_sock(sk);
985         /*
986          * Now socket is owned by kernel and we acquire BH lock
987          * to finish close. No need to check for user refs.
988          */
989         local_bh_disable();
990         bh_lock_sock(sk);
991         WARN_ON(sock_owned_by_user(sk));
992
993         /* Have we already been destroyed by a softirq or backlog? */
994         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
995                 goto out;
996
997         if (sk->sk_state == DCCP_CLOSED)
998                 inet_csk_destroy_sock(sk);
999
1000         /* Otherwise, socket is reprieved until protocol close. */
1001
1002 out:
1003         bh_unlock_sock(sk);
1004         local_bh_enable();
1005         sock_put(sk);
1006 }
1007
1008 EXPORT_SYMBOL_GPL(dccp_close);
1009
/*
 * dccp_shutdown  -  shutdown handler stub
 * @sk:  socket the request refers to (not used by this function)
 * @how: shutdown mode requested by the caller; only logged
 *
 * Emits a debug message with the requested mode and returns.  It does not
 * modify the socket in any way.
 */
1010 void dccp_shutdown(struct sock *sk, int how)
1011 {
1012         dccp_pr_debug("called shutdown(%x)\n", how);
1013 }
1014
1015 EXPORT_SYMBOL_GPL(dccp_shutdown);
1016
1017 static inline int dccp_mib_init(void)
1018 {
1019         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1020 }
1021
1022 static inline void dccp_mib_exit(void)
1023 {
1024         snmp_mib_free((void**)dccp_statistics);
1025 }
1026
/*
 * Module parameter "thash_entries" (permissions 0444).  When non-zero,
 * dccp_init() derives the size goal for the established hash table from
 * this bucket count instead of from num_physpages.
 */
1027 static int thash_entries;
1028 module_param(thash_entries, int, 0444);
1029 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1030
/*
 * Module parameter "dccp_debug" (permissions 0644), built only when
 * CONFIG_IP_DCCP_DEBUG is set and exported to other GPL modules.
 * NOTE(review): the consumers are not visible in this part of the file;
 * presumably the dccp_pr_debug() helpers test this flag -- confirm in dccp.h.
 */
1031 #ifdef CONFIG_IP_DCCP_DEBUG
1032 int dccp_debug;
1033 module_param(dccp_debug, bool, 0644);
1034 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1035
1036 EXPORT_SYMBOL_GPL(dccp_debug);
1037 #endif
1038
1039 static int __init dccp_init(void)
1040 {
1041         unsigned long goal;
1042         int ehash_order, bhash_order, i;
1043         int rc = -ENOBUFS;
1044
1045         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1046                      FIELD_SIZEOF(struct sk_buff, cb));
1047
1048         dccp_hashinfo.bind_bucket_cachep =
1049                 kmem_cache_create("dccp_bind_bucket",
1050                                   sizeof(struct inet_bind_bucket), 0,
1051                                   SLAB_HWCACHE_ALIGN, NULL);
1052         if (!dccp_hashinfo.bind_bucket_cachep)
1053                 goto out;
1054
1055         /*
1056          * Size and allocate the main established and bind bucket
1057          * hash tables.
1058          *
1059          * The methodology is similar to that of the buffer cache.
1060          */
1061         if (num_physpages >= (128 * 1024))
1062                 goal = num_physpages >> (21 - PAGE_SHIFT);
1063         else
1064                 goal = num_physpages >> (23 - PAGE_SHIFT);
1065
1066         if (thash_entries)
1067                 goal = (thash_entries *
1068                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1069         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1070                 ;
1071         do {
1072                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1073                                         sizeof(struct inet_ehash_bucket);
1074                 while (dccp_hashinfo.ehash_size &
1075                        (dccp_hashinfo.ehash_size - 1))
1076                         dccp_hashinfo.ehash_size--;
1077                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1078                         __get_free_pages(GFP_ATOMIC, ehash_order);
1079         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1080
1081         if (!dccp_hashinfo.ehash) {
1082                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1083                 goto out_free_bind_bucket_cachep;
1084         }
1085
1086         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1087                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1088                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1089         }
1090
1091         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1092                         goto out_free_dccp_ehash;
1093
1094         bhash_order = ehash_order;
1095
1096         do {
1097                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1098                                         sizeof(struct inet_bind_hashbucket);
1099                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1100                     bhash_order > 0)
1101                         continue;
1102                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1103                         __get_free_pages(GFP_ATOMIC, bhash_order);
1104         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1105
1106         if (!dccp_hashinfo.bhash) {
1107                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1108                 goto out_free_dccp_locks;
1109         }
1110
1111         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1112                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1113                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1114         }
1115
1116         rc = dccp_mib_init();
1117         if (rc)
1118                 goto out_free_dccp_bhash;
1119
1120         rc = dccp_ackvec_init();
1121         if (rc)
1122                 goto out_free_dccp_mib;
1123
1124         rc = dccp_sysctl_init();
1125         if (rc)
1126                 goto out_ackvec_exit;
1127
1128         dccp_timestamping_init();
1129 out:
1130         return rc;
1131 out_ackvec_exit:
1132         dccp_ackvec_exit();
1133 out_free_dccp_mib:
1134         dccp_mib_exit();
1135 out_free_dccp_bhash:
1136         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1137         dccp_hashinfo.bhash = NULL;
1138 out_free_dccp_locks:
1139         inet_ehash_locks_free(&dccp_hashinfo);
1140 out_free_dccp_ehash:
1141         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1142         dccp_hashinfo.ehash = NULL;
1143 out_free_bind_bucket_cachep:
1144         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1145         dccp_hashinfo.bind_bucket_cachep = NULL;
1146         goto out;
1147 }
1148
1149 static void __exit dccp_fini(void)
1150 {
1151         dccp_mib_exit();
1152         free_pages((unsigned long)dccp_hashinfo.bhash,
1153                    get_order(dccp_hashinfo.bhash_size *
1154                              sizeof(struct inet_bind_hashbucket)));
1155         free_pages((unsigned long)dccp_hashinfo.ehash,
1156                    get_order(dccp_hashinfo.ehash_size *
1157                              sizeof(struct inet_ehash_bucket)));
1158         inet_ehash_locks_free(&dccp_hashinfo);
1159         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1160         dccp_ackvec_exit();
1161         dccp_sysctl_exit();
1162 }
1163
1164 module_init(dccp_init);
1165 module_exit(dccp_fini);
1166
1167 MODULE_LICENSE("GPL");
1168 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1169 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");