Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/jbarnes...
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
39 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
40
41 EXPORT_SYMBOL_GPL(dccp_statistics);
42
43 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
44
45 EXPORT_SYMBOL_GPL(dccp_orphan_count);
46
47 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
48         .lhash_lock     = RW_LOCK_UNLOCKED,
49         .lhash_users    = ATOMIC_INIT(0),
50         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
51 };
52
53 EXPORT_SYMBOL_GPL(dccp_hashinfo);
54
55 /* the maximum queue length for tx in packets. 0 is no limit */
56 int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
176 {
177         struct dccp_sock *dp = dccp_sk(sk);
178         struct dccp_minisock *dmsk = dccp_msk(sk);
179         struct inet_connection_sock *icsk = inet_csk(sk);
180
181         dccp_minisock_init(&dp->dccps_minisock);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         /*
197          * FIXME: We're hardcoding the CCID, and doing this at this point makes
198          * the listening (master) sock get CCID control blocks, which is not
199          * necessary, but for now, to not mess with the test userspace apps,
200          * lets leave it here, later the real solution is to do this in a
201          * setsockopt(CCIDs-I-want/accept). -acme
202          */
203         if (likely(ctl_sock_initialized)) {
204                 int rc = dccp_feat_init(dmsk);
205
206                 if (rc)
207                         return rc;
208
209                 if (dmsk->dccpms_send_ack_vector) {
210                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
211                         if (dp->dccps_hc_rx_ackvec == NULL)
212                                 return -ENOMEM;
213                 }
214                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
215                                                       sk, GFP_KERNEL);
216                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
217                                                       sk, GFP_KERNEL);
218                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
219                              dp->dccps_hc_tx_ccid == NULL)) {
220                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
221                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
222                         if (dmsk->dccpms_send_ack_vector) {
223                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
224                                 dp->dccps_hc_rx_ackvec = NULL;
225                         }
226                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
227                         return -ENOMEM;
228                 }
229         } else {
230                 /* control socket doesn't need feat nego */
231                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
232                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
233         }
234
235         return 0;
236 }
237
238 EXPORT_SYMBOL_GPL(dccp_init_sock);
239
240 int dccp_destroy_sock(struct sock *sk)
241 {
242         struct dccp_sock *dp = dccp_sk(sk);
243         struct dccp_minisock *dmsk = dccp_msk(sk);
244
245         /*
246          * DCCP doesn't use sk_write_queue, just sk_send_head
247          * for retransmissions
248          */
249         if (sk->sk_send_head != NULL) {
250                 kfree_skb(sk->sk_send_head);
251                 sk->sk_send_head = NULL;
252         }
253
254         /* Clean up a referenced DCCP bind bucket. */
255         if (inet_csk(sk)->icsk_bind_hash != NULL)
256                 inet_put_port(sk);
257
258         kfree(dp->dccps_service_list);
259         dp->dccps_service_list = NULL;
260
261         if (dmsk->dccpms_send_ack_vector) {
262                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
263                 dp->dccps_hc_rx_ackvec = NULL;
264         }
265         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
266         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
267         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
268
269         /* clean up feature negotiation state */
270         dccp_feat_clean(dmsk);
271
272         return 0;
273 }
274
275 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
276
277 static inline int dccp_listen_start(struct sock *sk, int backlog)
278 {
279         struct dccp_sock *dp = dccp_sk(sk);
280
281         dp->dccps_role = DCCP_ROLE_LISTEN;
282         return inet_csk_listen_start(sk, backlog);
283 }
284
285 static inline int dccp_need_reset(int state)
286 {
287         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
288                state != DCCP_REQUESTING;
289 }
290
291 int dccp_disconnect(struct sock *sk, int flags)
292 {
293         struct inet_connection_sock *icsk = inet_csk(sk);
294         struct inet_sock *inet = inet_sk(sk);
295         int err = 0;
296         const int old_state = sk->sk_state;
297
298         if (old_state != DCCP_CLOSED)
299                 dccp_set_state(sk, DCCP_CLOSED);
300
301         /*
302          * This corresponds to the ABORT function of RFC793, sec. 3.8
303          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
304          */
305         if (old_state == DCCP_LISTEN) {
306                 inet_csk_listen_stop(sk);
307         } else if (dccp_need_reset(old_state)) {
308                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
309                 sk->sk_err = ECONNRESET;
310         } else if (old_state == DCCP_REQUESTING)
311                 sk->sk_err = ECONNRESET;
312
313         dccp_clear_xmit_timers(sk);
314         __skb_queue_purge(&sk->sk_receive_queue);
315         if (sk->sk_send_head != NULL) {
316                 __kfree_skb(sk->sk_send_head);
317                 sk->sk_send_head = NULL;
318         }
319
320         inet->dport = 0;
321
322         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
323                 inet_reset_saddr(sk);
324
325         sk->sk_shutdown = 0;
326         sock_reset_flag(sk, SOCK_DONE);
327
328         icsk->icsk_backoff = 0;
329         inet_csk_delack_init(sk);
330         __sk_dst_reset(sk);
331
332         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
333
334         sk->sk_error_report(sk);
335         return err;
336 }
337
338 EXPORT_SYMBOL_GPL(dccp_disconnect);
339
340 /*
341  *      Wait for a DCCP event.
342  *
343  *      Note that we don't need to lock the socket, as the upper poll layers
344  *      take care of normal races (between the test and the event) and we don't
345  *      go look at any of the socket buffers directly.
346  */
347 unsigned int dccp_poll(struct file *file, struct socket *sock,
348                        poll_table *wait)
349 {
350         unsigned int mask;
351         struct sock *sk = sock->sk;
352
353         poll_wait(file, sk->sk_sleep, wait);
354         if (sk->sk_state == DCCP_LISTEN)
355                 return inet_csk_listen_poll(sk);
356
357         /* Socket is not locked. We are protected from async events
358            by poll logic and correct handling of state changes
359            made by another threads is impossible in any case.
360          */
361
362         mask = 0;
363         if (sk->sk_err)
364                 mask = POLLERR;
365
366         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
367                 mask |= POLLHUP;
368         if (sk->sk_shutdown & RCV_SHUTDOWN)
369                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
370
371         /* Connected? */
372         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
373                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
374                         mask |= POLLIN | POLLRDNORM;
375
376                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
377                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
378                                 mask |= POLLOUT | POLLWRNORM;
379                         } else {  /* send SIGIO later */
380                                 set_bit(SOCK_ASYNC_NOSPACE,
381                                         &sk->sk_socket->flags);
382                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
383
384                                 /* Race breaker. If space is freed after
385                                  * wspace test but before the flags are set,
386                                  * IO signal will be lost.
387                                  */
388                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
389                                         mask |= POLLOUT | POLLWRNORM;
390                         }
391                 }
392         }
393         return mask;
394 }
395
396 EXPORT_SYMBOL_GPL(dccp_poll);
397
398 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
399 {
400         int rc = -ENOTCONN;
401
402         lock_sock(sk);
403
404         if (sk->sk_state == DCCP_LISTEN)
405                 goto out;
406
407         switch (cmd) {
408         case SIOCINQ: {
409                 struct sk_buff *skb;
410                 unsigned long amount = 0;
411
412                 skb = skb_peek(&sk->sk_receive_queue);
413                 if (skb != NULL) {
414                         /*
415                          * We will only return the amount of this packet since
416                          * that is all that will be read.
417                          */
418                         amount = skb->len;
419                 }
420                 rc = put_user(amount, (int __user *)arg);
421         }
422                 break;
423         default:
424                 rc = -ENOIOCTLCMD;
425                 break;
426         }
427 out:
428         release_sock(sk);
429         return rc;
430 }
431
432 EXPORT_SYMBOL_GPL(dccp_ioctl);
433
434 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
435                                    char __user *optval, int optlen)
436 {
437         struct dccp_sock *dp = dccp_sk(sk);
438         struct dccp_service_list *sl = NULL;
439
440         if (service == DCCP_SERVICE_INVALID_VALUE ||
441             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
442                 return -EINVAL;
443
444         if (optlen > sizeof(service)) {
445                 sl = kmalloc(optlen, GFP_KERNEL);
446                 if (sl == NULL)
447                         return -ENOMEM;
448
449                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
450                 if (copy_from_user(sl->dccpsl_list,
451                                    optval + sizeof(service),
452                                    optlen - sizeof(service)) ||
453                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
454                         kfree(sl);
455                         return -EFAULT;
456                 }
457         }
458
459         lock_sock(sk);
460         dp->dccps_service = service;
461
462         kfree(dp->dccps_service_list);
463
464         dp->dccps_service_list = sl;
465         release_sock(sk);
466         return 0;
467 }
468
469 /* byte 1 is feature.  the rest is the preference list */
470 static int dccp_setsockopt_change(struct sock *sk, int type,
471                                   struct dccp_so_feat __user *optval)
472 {
473         struct dccp_so_feat opt;
474         u8 *val;
475         int rc;
476
477         if (copy_from_user(&opt, optval, sizeof(opt)))
478                 return -EFAULT;
479
480         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
481         if (!val)
482                 return -ENOMEM;
483
484         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
485                 rc = -EFAULT;
486                 goto out_free_val;
487         }
488
489         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
490                               val, opt.dccpsf_len, GFP_KERNEL);
491         if (rc)
492                 goto out_free_val;
493
494 out:
495         return rc;
496
497 out_free_val:
498         kfree(val);
499         goto out;
500 }
501
502 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
503                 char __user *optval, int optlen)
504 {
505         struct dccp_sock *dp = dccp_sk(sk);
506         int val, err = 0;
507
508         if (optlen < sizeof(int))
509                 return -EINVAL;
510
511         if (get_user(val, (int __user *)optval))
512                 return -EFAULT;
513
514         if (optname == DCCP_SOCKOPT_SERVICE)
515                 return dccp_setsockopt_service(sk, val, optval, optlen);
516
517         lock_sock(sk);
518         switch (optname) {
519         case DCCP_SOCKOPT_PACKET_SIZE:
520                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
521                 err = 0;
522                 break;
523         case DCCP_SOCKOPT_CHANGE_L:
524                 if (optlen != sizeof(struct dccp_so_feat))
525                         err = -EINVAL;
526                 else
527                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
528                                                      (struct dccp_so_feat __user *)
529                                                      optval);
530                 break;
531         case DCCP_SOCKOPT_CHANGE_R:
532                 if (optlen != sizeof(struct dccp_so_feat))
533                         err = -EINVAL;
534                 else
535                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
536                                                      (struct dccp_so_feat __user *)
537                                                      optval);
538                 break;
539         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
540                 if (dp->dccps_role != DCCP_ROLE_SERVER)
541                         err = -EOPNOTSUPP;
542                 else
543                         dp->dccps_server_timewait = (val != 0);
544                 break;
545         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
546                 if (val < 0 || val > 15)
547                         err = -EINVAL;
548                 else
549                         dp->dccps_pcslen = val;
550                 break;
551         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
552                 if (val < 0 || val > 15)
553                         err = -EINVAL;
554                 else {
555                         dp->dccps_pcrlen = val;
556                         /* FIXME: add feature negotiation,
557                          * ChangeL(MinimumChecksumCoverage, val) */
558                 }
559                 break;
560         default:
561                 err = -ENOPROTOOPT;
562                 break;
563         }
564
565         release_sock(sk);
566         return err;
567 }
568
569 int dccp_setsockopt(struct sock *sk, int level, int optname,
570                     char __user *optval, int optlen)
571 {
572         if (level != SOL_DCCP)
573                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
574                                                              optname, optval,
575                                                              optlen);
576         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
577 }
578
579 EXPORT_SYMBOL_GPL(dccp_setsockopt);
580
581 #ifdef CONFIG_COMPAT
582 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
583                            char __user *optval, int optlen)
584 {
585         if (level != SOL_DCCP)
586                 return inet_csk_compat_setsockopt(sk, level, optname,
587                                                   optval, optlen);
588         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
589 }
590
591 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
592 #endif
593
594 static int dccp_getsockopt_service(struct sock *sk, int len,
595                                    __be32 __user *optval,
596                                    int __user *optlen)
597 {
598         const struct dccp_sock *dp = dccp_sk(sk);
599         const struct dccp_service_list *sl;
600         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
601
602         lock_sock(sk);
603         if ((sl = dp->dccps_service_list) != NULL) {
604                 slen = sl->dccpsl_nr * sizeof(u32);
605                 total_len += slen;
606         }
607
608         err = -EINVAL;
609         if (total_len > len)
610                 goto out;
611
612         err = 0;
613         if (put_user(total_len, optlen) ||
614             put_user(dp->dccps_service, optval) ||
615             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
616                 err = -EFAULT;
617 out:
618         release_sock(sk);
619         return err;
620 }
621
622 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
623                     char __user *optval, int __user *optlen)
624 {
625         struct dccp_sock *dp;
626         int val, len;
627
628         if (get_user(len, optlen))
629                 return -EFAULT;
630
631         if (len < (int)sizeof(int))
632                 return -EINVAL;
633
634         dp = dccp_sk(sk);
635
636         switch (optname) {
637         case DCCP_SOCKOPT_PACKET_SIZE:
638                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
639                 return 0;
640         case DCCP_SOCKOPT_SERVICE:
641                 return dccp_getsockopt_service(sk, len,
642                                                (__be32 __user *)optval, optlen);
643         case DCCP_SOCKOPT_GET_CUR_MPS:
644                 val = dp->dccps_mss_cache;
645                 break;
646         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
647                 val = dp->dccps_server_timewait;
648                 break;
649         case DCCP_SOCKOPT_SEND_CSCOV:
650                 val = dp->dccps_pcslen;
651                 break;
652         case DCCP_SOCKOPT_RECV_CSCOV:
653                 val = dp->dccps_pcrlen;
654                 break;
655         case 128 ... 191:
656                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
657                                              len, (u32 __user *)optval, optlen);
658         case 192 ... 255:
659                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
660                                              len, (u32 __user *)optval, optlen);
661         default:
662                 return -ENOPROTOOPT;
663         }
664
665         len = sizeof(val);
666         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
667                 return -EFAULT;
668
669         return 0;
670 }
671
672 int dccp_getsockopt(struct sock *sk, int level, int optname,
673                     char __user *optval, int __user *optlen)
674 {
675         if (level != SOL_DCCP)
676                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
677                                                              optname, optval,
678                                                              optlen);
679         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
680 }
681
682 EXPORT_SYMBOL_GPL(dccp_getsockopt);
683
684 #ifdef CONFIG_COMPAT
685 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
686                            char __user *optval, int __user *optlen)
687 {
688         if (level != SOL_DCCP)
689                 return inet_csk_compat_getsockopt(sk, level, optname,
690                                                   optval, optlen);
691         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
692 }
693
694 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
695 #endif
696
697 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
698                  size_t len)
699 {
700         const struct dccp_sock *dp = dccp_sk(sk);
701         const int flags = msg->msg_flags;
702         const int noblock = flags & MSG_DONTWAIT;
703         struct sk_buff *skb;
704         int rc, size;
705         long timeo;
706
707         if (len > dp->dccps_mss_cache)
708                 return -EMSGSIZE;
709
710         lock_sock(sk);
711
712         if (sysctl_dccp_tx_qlen &&
713             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
714                 rc = -EAGAIN;
715                 goto out_release;
716         }
717
718         timeo = sock_sndtimeo(sk, noblock);
719
720         /*
721          * We have to use sk_stream_wait_connect here to set sk_write_pending,
722          * so that the trick in dccp_rcv_request_sent_state_process.
723          */
724         /* Wait for a connection to finish. */
725         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
726                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
727                         goto out_release;
728
729         size = sk->sk_prot->max_header + len;
730         release_sock(sk);
731         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
732         lock_sock(sk);
733         if (skb == NULL)
734                 goto out_release;
735
736         skb_reserve(skb, sk->sk_prot->max_header);
737         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
738         if (rc != 0)
739                 goto out_discard;
740
741         skb_queue_tail(&sk->sk_write_queue, skb);
742         dccp_write_xmit(sk,0);
743 out_release:
744         release_sock(sk);
745         return rc ? : len;
746 out_discard:
747         kfree_skb(skb);
748         goto out_release;
749 }
750
751 EXPORT_SYMBOL_GPL(dccp_sendmsg);
752
753 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
754                  size_t len, int nonblock, int flags, int *addr_len)
755 {
756         const struct dccp_hdr *dh;
757         long timeo;
758
759         lock_sock(sk);
760
761         if (sk->sk_state == DCCP_LISTEN) {
762                 len = -ENOTCONN;
763                 goto out;
764         }
765
766         timeo = sock_rcvtimeo(sk, nonblock);
767
768         do {
769                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
770
771                 if (skb == NULL)
772                         goto verify_sock_status;
773
774                 dh = dccp_hdr(skb);
775
776                 switch (dh->dccph_type) {
777                 case DCCP_PKT_DATA:
778                 case DCCP_PKT_DATAACK:
779                         goto found_ok_skb;
780
781                 case DCCP_PKT_CLOSE:
782                 case DCCP_PKT_CLOSEREQ:
783                         if (!(flags & MSG_PEEK))
784                                 dccp_finish_passive_close(sk);
785                         /* fall through */
786                 case DCCP_PKT_RESET:
787                         dccp_pr_debug("found fin (%s) ok!\n",
788                                       dccp_packet_name(dh->dccph_type));
789                         len = 0;
790                         goto found_fin_ok;
791                 default:
792                         dccp_pr_debug("packet_type=%s\n",
793                                       dccp_packet_name(dh->dccph_type));
794                         sk_eat_skb(sk, skb, 0);
795                 }
796 verify_sock_status:
797                 if (sock_flag(sk, SOCK_DONE)) {
798                         len = 0;
799                         break;
800                 }
801
802                 if (sk->sk_err) {
803                         len = sock_error(sk);
804                         break;
805                 }
806
807                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
808                         len = 0;
809                         break;
810                 }
811
812                 if (sk->sk_state == DCCP_CLOSED) {
813                         if (!sock_flag(sk, SOCK_DONE)) {
814                                 /* This occurs when user tries to read
815                                  * from never connected socket.
816                                  */
817                                 len = -ENOTCONN;
818                                 break;
819                         }
820                         len = 0;
821                         break;
822                 }
823
824                 if (!timeo) {
825                         len = -EAGAIN;
826                         break;
827                 }
828
829                 if (signal_pending(current)) {
830                         len = sock_intr_errno(timeo);
831                         break;
832                 }
833
834                 sk_wait_data(sk, &timeo);
835                 continue;
836         found_ok_skb:
837                 if (len > skb->len)
838                         len = skb->len;
839                 else if (len < skb->len)
840                         msg->msg_flags |= MSG_TRUNC;
841
842                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
843                         /* Exception. Bailout! */
844                         len = -EFAULT;
845                         break;
846                 }
847         found_fin_ok:
848                 if (!(flags & MSG_PEEK))
849                         sk_eat_skb(sk, skb, 0);
850                 break;
851         } while (1);
852 out:
853         release_sock(sk);
854         return len;
855 }
856
857 EXPORT_SYMBOL_GPL(dccp_recvmsg);
858
859 int inet_dccp_listen(struct socket *sock, int backlog)
860 {
861         struct sock *sk = sock->sk;
862         unsigned char old_state;
863         int err;
864
865         lock_sock(sk);
866
867         err = -EINVAL;
868         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
869                 goto out;
870
871         old_state = sk->sk_state;
872         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
873                 goto out;
874
875         /* Really, if the socket is already in listen state
876          * we can only allow the backlog to be adjusted.
877          */
878         if (old_state != DCCP_LISTEN) {
879                 /*
880                  * FIXME: here it probably should be sk->sk_prot->listen_start
881                  * see tcp_listen_start
882                  */
883                 err = dccp_listen_start(sk, backlog);
884                 if (err)
885                         goto out;
886         }
887         sk->sk_max_ack_backlog = backlog;
888         err = 0;
889
890 out:
891         release_sock(sk);
892         return err;
893 }
894
895 EXPORT_SYMBOL_GPL(inet_dccp_listen);
896
897 static void dccp_terminate_connection(struct sock *sk)
898 {
899         u8 next_state = DCCP_CLOSED;
900
901         switch (sk->sk_state) {
902         case DCCP_PASSIVE_CLOSE:
903         case DCCP_PASSIVE_CLOSEREQ:
904                 dccp_finish_passive_close(sk);
905                 break;
906         case DCCP_PARTOPEN:
907                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
908                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
909                 /* fall through */
910         case DCCP_OPEN:
911                 dccp_send_close(sk, 1);
912
913                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
914                     !dccp_sk(sk)->dccps_server_timewait)
915                         next_state = DCCP_ACTIVE_CLOSEREQ;
916                 else
917                         next_state = DCCP_CLOSING;
918                 /* fall through */
919         default:
920                 dccp_set_state(sk, next_state);
921         }
922 }
923
924 void dccp_close(struct sock *sk, long timeout)
925 {
926         struct dccp_sock *dp = dccp_sk(sk);
927         struct sk_buff *skb;
928         u32 data_was_unread = 0;
929         int state;
930
931         lock_sock(sk);
932
933         sk->sk_shutdown = SHUTDOWN_MASK;
934
935         if (sk->sk_state == DCCP_LISTEN) {
936                 dccp_set_state(sk, DCCP_CLOSED);
937
938                 /* Special case. */
939                 inet_csk_listen_stop(sk);
940
941                 goto adjudge_to_death;
942         }
943
944         sk_stop_timer(sk, &dp->dccps_xmit_timer);
945
946         /*
947          * We need to flush the recv. buffs.  We do this only on the
948          * descriptor close, not protocol-sourced closes, because the
949           *reader process may not have drained the data yet!
950          */
951         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
952                 data_was_unread += skb->len;
953                 __kfree_skb(skb);
954         }
955
956         if (data_was_unread) {
957                 /* Unread data was tossed, send an appropriate Reset Code */
958                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
959                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
960                 dccp_set_state(sk, DCCP_CLOSED);
961         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
962                 /* Check zero linger _after_ checking for unread data. */
963                 sk->sk_prot->disconnect(sk, 0);
964         } else if (sk->sk_state != DCCP_CLOSED) {
965                 dccp_terminate_connection(sk);
966         }
967
968         sk_stream_wait_close(sk, timeout);
969
970 adjudge_to_death:
971         state = sk->sk_state;
972         sock_hold(sk);
973         sock_orphan(sk);
974         atomic_inc(sk->sk_prot->orphan_count);
975
976         /*
977          * It is the last release_sock in its life. It will remove backlog.
978          */
979         release_sock(sk);
980         /*
981          * Now socket is owned by kernel and we acquire BH lock
982          * to finish close. No need to check for user refs.
983          */
984         local_bh_disable();
985         bh_lock_sock(sk);
986         BUG_TRAP(!sock_owned_by_user(sk));
987
988         /* Have we already been destroyed by a softirq or backlog? */
989         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
990                 goto out;
991
992         if (sk->sk_state == DCCP_CLOSED)
993                 inet_csk_destroy_sock(sk);
994
995         /* Otherwise, socket is reprieved until protocol close. */
996
997 out:
998         bh_unlock_sock(sk);
999         local_bh_enable();
1000         sock_put(sk);
1001 }
1002
1003 EXPORT_SYMBOL_GPL(dccp_close);
1004
/**
 * dccp_shutdown  -  shutdown hook for DCCP sockets
 * @sk:  socket the request refers to (not used here)
 * @how: shutdown mode as passed by the caller; only logged
 *
 * Currently a stub: it emits a debug message and changes no socket state.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1011
1012 static inline int dccp_mib_init(void)
1013 {
1014         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1015 }
1016
1017 static inline void dccp_mib_exit(void)
1018 {
1019         snmp_mib_free((void**)dccp_statistics);
1020 }
1021
1022 static int thash_entries;
1023 module_param(thash_entries, int, 0444);
1024 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1025
1026 #ifdef CONFIG_IP_DCCP_DEBUG
1027 int dccp_debug;
1028 module_param(dccp_debug, bool, 0444);
1029 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1030
1031 EXPORT_SYMBOL_GPL(dccp_debug);
1032 #endif
1033
1034 static int __init dccp_init(void)
1035 {
1036         unsigned long goal;
1037         int ehash_order, bhash_order, i;
1038         int rc = -ENOBUFS;
1039
1040         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1041                      FIELD_SIZEOF(struct sk_buff, cb));
1042
1043         dccp_hashinfo.bind_bucket_cachep =
1044                 kmem_cache_create("dccp_bind_bucket",
1045                                   sizeof(struct inet_bind_bucket), 0,
1046                                   SLAB_HWCACHE_ALIGN, NULL);
1047         if (!dccp_hashinfo.bind_bucket_cachep)
1048                 goto out;
1049
1050         /*
1051          * Size and allocate the main established and bind bucket
1052          * hash tables.
1053          *
1054          * The methodology is similar to that of the buffer cache.
1055          */
1056         if (num_physpages >= (128 * 1024))
1057                 goal = num_physpages >> (21 - PAGE_SHIFT);
1058         else
1059                 goal = num_physpages >> (23 - PAGE_SHIFT);
1060
1061         if (thash_entries)
1062                 goal = (thash_entries *
1063                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1064         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1065                 ;
1066         do {
1067                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1068                                         sizeof(struct inet_ehash_bucket);
1069                 while (dccp_hashinfo.ehash_size &
1070                        (dccp_hashinfo.ehash_size - 1))
1071                         dccp_hashinfo.ehash_size--;
1072                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1073                         __get_free_pages(GFP_ATOMIC, ehash_order);
1074         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1075
1076         if (!dccp_hashinfo.ehash) {
1077                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1078                 goto out_free_bind_bucket_cachep;
1079         }
1080
1081         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1082                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1083                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1084         }
1085
1086         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1087                         goto out_free_dccp_ehash;
1088
1089         bhash_order = ehash_order;
1090
1091         do {
1092                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1093                                         sizeof(struct inet_bind_hashbucket);
1094                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1095                     bhash_order > 0)
1096                         continue;
1097                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1098                         __get_free_pages(GFP_ATOMIC, bhash_order);
1099         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1100
1101         if (!dccp_hashinfo.bhash) {
1102                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1103                 goto out_free_dccp_locks;
1104         }
1105
1106         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1107                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1108                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1109         }
1110
1111         rc = dccp_mib_init();
1112         if (rc)
1113                 goto out_free_dccp_bhash;
1114
1115         rc = dccp_ackvec_init();
1116         if (rc)
1117                 goto out_free_dccp_mib;
1118
1119         rc = dccp_sysctl_init();
1120         if (rc)
1121                 goto out_ackvec_exit;
1122
1123         dccp_timestamping_init();
1124 out:
1125         return rc;
1126 out_ackvec_exit:
1127         dccp_ackvec_exit();
1128 out_free_dccp_mib:
1129         dccp_mib_exit();
1130 out_free_dccp_bhash:
1131         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1132         dccp_hashinfo.bhash = NULL;
1133 out_free_dccp_locks:
1134         inet_ehash_locks_free(&dccp_hashinfo);
1135 out_free_dccp_ehash:
1136         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1137         dccp_hashinfo.ehash = NULL;
1138 out_free_bind_bucket_cachep:
1139         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1140         dccp_hashinfo.bind_bucket_cachep = NULL;
1141         goto out;
1142 }
1143
1144 static void __exit dccp_fini(void)
1145 {
1146         dccp_mib_exit();
1147         free_pages((unsigned long)dccp_hashinfo.bhash,
1148                    get_order(dccp_hashinfo.bhash_size *
1149                              sizeof(struct inet_bind_hashbucket)));
1150         free_pages((unsigned long)dccp_hashinfo.ehash,
1151                    get_order(dccp_hashinfo.ehash_size *
1152                              sizeof(struct inet_ehash_bucket)));
1153         inet_ehash_locks_free(&dccp_hashinfo);
1154         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1155         dccp_ackvec_exit();
1156         dccp_sysctl_exit();
1157 }
1158
1159 module_init(dccp_init);
1160 module_exit(dccp_fini);
1161
1162 MODULE_LICENSE("GPL");
1163 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1164 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");