Merge git://git.kernel.org/pub/scm/linux/kernel/git/lethal/sh-2.6
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/*
 * Protocol-wide SNMP/MIB counters, declared for struct dccp_mib.
 * NOTE(review): presumably the storage behind the DCCP_INC_STATS() and
 * DCCP_DEC_STATS() calls used in this file -- confirm in dccp.h.
 */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Atomic counter, starts at zero; it is not modified in the code visible here. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * DCCP's inet hash-table descriptor. Only the listening-hash lock, its user
 * count and its wait queue are initialised statically here; the remaining
 * members are left zero-initialised by this definition.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets; 0 means no limit.
 * dccp_sendmsg() returns -EAGAIN once sk_write_queue holds this many packets.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
/*
 * dccp_packet_name  -  printable name of a DCCP packet type
 * @type: packet type value (one of the DCCP_PKT_* constants)
 *
 * Returns a pointer to a static string. Any value outside
 * [0, DCCP_NR_PKT_TYPES) -- including negative values, which a signed
 * @type permits -- yields "INVALID" instead of indexing outside the table.
 */
const char *dccp_packet_name(const int type)
{
	static const char *const dccp_packet_names[] = {
		[DCCP_PKT_REQUEST]  = "REQUEST",
		[DCCP_PKT_RESPONSE] = "RESPONSE",
		[DCCP_PKT_DATA]     = "DATA",
		[DCCP_PKT_ACK]      = "ACK",
		[DCCP_PKT_DATAACK]  = "DATAACK",
		[DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
		[DCCP_PKT_CLOSE]    = "CLOSE",
		[DCCP_PKT_RESET]    = "RESET",
		[DCCP_PKT_SYNC]     = "SYNC",
		[DCCP_PKT_SYNCACK]  = "SYNCACK",
	};

	/* @type is signed: reject both ends of the range before indexing. */
	if (type < 0 || type >= DCCP_NR_PKT_TYPES)
		return "INVALID";

	return dccp_packet_names[type];
}
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
176 {
177         struct dccp_sock *dp = dccp_sk(sk);
178         struct dccp_minisock *dmsk = dccp_msk(sk);
179         struct inet_connection_sock *icsk = inet_csk(sk);
180
181         dccp_minisock_init(&dp->dccps_minisock);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         /*
197          * FIXME: We're hardcoding the CCID, and doing this at this point makes
198          * the listening (master) sock get CCID control blocks, which is not
199          * necessary, but for now, to not mess with the test userspace apps,
200          * lets leave it here, later the real solution is to do this in a
201          * setsockopt(CCIDs-I-want/accept). -acme
202          */
203         if (likely(ctl_sock_initialized)) {
204                 int rc = dccp_feat_init(dmsk);
205
206                 if (rc)
207                         return rc;
208
209                 if (dmsk->dccpms_send_ack_vector) {
210                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
211                         if (dp->dccps_hc_rx_ackvec == NULL)
212                                 return -ENOMEM;
213                 }
214                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
215                                                       sk, GFP_KERNEL);
216                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
217                                                       sk, GFP_KERNEL);
218                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
219                              dp->dccps_hc_tx_ccid == NULL)) {
220                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
221                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
222                         if (dmsk->dccpms_send_ack_vector) {
223                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
224                                 dp->dccps_hc_rx_ackvec = NULL;
225                         }
226                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
227                         return -ENOMEM;
228                 }
229         } else {
230                 /* control socket doesn't need feat nego */
231                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
232                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
233         }
234
235         return 0;
236 }
237
238 EXPORT_SYMBOL_GPL(dccp_init_sock);
239
240 void dccp_destroy_sock(struct sock *sk)
241 {
242         struct dccp_sock *dp = dccp_sk(sk);
243         struct dccp_minisock *dmsk = dccp_msk(sk);
244
245         /*
246          * DCCP doesn't use sk_write_queue, just sk_send_head
247          * for retransmissions
248          */
249         if (sk->sk_send_head != NULL) {
250                 kfree_skb(sk->sk_send_head);
251                 sk->sk_send_head = NULL;
252         }
253
254         /* Clean up a referenced DCCP bind bucket. */
255         if (inet_csk(sk)->icsk_bind_hash != NULL)
256                 inet_put_port(sk);
257
258         kfree(dp->dccps_service_list);
259         dp->dccps_service_list = NULL;
260
261         if (dmsk->dccpms_send_ack_vector) {
262                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
263                 dp->dccps_hc_rx_ackvec = NULL;
264         }
265         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
266         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
267         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
268
269         /* clean up feature negotiation state */
270         dccp_feat_clean(dmsk);
271 }
272
273 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
274
275 static inline int dccp_listen_start(struct sock *sk, int backlog)
276 {
277         struct dccp_sock *dp = dccp_sk(sk);
278
279         dp->dccps_role = DCCP_ROLE_LISTEN;
280         return inet_csk_listen_start(sk, backlog);
281 }
282
283 static inline int dccp_need_reset(int state)
284 {
285         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
286                state != DCCP_REQUESTING;
287 }
288
289 int dccp_disconnect(struct sock *sk, int flags)
290 {
291         struct inet_connection_sock *icsk = inet_csk(sk);
292         struct inet_sock *inet = inet_sk(sk);
293         int err = 0;
294         const int old_state = sk->sk_state;
295
296         if (old_state != DCCP_CLOSED)
297                 dccp_set_state(sk, DCCP_CLOSED);
298
299         /*
300          * This corresponds to the ABORT function of RFC793, sec. 3.8
301          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
302          */
303         if (old_state == DCCP_LISTEN) {
304                 inet_csk_listen_stop(sk);
305         } else if (dccp_need_reset(old_state)) {
306                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
307                 sk->sk_err = ECONNRESET;
308         } else if (old_state == DCCP_REQUESTING)
309                 sk->sk_err = ECONNRESET;
310
311         dccp_clear_xmit_timers(sk);
312         __skb_queue_purge(&sk->sk_receive_queue);
313         if (sk->sk_send_head != NULL) {
314                 __kfree_skb(sk->sk_send_head);
315                 sk->sk_send_head = NULL;
316         }
317
318         inet->dport = 0;
319
320         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
321                 inet_reset_saddr(sk);
322
323         sk->sk_shutdown = 0;
324         sock_reset_flag(sk, SOCK_DONE);
325
326         icsk->icsk_backoff = 0;
327         inet_csk_delack_init(sk);
328         __sk_dst_reset(sk);
329
330         WARN_ON(inet->num && !icsk->icsk_bind_hash);
331
332         sk->sk_error_report(sk);
333         return err;
334 }
335
336 EXPORT_SYMBOL_GPL(dccp_disconnect);
337
338 /*
339  *      Wait for a DCCP event.
340  *
341  *      Note that we don't need to lock the socket, as the upper poll layers
342  *      take care of normal races (between the test and the event) and we don't
343  *      go look at any of the socket buffers directly.
344  */
345 unsigned int dccp_poll(struct file *file, struct socket *sock,
346                        poll_table *wait)
347 {
348         unsigned int mask;
349         struct sock *sk = sock->sk;
350
351         poll_wait(file, sk->sk_sleep, wait);
352         if (sk->sk_state == DCCP_LISTEN)
353                 return inet_csk_listen_poll(sk);
354
355         /* Socket is not locked. We are protected from async events
356            by poll logic and correct handling of state changes
357            made by another threads is impossible in any case.
358          */
359
360         mask = 0;
361         if (sk->sk_err)
362                 mask = POLLERR;
363
364         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
365                 mask |= POLLHUP;
366         if (sk->sk_shutdown & RCV_SHUTDOWN)
367                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
368
369         /* Connected? */
370         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
371                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
372                         mask |= POLLIN | POLLRDNORM;
373
374                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
375                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
376                                 mask |= POLLOUT | POLLWRNORM;
377                         } else {  /* send SIGIO later */
378                                 set_bit(SOCK_ASYNC_NOSPACE,
379                                         &sk->sk_socket->flags);
380                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
381
382                                 /* Race breaker. If space is freed after
383                                  * wspace test but before the flags are set,
384                                  * IO signal will be lost.
385                                  */
386                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
387                                         mask |= POLLOUT | POLLWRNORM;
388                         }
389                 }
390         }
391         return mask;
392 }
393
394 EXPORT_SYMBOL_GPL(dccp_poll);
395
396 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
397 {
398         int rc = -ENOTCONN;
399
400         lock_sock(sk);
401
402         if (sk->sk_state == DCCP_LISTEN)
403                 goto out;
404
405         switch (cmd) {
406         case SIOCINQ: {
407                 struct sk_buff *skb;
408                 unsigned long amount = 0;
409
410                 skb = skb_peek(&sk->sk_receive_queue);
411                 if (skb != NULL) {
412                         /*
413                          * We will only return the amount of this packet since
414                          * that is all that will be read.
415                          */
416                         amount = skb->len;
417                 }
418                 rc = put_user(amount, (int __user *)arg);
419         }
420                 break;
421         default:
422                 rc = -ENOIOCTLCMD;
423                 break;
424         }
425 out:
426         release_sock(sk);
427         return rc;
428 }
429
430 EXPORT_SYMBOL_GPL(dccp_ioctl);
431
432 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
433                                    char __user *optval, int optlen)
434 {
435         struct dccp_sock *dp = dccp_sk(sk);
436         struct dccp_service_list *sl = NULL;
437
438         if (service == DCCP_SERVICE_INVALID_VALUE ||
439             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
440                 return -EINVAL;
441
442         if (optlen > sizeof(service)) {
443                 sl = kmalloc(optlen, GFP_KERNEL);
444                 if (sl == NULL)
445                         return -ENOMEM;
446
447                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
448                 if (copy_from_user(sl->dccpsl_list,
449                                    optval + sizeof(service),
450                                    optlen - sizeof(service)) ||
451                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
452                         kfree(sl);
453                         return -EFAULT;
454                 }
455         }
456
457         lock_sock(sk);
458         dp->dccps_service = service;
459
460         kfree(dp->dccps_service_list);
461
462         dp->dccps_service_list = sl;
463         release_sock(sk);
464         return 0;
465 }
466
467 /* byte 1 is feature.  the rest is the preference list */
468 static int dccp_setsockopt_change(struct sock *sk, int type,
469                                   struct dccp_so_feat __user *optval)
470 {
471         struct dccp_so_feat opt;
472         u8 *val;
473         int rc;
474
475         if (copy_from_user(&opt, optval, sizeof(opt)))
476                 return -EFAULT;
477
478         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
479         if (!val)
480                 return -ENOMEM;
481
482         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
483                 rc = -EFAULT;
484                 goto out_free_val;
485         }
486
487         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
488                               val, opt.dccpsf_len, GFP_KERNEL);
489         if (rc)
490                 goto out_free_val;
491
492 out:
493         return rc;
494
495 out_free_val:
496         kfree(val);
497         goto out;
498 }
499
500 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
501                 char __user *optval, int optlen)
502 {
503         struct dccp_sock *dp = dccp_sk(sk);
504         int val, err = 0;
505
506         if (optlen < sizeof(int))
507                 return -EINVAL;
508
509         if (get_user(val, (int __user *)optval))
510                 return -EFAULT;
511
512         if (optname == DCCP_SOCKOPT_SERVICE)
513                 return dccp_setsockopt_service(sk, val, optval, optlen);
514
515         lock_sock(sk);
516         switch (optname) {
517         case DCCP_SOCKOPT_PACKET_SIZE:
518                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
519                 err = 0;
520                 break;
521         case DCCP_SOCKOPT_CHANGE_L:
522                 if (optlen != sizeof(struct dccp_so_feat))
523                         err = -EINVAL;
524                 else
525                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
526                                                      (struct dccp_so_feat __user *)
527                                                      optval);
528                 break;
529         case DCCP_SOCKOPT_CHANGE_R:
530                 if (optlen != sizeof(struct dccp_so_feat))
531                         err = -EINVAL;
532                 else
533                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
534                                                      (struct dccp_so_feat __user *)
535                                                      optval);
536                 break;
537         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
538                 if (dp->dccps_role != DCCP_ROLE_SERVER)
539                         err = -EOPNOTSUPP;
540                 else
541                         dp->dccps_server_timewait = (val != 0);
542                 break;
543         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
544                 if (val < 0 || val > 15)
545                         err = -EINVAL;
546                 else
547                         dp->dccps_pcslen = val;
548                 break;
549         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
550                 if (val < 0 || val > 15)
551                         err = -EINVAL;
552                 else {
553                         dp->dccps_pcrlen = val;
554                         /* FIXME: add feature negotiation,
555                          * ChangeL(MinimumChecksumCoverage, val) */
556                 }
557                 break;
558         default:
559                 err = -ENOPROTOOPT;
560                 break;
561         }
562
563         release_sock(sk);
564         return err;
565 }
566
567 int dccp_setsockopt(struct sock *sk, int level, int optname,
568                     char __user *optval, int optlen)
569 {
570         if (level != SOL_DCCP)
571                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
572                                                              optname, optval,
573                                                              optlen);
574         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
575 }
576
577 EXPORT_SYMBOL_GPL(dccp_setsockopt);
578
579 #ifdef CONFIG_COMPAT
580 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
581                            char __user *optval, int optlen)
582 {
583         if (level != SOL_DCCP)
584                 return inet_csk_compat_setsockopt(sk, level, optname,
585                                                   optval, optlen);
586         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
587 }
588
589 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
590 #endif
591
592 static int dccp_getsockopt_service(struct sock *sk, int len,
593                                    __be32 __user *optval,
594                                    int __user *optlen)
595 {
596         const struct dccp_sock *dp = dccp_sk(sk);
597         const struct dccp_service_list *sl;
598         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
599
600         lock_sock(sk);
601         if ((sl = dp->dccps_service_list) != NULL) {
602                 slen = sl->dccpsl_nr * sizeof(u32);
603                 total_len += slen;
604         }
605
606         err = -EINVAL;
607         if (total_len > len)
608                 goto out;
609
610         err = 0;
611         if (put_user(total_len, optlen) ||
612             put_user(dp->dccps_service, optval) ||
613             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
614                 err = -EFAULT;
615 out:
616         release_sock(sk);
617         return err;
618 }
619
620 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
621                     char __user *optval, int __user *optlen)
622 {
623         struct dccp_sock *dp;
624         int val, len;
625
626         if (get_user(len, optlen))
627                 return -EFAULT;
628
629         if (len < (int)sizeof(int))
630                 return -EINVAL;
631
632         dp = dccp_sk(sk);
633
634         switch (optname) {
635         case DCCP_SOCKOPT_PACKET_SIZE:
636                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
637                 return 0;
638         case DCCP_SOCKOPT_SERVICE:
639                 return dccp_getsockopt_service(sk, len,
640                                                (__be32 __user *)optval, optlen);
641         case DCCP_SOCKOPT_GET_CUR_MPS:
642                 val = dp->dccps_mss_cache;
643                 break;
644         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
645                 val = dp->dccps_server_timewait;
646                 break;
647         case DCCP_SOCKOPT_SEND_CSCOV:
648                 val = dp->dccps_pcslen;
649                 break;
650         case DCCP_SOCKOPT_RECV_CSCOV:
651                 val = dp->dccps_pcrlen;
652                 break;
653         case 128 ... 191:
654                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
655                                              len, (u32 __user *)optval, optlen);
656         case 192 ... 255:
657                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
658                                              len, (u32 __user *)optval, optlen);
659         default:
660                 return -ENOPROTOOPT;
661         }
662
663         len = sizeof(val);
664         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
665                 return -EFAULT;
666
667         return 0;
668 }
669
670 int dccp_getsockopt(struct sock *sk, int level, int optname,
671                     char __user *optval, int __user *optlen)
672 {
673         if (level != SOL_DCCP)
674                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
675                                                              optname, optval,
676                                                              optlen);
677         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
678 }
679
680 EXPORT_SYMBOL_GPL(dccp_getsockopt);
681
682 #ifdef CONFIG_COMPAT
683 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
684                            char __user *optval, int __user *optlen)
685 {
686         if (level != SOL_DCCP)
687                 return inet_csk_compat_getsockopt(sk, level, optname,
688                                                   optval, optlen);
689         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
690 }
691
692 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
693 #endif
694
695 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
696                  size_t len)
697 {
698         const struct dccp_sock *dp = dccp_sk(sk);
699         const int flags = msg->msg_flags;
700         const int noblock = flags & MSG_DONTWAIT;
701         struct sk_buff *skb;
702         int rc, size;
703         long timeo;
704
705         if (len > dp->dccps_mss_cache)
706                 return -EMSGSIZE;
707
708         lock_sock(sk);
709
710         if (sysctl_dccp_tx_qlen &&
711             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
712                 rc = -EAGAIN;
713                 goto out_release;
714         }
715
716         timeo = sock_sndtimeo(sk, noblock);
717
718         /*
719          * We have to use sk_stream_wait_connect here to set sk_write_pending,
720          * so that the trick in dccp_rcv_request_sent_state_process.
721          */
722         /* Wait for a connection to finish. */
723         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
724                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
725                         goto out_release;
726
727         size = sk->sk_prot->max_header + len;
728         release_sock(sk);
729         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
730         lock_sock(sk);
731         if (skb == NULL)
732                 goto out_release;
733
734         skb_reserve(skb, sk->sk_prot->max_header);
735         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
736         if (rc != 0)
737                 goto out_discard;
738
739         skb_queue_tail(&sk->sk_write_queue, skb);
740         dccp_write_xmit(sk,0);
741 out_release:
742         release_sock(sk);
743         return rc ? : len;
744 out_discard:
745         kfree_skb(skb);
746         goto out_release;
747 }
748
749 EXPORT_SYMBOL_GPL(dccp_sendmsg);
750
751 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
752                  size_t len, int nonblock, int flags, int *addr_len)
753 {
754         const struct dccp_hdr *dh;
755         long timeo;
756
757         lock_sock(sk);
758
759         if (sk->sk_state == DCCP_LISTEN) {
760                 len = -ENOTCONN;
761                 goto out;
762         }
763
764         timeo = sock_rcvtimeo(sk, nonblock);
765
766         do {
767                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
768
769                 if (skb == NULL)
770                         goto verify_sock_status;
771
772                 dh = dccp_hdr(skb);
773
774                 switch (dh->dccph_type) {
775                 case DCCP_PKT_DATA:
776                 case DCCP_PKT_DATAACK:
777                         goto found_ok_skb;
778
779                 case DCCP_PKT_CLOSE:
780                 case DCCP_PKT_CLOSEREQ:
781                         if (!(flags & MSG_PEEK))
782                                 dccp_finish_passive_close(sk);
783                         /* fall through */
784                 case DCCP_PKT_RESET:
785                         dccp_pr_debug("found fin (%s) ok!\n",
786                                       dccp_packet_name(dh->dccph_type));
787                         len = 0;
788                         goto found_fin_ok;
789                 default:
790                         dccp_pr_debug("packet_type=%s\n",
791                                       dccp_packet_name(dh->dccph_type));
792                         sk_eat_skb(sk, skb, 0);
793                 }
794 verify_sock_status:
795                 if (sock_flag(sk, SOCK_DONE)) {
796                         len = 0;
797                         break;
798                 }
799
800                 if (sk->sk_err) {
801                         len = sock_error(sk);
802                         break;
803                 }
804
805                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
806                         len = 0;
807                         break;
808                 }
809
810                 if (sk->sk_state == DCCP_CLOSED) {
811                         if (!sock_flag(sk, SOCK_DONE)) {
812                                 /* This occurs when user tries to read
813                                  * from never connected socket.
814                                  */
815                                 len = -ENOTCONN;
816                                 break;
817                         }
818                         len = 0;
819                         break;
820                 }
821
822                 if (!timeo) {
823                         len = -EAGAIN;
824                         break;
825                 }
826
827                 if (signal_pending(current)) {
828                         len = sock_intr_errno(timeo);
829                         break;
830                 }
831
832                 sk_wait_data(sk, &timeo);
833                 continue;
834         found_ok_skb:
835                 if (len > skb->len)
836                         len = skb->len;
837                 else if (len < skb->len)
838                         msg->msg_flags |= MSG_TRUNC;
839
840                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
841                         /* Exception. Bailout! */
842                         len = -EFAULT;
843                         break;
844                 }
845         found_fin_ok:
846                 if (!(flags & MSG_PEEK))
847                         sk_eat_skb(sk, skb, 0);
848                 break;
849         } while (1);
850 out:
851         release_sock(sk);
852         return len;
853 }
854
855 EXPORT_SYMBOL_GPL(dccp_recvmsg);
856
857 int inet_dccp_listen(struct socket *sock, int backlog)
858 {
859         struct sock *sk = sock->sk;
860         unsigned char old_state;
861         int err;
862
863         lock_sock(sk);
864
865         err = -EINVAL;
866         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
867                 goto out;
868
869         old_state = sk->sk_state;
870         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
871                 goto out;
872
873         /* Really, if the socket is already in listen state
874          * we can only allow the backlog to be adjusted.
875          */
876         if (old_state != DCCP_LISTEN) {
877                 /*
878                  * FIXME: here it probably should be sk->sk_prot->listen_start
879                  * see tcp_listen_start
880                  */
881                 err = dccp_listen_start(sk, backlog);
882                 if (err)
883                         goto out;
884         }
885         sk->sk_max_ack_backlog = backlog;
886         err = 0;
887
888 out:
889         release_sock(sk);
890         return err;
891 }
892
893 EXPORT_SYMBOL_GPL(inet_dccp_listen);
894
895 static void dccp_terminate_connection(struct sock *sk)
896 {
897         u8 next_state = DCCP_CLOSED;
898
899         switch (sk->sk_state) {
900         case DCCP_PASSIVE_CLOSE:
901         case DCCP_PASSIVE_CLOSEREQ:
902                 dccp_finish_passive_close(sk);
903                 break;
904         case DCCP_PARTOPEN:
905                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
906                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
907                 /* fall through */
908         case DCCP_OPEN:
909                 dccp_send_close(sk, 1);
910
911                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
912                     !dccp_sk(sk)->dccps_server_timewait)
913                         next_state = DCCP_ACTIVE_CLOSEREQ;
914                 else
915                         next_state = DCCP_CLOSING;
916                 /* fall through */
917         default:
918                 dccp_set_state(sk, next_state);
919         }
920 }
921
922 void dccp_close(struct sock *sk, long timeout)
923 {
924         struct dccp_sock *dp = dccp_sk(sk);
925         struct sk_buff *skb;
926         u32 data_was_unread = 0;
927         int state;
928
929         lock_sock(sk);
930
931         sk->sk_shutdown = SHUTDOWN_MASK;
932
933         if (sk->sk_state == DCCP_LISTEN) {
934                 dccp_set_state(sk, DCCP_CLOSED);
935
936                 /* Special case. */
937                 inet_csk_listen_stop(sk);
938
939                 goto adjudge_to_death;
940         }
941
942         sk_stop_timer(sk, &dp->dccps_xmit_timer);
943
944         /*
945          * We need to flush the recv. buffs.  We do this only on the
946          * descriptor close, not protocol-sourced closes, because the
947           *reader process may not have drained the data yet!
948          */
949         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
950                 data_was_unread += skb->len;
951                 __kfree_skb(skb);
952         }
953
954         if (data_was_unread) {
955                 /* Unread data was tossed, send an appropriate Reset Code */
956                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
957                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
958                 dccp_set_state(sk, DCCP_CLOSED);
959         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
960                 /* Check zero linger _after_ checking for unread data. */
961                 sk->sk_prot->disconnect(sk, 0);
962         } else if (sk->sk_state != DCCP_CLOSED) {
963                 dccp_terminate_connection(sk);
964         }
965
966         sk_stream_wait_close(sk, timeout);
967
968 adjudge_to_death:
969         state = sk->sk_state;
970         sock_hold(sk);
971         sock_orphan(sk);
972         atomic_inc(sk->sk_prot->orphan_count);
973
974         /*
975          * It is the last release_sock in its life. It will remove backlog.
976          */
977         release_sock(sk);
978         /*
979          * Now socket is owned by kernel and we acquire BH lock
980          * to finish close. No need to check for user refs.
981          */
982         local_bh_disable();
983         bh_lock_sock(sk);
984         WARN_ON(sock_owned_by_user(sk));
985
986         /* Have we already been destroyed by a softirq or backlog? */
987         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
988                 goto out;
989
990         if (sk->sk_state == DCCP_CLOSED)
991                 inet_csk_destroy_sock(sk);
992
993         /* Otherwise, socket is reprieved until protocol close. */
994
995 out:
996         bh_unlock_sock(sk);
997         local_bh_enable();
998         sock_put(sk);
999 }
1000
1001 EXPORT_SYMBOL_GPL(dccp_close);
1002
/*
 * dccp_shutdown  -  shutdown() handler
 * @sk:  socket the request refers to (unused)
 * @how: requested shutdown mode; only printed
 *
 * Placeholder: apart from emitting a debug message nothing is done.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}
1007
1008 EXPORT_SYMBOL_GPL(dccp_shutdown);
1009
1010 static inline int dccp_mib_init(void)
1011 {
1012         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1013 }
1014
1015 static inline void dccp_mib_exit(void)
1016 {
1017         snmp_mib_free((void**)dccp_statistics);
1018 }
1019
1020 static int thash_entries;
1021 module_param(thash_entries, int, 0444);
1022 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1023
1024 #ifdef CONFIG_IP_DCCP_DEBUG
1025 int dccp_debug;
1026 module_param(dccp_debug, bool, 0444);
1027 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1028
1029 EXPORT_SYMBOL_GPL(dccp_debug);
1030 #endif
1031
1032 static int __init dccp_init(void)
1033 {
1034         unsigned long goal;
1035         int ehash_order, bhash_order, i;
1036         int rc = -ENOBUFS;
1037
1038         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1039                      FIELD_SIZEOF(struct sk_buff, cb));
1040
1041         dccp_hashinfo.bind_bucket_cachep =
1042                 kmem_cache_create("dccp_bind_bucket",
1043                                   sizeof(struct inet_bind_bucket), 0,
1044                                   SLAB_HWCACHE_ALIGN, NULL);
1045         if (!dccp_hashinfo.bind_bucket_cachep)
1046                 goto out;
1047
1048         /*
1049          * Size and allocate the main established and bind bucket
1050          * hash tables.
1051          *
1052          * The methodology is similar to that of the buffer cache.
1053          */
1054         if (num_physpages >= (128 * 1024))
1055                 goal = num_physpages >> (21 - PAGE_SHIFT);
1056         else
1057                 goal = num_physpages >> (23 - PAGE_SHIFT);
1058
1059         if (thash_entries)
1060                 goal = (thash_entries *
1061                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1062         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1063                 ;
1064         do {
1065                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1066                                         sizeof(struct inet_ehash_bucket);
1067                 while (dccp_hashinfo.ehash_size &
1068                        (dccp_hashinfo.ehash_size - 1))
1069                         dccp_hashinfo.ehash_size--;
1070                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1071                         __get_free_pages(GFP_ATOMIC, ehash_order);
1072         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1073
1074         if (!dccp_hashinfo.ehash) {
1075                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1076                 goto out_free_bind_bucket_cachep;
1077         }
1078
1079         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1080                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1081                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1082         }
1083
1084         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1085                         goto out_free_dccp_ehash;
1086
1087         bhash_order = ehash_order;
1088
1089         do {
1090                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1091                                         sizeof(struct inet_bind_hashbucket);
1092                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1093                     bhash_order > 0)
1094                         continue;
1095                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1096                         __get_free_pages(GFP_ATOMIC, bhash_order);
1097         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1098
1099         if (!dccp_hashinfo.bhash) {
1100                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1101                 goto out_free_dccp_locks;
1102         }
1103
1104         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1105                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1106                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1107         }
1108
1109         rc = dccp_mib_init();
1110         if (rc)
1111                 goto out_free_dccp_bhash;
1112
1113         rc = dccp_ackvec_init();
1114         if (rc)
1115                 goto out_free_dccp_mib;
1116
1117         rc = dccp_sysctl_init();
1118         if (rc)
1119                 goto out_ackvec_exit;
1120
1121         dccp_timestamping_init();
1122 out:
1123         return rc;
1124 out_ackvec_exit:
1125         dccp_ackvec_exit();
1126 out_free_dccp_mib:
1127         dccp_mib_exit();
1128 out_free_dccp_bhash:
1129         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1130         dccp_hashinfo.bhash = NULL;
1131 out_free_dccp_locks:
1132         inet_ehash_locks_free(&dccp_hashinfo);
1133 out_free_dccp_ehash:
1134         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1135         dccp_hashinfo.ehash = NULL;
1136 out_free_bind_bucket_cachep:
1137         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1138         dccp_hashinfo.bind_bucket_cachep = NULL;
1139         goto out;
1140 }
1141
1142 static void __exit dccp_fini(void)
1143 {
1144         dccp_mib_exit();
1145         free_pages((unsigned long)dccp_hashinfo.bhash,
1146                    get_order(dccp_hashinfo.bhash_size *
1147                              sizeof(struct inet_bind_hashbucket)));
1148         free_pages((unsigned long)dccp_hashinfo.ehash,
1149                    get_order(dccp_hashinfo.ehash_size *
1150                              sizeof(struct inet_ehash_bucket)));
1151         inet_ehash_locks_free(&dccp_hashinfo);
1152         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1153         dccp_ackvec_exit();
1154         dccp_sysctl_exit();
1155 }
1156
1157 module_init(dccp_init);
1158 module_exit(dccp_fini);
1159
1160 MODULE_LICENSE("GPL");
1161 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1162 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");