Merge branch 'linus' into x86/xen
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* DCCP MIB counter storage, declared through the DEFINE_SNMP_STAT helper. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/*
 * Global atomic counter, starting at zero.
 * NOTE(review): presumably the number of orphaned DCCP sockets - the users
 * of this counter are not visible in this file, confirm there.
 */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * Hash table state for DCCP sockets.  Only the listening-hash lock, its user
 * count and its wait queue are initialised statically here; every other
 * member starts out zeroed.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/*
 * The maximum queue length for tx in packets; 0 means no limit.
 * dccp_sendmsg() refuses new packets with -EAGAIN once sk_write_queue has
 * reached this length.
 */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
58 void dccp_set_state(struct sock *sk, const int state)
59 {
60         const int oldstate = sk->sk_state;
61
62         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
63                       dccp_state_name(oldstate), dccp_state_name(state));
64         WARN_ON(state == oldstate);
65
66         switch (state) {
67         case DCCP_OPEN:
68                 if (oldstate != DCCP_OPEN)
69                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
70                 break;
71
72         case DCCP_CLOSED:
73                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
74                     oldstate == DCCP_CLOSING)
75                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
76
77                 sk->sk_prot->unhash(sk);
78                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
79                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
80                         inet_put_port(sk);
81                 /* fall through */
82         default:
83                 if (oldstate == DCCP_OPEN)
84                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
85         }
86
87         /* Change state AFTER socket is unhashed to avoid closed
88          * socket sitting in hash tables.
89          */
90         sk->sk_state = state;
91 }
92
93 EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
175 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
176 {
177         struct dccp_sock *dp = dccp_sk(sk);
178         struct dccp_minisock *dmsk = dccp_msk(sk);
179         struct inet_connection_sock *icsk = inet_csk(sk);
180
181         dccp_minisock_init(&dp->dccps_minisock);
182
183         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
184         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
185         sk->sk_state            = DCCP_CLOSED;
186         sk->sk_write_space      = dccp_write_space;
187         icsk->icsk_sync_mss     = dccp_sync_mss;
188         dp->dccps_mss_cache     = 536;
189         dp->dccps_rate_last     = jiffies;
190         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
191         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
192         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
193
194         dccp_init_xmit_timers(sk);
195
196         /*
197          * FIXME: We're hardcoding the CCID, and doing this at this point makes
198          * the listening (master) sock get CCID control blocks, which is not
199          * necessary, but for now, to not mess with the test userspace apps,
200          * lets leave it here, later the real solution is to do this in a
201          * setsockopt(CCIDs-I-want/accept). -acme
202          */
203         if (likely(ctl_sock_initialized)) {
204                 int rc = dccp_feat_init(dmsk);
205
206                 if (rc)
207                         return rc;
208
209                 if (dmsk->dccpms_send_ack_vector) {
210                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
211                         if (dp->dccps_hc_rx_ackvec == NULL)
212                                 return -ENOMEM;
213                 }
214                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
215                                                       sk, GFP_KERNEL);
216                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
217                                                       sk, GFP_KERNEL);
218                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
219                              dp->dccps_hc_tx_ccid == NULL)) {
220                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
221                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
222                         if (dmsk->dccpms_send_ack_vector) {
223                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
224                                 dp->dccps_hc_rx_ackvec = NULL;
225                         }
226                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
227                         return -ENOMEM;
228                 }
229         } else {
230                 /* control socket doesn't need feat nego */
231                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
232                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
233         }
234
235         return 0;
236 }
237
238 EXPORT_SYMBOL_GPL(dccp_init_sock);
239
240 void dccp_destroy_sock(struct sock *sk)
241 {
242         struct dccp_sock *dp = dccp_sk(sk);
243         struct dccp_minisock *dmsk = dccp_msk(sk);
244
245         /*
246          * DCCP doesn't use sk_write_queue, just sk_send_head
247          * for retransmissions
248          */
249         if (sk->sk_send_head != NULL) {
250                 kfree_skb(sk->sk_send_head);
251                 sk->sk_send_head = NULL;
252         }
253
254         /* Clean up a referenced DCCP bind bucket. */
255         if (inet_csk(sk)->icsk_bind_hash != NULL)
256                 inet_put_port(sk);
257
258         kfree(dp->dccps_service_list);
259         dp->dccps_service_list = NULL;
260
261         if (dmsk->dccpms_send_ack_vector) {
262                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
263                 dp->dccps_hc_rx_ackvec = NULL;
264         }
265         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
266         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
267         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
268
269         /* clean up feature negotiation state */
270         dccp_feat_clean(dmsk);
271 }
272
273 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
274
275 static inline int dccp_listen_start(struct sock *sk, int backlog)
276 {
277         struct dccp_sock *dp = dccp_sk(sk);
278
279         dp->dccps_role = DCCP_ROLE_LISTEN;
280         return inet_csk_listen_start(sk, backlog);
281 }
282
283 static inline int dccp_need_reset(int state)
284 {
285         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
286                state != DCCP_REQUESTING;
287 }
288
289 int dccp_disconnect(struct sock *sk, int flags)
290 {
291         struct inet_connection_sock *icsk = inet_csk(sk);
292         struct inet_sock *inet = inet_sk(sk);
293         int err = 0;
294         const int old_state = sk->sk_state;
295
296         if (old_state != DCCP_CLOSED)
297                 dccp_set_state(sk, DCCP_CLOSED);
298
299         /*
300          * This corresponds to the ABORT function of RFC793, sec. 3.8
301          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
302          */
303         if (old_state == DCCP_LISTEN) {
304                 inet_csk_listen_stop(sk);
305         } else if (dccp_need_reset(old_state)) {
306                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
307                 sk->sk_err = ECONNRESET;
308         } else if (old_state == DCCP_REQUESTING)
309                 sk->sk_err = ECONNRESET;
310
311         dccp_clear_xmit_timers(sk);
312         __skb_queue_purge(&sk->sk_receive_queue);
313         if (sk->sk_send_head != NULL) {
314                 __kfree_skb(sk->sk_send_head);
315                 sk->sk_send_head = NULL;
316         }
317
318         inet->dport = 0;
319
320         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
321                 inet_reset_saddr(sk);
322
323         sk->sk_shutdown = 0;
324         sock_reset_flag(sk, SOCK_DONE);
325
326         icsk->icsk_backoff = 0;
327         inet_csk_delack_init(sk);
328         __sk_dst_reset(sk);
329
330         WARN_ON(inet->num && !icsk->icsk_bind_hash);
331
332         sk->sk_error_report(sk);
333         return err;
334 }
335
336 EXPORT_SYMBOL_GPL(dccp_disconnect);
337
/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 *
 *      Returns a mask of POLL* events.  For a socket in DCCP_LISTEN the
 *      result of inet_csk_listen_poll() is returned instead.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        /* Register on the socket's wait queue before sampling any state. */
        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
           by poll logic and correct handling of state changes
           made by another threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        /* Hang-up: both directions shut down, or the socket is closed. */
        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        /* A receive shutdown is reported as readable. */
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? (any state except REQUESTING and RESPOND) */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                /* Readable when receive memory is charged to the socket. */
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}
393
394 EXPORT_SYMBOL_GPL(dccp_poll);
395
396 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
397 {
398         int rc = -ENOTCONN;
399
400         lock_sock(sk);
401
402         if (sk->sk_state == DCCP_LISTEN)
403                 goto out;
404
405         switch (cmd) {
406         case SIOCINQ: {
407                 struct sk_buff *skb;
408                 unsigned long amount = 0;
409
410                 skb = skb_peek(&sk->sk_receive_queue);
411                 if (skb != NULL) {
412                         /*
413                          * We will only return the amount of this packet since
414                          * that is all that will be read.
415                          */
416                         amount = skb->len;
417                 }
418                 rc = put_user(amount, (int __user *)arg);
419         }
420                 break;
421         default:
422                 rc = -ENOIOCTLCMD;
423                 break;
424         }
425 out:
426         release_sock(sk);
427         return rc;
428 }
429
430 EXPORT_SYMBOL_GPL(dccp_ioctl);
431
432 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
433                                    char __user *optval, int optlen)
434 {
435         struct dccp_sock *dp = dccp_sk(sk);
436         struct dccp_service_list *sl = NULL;
437
438         if (service == DCCP_SERVICE_INVALID_VALUE ||
439             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
440                 return -EINVAL;
441
442         if (optlen > sizeof(service)) {
443                 sl = kmalloc(optlen, GFP_KERNEL);
444                 if (sl == NULL)
445                         return -ENOMEM;
446
447                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
448                 if (copy_from_user(sl->dccpsl_list,
449                                    optval + sizeof(service),
450                                    optlen - sizeof(service)) ||
451                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
452                         kfree(sl);
453                         return -EFAULT;
454                 }
455         }
456
457         lock_sock(sk);
458         dp->dccps_service = service;
459
460         kfree(dp->dccps_service_list);
461
462         dp->dccps_service_list = sl;
463         release_sock(sk);
464         return 0;
465 }
466
467 /* byte 1 is feature.  the rest is the preference list */
468 static int dccp_setsockopt_change(struct sock *sk, int type,
469                                   struct dccp_so_feat __user *optval)
470 {
471         struct dccp_so_feat opt;
472         u8 *val;
473         int rc;
474
475         if (copy_from_user(&opt, optval, sizeof(opt)))
476                 return -EFAULT;
477         /*
478          * rfc4340: 6.1. Change Options
479          */
480         if (opt.dccpsf_len < 1)
481                 return -EINVAL;
482
483         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
484         if (!val)
485                 return -ENOMEM;
486
487         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
488                 rc = -EFAULT;
489                 goto out_free_val;
490         }
491
492         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
493                               val, opt.dccpsf_len, GFP_KERNEL);
494         if (rc)
495                 goto out_free_val;
496
497 out:
498         return rc;
499
500 out_free_val:
501         kfree(val);
502         goto out;
503 }
504
505 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
506                 char __user *optval, int optlen)
507 {
508         struct dccp_sock *dp = dccp_sk(sk);
509         int val, err = 0;
510
511         if (optlen < sizeof(int))
512                 return -EINVAL;
513
514         if (get_user(val, (int __user *)optval))
515                 return -EFAULT;
516
517         if (optname == DCCP_SOCKOPT_SERVICE)
518                 return dccp_setsockopt_service(sk, val, optval, optlen);
519
520         lock_sock(sk);
521         switch (optname) {
522         case DCCP_SOCKOPT_PACKET_SIZE:
523                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
524                 err = 0;
525                 break;
526         case DCCP_SOCKOPT_CHANGE_L:
527                 if (optlen != sizeof(struct dccp_so_feat))
528                         err = -EINVAL;
529                 else
530                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
531                                                      (struct dccp_so_feat __user *)
532                                                      optval);
533                 break;
534         case DCCP_SOCKOPT_CHANGE_R:
535                 if (optlen != sizeof(struct dccp_so_feat))
536                         err = -EINVAL;
537                 else
538                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
539                                                      (struct dccp_so_feat __user *)
540                                                      optval);
541                 break;
542         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
543                 if (dp->dccps_role != DCCP_ROLE_SERVER)
544                         err = -EOPNOTSUPP;
545                 else
546                         dp->dccps_server_timewait = (val != 0);
547                 break;
548         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
549                 if (val < 0 || val > 15)
550                         err = -EINVAL;
551                 else
552                         dp->dccps_pcslen = val;
553                 break;
554         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
555                 if (val < 0 || val > 15)
556                         err = -EINVAL;
557                 else {
558                         dp->dccps_pcrlen = val;
559                         /* FIXME: add feature negotiation,
560                          * ChangeL(MinimumChecksumCoverage, val) */
561                 }
562                 break;
563         default:
564                 err = -ENOPROTOOPT;
565                 break;
566         }
567
568         release_sock(sk);
569         return err;
570 }
571
572 int dccp_setsockopt(struct sock *sk, int level, int optname,
573                     char __user *optval, int optlen)
574 {
575         if (level != SOL_DCCP)
576                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
577                                                              optname, optval,
578                                                              optlen);
579         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
580 }
581
582 EXPORT_SYMBOL_GPL(dccp_setsockopt);
583
584 #ifdef CONFIG_COMPAT
585 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
586                            char __user *optval, int optlen)
587 {
588         if (level != SOL_DCCP)
589                 return inet_csk_compat_setsockopt(sk, level, optname,
590                                                   optval, optlen);
591         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
592 }
593
594 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
595 #endif
596
597 static int dccp_getsockopt_service(struct sock *sk, int len,
598                                    __be32 __user *optval,
599                                    int __user *optlen)
600 {
601         const struct dccp_sock *dp = dccp_sk(sk);
602         const struct dccp_service_list *sl;
603         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
604
605         lock_sock(sk);
606         if ((sl = dp->dccps_service_list) != NULL) {
607                 slen = sl->dccpsl_nr * sizeof(u32);
608                 total_len += slen;
609         }
610
611         err = -EINVAL;
612         if (total_len > len)
613                 goto out;
614
615         err = 0;
616         if (put_user(total_len, optlen) ||
617             put_user(dp->dccps_service, optval) ||
618             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
619                 err = -EFAULT;
620 out:
621         release_sock(sk);
622         return err;
623 }
624
625 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
626                     char __user *optval, int __user *optlen)
627 {
628         struct dccp_sock *dp;
629         int val, len;
630
631         if (get_user(len, optlen))
632                 return -EFAULT;
633
634         if (len < (int)sizeof(int))
635                 return -EINVAL;
636
637         dp = dccp_sk(sk);
638
639         switch (optname) {
640         case DCCP_SOCKOPT_PACKET_SIZE:
641                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
642                 return 0;
643         case DCCP_SOCKOPT_SERVICE:
644                 return dccp_getsockopt_service(sk, len,
645                                                (__be32 __user *)optval, optlen);
646         case DCCP_SOCKOPT_GET_CUR_MPS:
647                 val = dp->dccps_mss_cache;
648                 break;
649         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
650                 val = dp->dccps_server_timewait;
651                 break;
652         case DCCP_SOCKOPT_SEND_CSCOV:
653                 val = dp->dccps_pcslen;
654                 break;
655         case DCCP_SOCKOPT_RECV_CSCOV:
656                 val = dp->dccps_pcrlen;
657                 break;
658         case 128 ... 191:
659                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
660                                              len, (u32 __user *)optval, optlen);
661         case 192 ... 255:
662                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
663                                              len, (u32 __user *)optval, optlen);
664         default:
665                 return -ENOPROTOOPT;
666         }
667
668         len = sizeof(val);
669         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
670                 return -EFAULT;
671
672         return 0;
673 }
674
675 int dccp_getsockopt(struct sock *sk, int level, int optname,
676                     char __user *optval, int __user *optlen)
677 {
678         if (level != SOL_DCCP)
679                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
680                                                              optname, optval,
681                                                              optlen);
682         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
683 }
684
685 EXPORT_SYMBOL_GPL(dccp_getsockopt);
686
687 #ifdef CONFIG_COMPAT
688 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
689                            char __user *optval, int __user *optlen)
690 {
691         if (level != SOL_DCCP)
692                 return inet_csk_compat_getsockopt(sk, level, optname,
693                                                   optval, optlen);
694         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
695 }
696
697 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
698 #endif
699
/*
 * dccp_sendmsg  -  queue one datagram for transmission
 * @iocb: not used by this function
 * @sk:   socket to send on
 * @msg:  message header; msg_flags and msg_iov are read
 * @len:  payload length in bytes
 *
 * The whole payload goes into a single skb, so a payload larger than the
 * cached MSS is refused.
 *
 * Returns @len on success, otherwise a negative errno: -EMSGSIZE (payload
 * exceeds the MSS), -EAGAIN (tx queue has reached sysctl_dccp_tx_qlen), or
 * the error from sk_stream_wait_connect(), sock_alloc_send_skb() or
 * memcpy_fromiovec().
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        /* A sysctl value of 0 disables the queue length limit. */
        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
        /* The socket lock is dropped around the skb allocation. */
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        /* Leave headroom for the protocol headers, then copy the payload. */
        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk,0);
out_release:
        release_sock(sk);
        /* GNU `?:' extension: rc if it is non-zero, otherwise len. */
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}
753
754 EXPORT_SYMBOL_GPL(dccp_sendmsg);
755
/*
 * dccp_recvmsg  -  receive one datagram
 * @iocb:     not used by this function
 * @sk:       socket to read from
 * @msg:      destination; msg_iov receives data, MSG_TRUNC may be set in
 *            msg_flags
 * @len:      size of the caller's buffer in bytes
 * @nonblock: non-zero for a non-blocking read (passed to sock_rcvtimeo())
 * @flags:    receive flags; only MSG_PEEK is examined here
 * @addr_len: not used by this function
 *
 * Looks at the head of the receive queue.  Data and DataAck packets are
 * copied to the caller (at most one packet per call); Close, CloseReq and
 * Reset packets end the read with a return value of 0; any other packet type
 * is dropped and the loop continues.
 *
 * @len is a size_t but doubles as the return value: negative errno codes are
 * stored in it and come back out through the int return type.
 *
 * Returns the number of bytes copied, 0 at end of connection, or a negative
 * errno (-ENOTCONN, -EAGAIN, -EFAULT, a pending socket error, or the value
 * of sock_intr_errno() when interrupted by a signal).
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                /* Nothing queued: decide whether to wait or to give up. */
                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                switch (dh->dccph_type) {
                case DCCP_PKT_DATA:
                case DCCP_PKT_DATAACK:
                        goto found_ok_skb;

                case DCCP_PKT_CLOSE:
                case DCCP_PKT_CLOSEREQ:
                        /* A peek must not advance the closing handshake. */
                        if (!(flags & MSG_PEEK))
                                dccp_finish_passive_close(sk);
                        /* fall through */
                case DCCP_PKT_RESET:
                        dccp_pr_debug("found fin (%s) ok!\n",
                                      dccp_packet_name(dh->dccph_type));
                        len = 0;
                        goto found_fin_ok;
                default:
                        /* Not deliverable to the user: drop and re-check. */
                        dccp_pr_debug("packet_type=%s\n",
                                      dccp_packet_name(dh->dccph_type));
                        sk_eat_skb(sk, skb, 0);
                }
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        /*
                         * NOTE(review): SOCK_DONE was already tested at the
                         * top of verify_sock_status, so with the socket lock
                         * held this inner test looks always true and the
                         * `len = 0' path below unreachable - confirm.
                         */
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when user tries to read
                                 * from never connected socket.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                /* Non-blocking read, or the receive timeout has run out. */
                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                /* Clamp to the packet size; flag truncation if it is larger. */
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                /* A peek leaves the packet on the queue. */
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}
859
860 EXPORT_SYMBOL_GPL(dccp_recvmsg);
861
862 int inet_dccp_listen(struct socket *sock, int backlog)
863 {
864         struct sock *sk = sock->sk;
865         unsigned char old_state;
866         int err;
867
868         lock_sock(sk);
869
870         err = -EINVAL;
871         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
872                 goto out;
873
874         old_state = sk->sk_state;
875         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
876                 goto out;
877
878         /* Really, if the socket is already in listen state
879          * we can only allow the backlog to be adjusted.
880          */
881         if (old_state != DCCP_LISTEN) {
882                 /*
883                  * FIXME: here it probably should be sk->sk_prot->listen_start
884                  * see tcp_listen_start
885                  */
886                 err = dccp_listen_start(sk, backlog);
887                 if (err)
888                         goto out;
889         }
890         sk->sk_max_ack_backlog = backlog;
891         err = 0;
892
893 out:
894         release_sock(sk);
895         return err;
896 }
897
898 EXPORT_SYMBOL_GPL(inet_dccp_listen);
899
900 static void dccp_terminate_connection(struct sock *sk)
901 {
902         u8 next_state = DCCP_CLOSED;
903
904         switch (sk->sk_state) {
905         case DCCP_PASSIVE_CLOSE:
906         case DCCP_PASSIVE_CLOSEREQ:
907                 dccp_finish_passive_close(sk);
908                 break;
909         case DCCP_PARTOPEN:
910                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
911                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
912                 /* fall through */
913         case DCCP_OPEN:
914                 dccp_send_close(sk, 1);
915
916                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
917                     !dccp_sk(sk)->dccps_server_timewait)
918                         next_state = DCCP_ACTIVE_CLOSEREQ;
919                 else
920                         next_state = DCCP_CLOSING;
921                 /* fall through */
922         default:
923                 dccp_set_state(sk, next_state);
924         }
925 }
926
927 void dccp_close(struct sock *sk, long timeout)
928 {
929         struct dccp_sock *dp = dccp_sk(sk);
930         struct sk_buff *skb;
931         u32 data_was_unread = 0;
932         int state;
933
934         lock_sock(sk);
935
936         sk->sk_shutdown = SHUTDOWN_MASK;
937
938         if (sk->sk_state == DCCP_LISTEN) {
939                 dccp_set_state(sk, DCCP_CLOSED);
940
941                 /* Special case. */
942                 inet_csk_listen_stop(sk);
943
944                 goto adjudge_to_death;
945         }
946
947         sk_stop_timer(sk, &dp->dccps_xmit_timer);
948
949         /*
950          * We need to flush the recv. buffs.  We do this only on the
951          * descriptor close, not protocol-sourced closes, because the
952           *reader process may not have drained the data yet!
953          */
954         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
955                 data_was_unread += skb->len;
956                 __kfree_skb(skb);
957         }
958
959         if (data_was_unread) {
960                 /* Unread data was tossed, send an appropriate Reset Code */
961                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
962                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
963                 dccp_set_state(sk, DCCP_CLOSED);
964         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
965                 /* Check zero linger _after_ checking for unread data. */
966                 sk->sk_prot->disconnect(sk, 0);
967         } else if (sk->sk_state != DCCP_CLOSED) {
968                 dccp_terminate_connection(sk);
969         }
970
971         sk_stream_wait_close(sk, timeout);
972
973 adjudge_to_death:
974         state = sk->sk_state;
975         sock_hold(sk);
976         sock_orphan(sk);
977         atomic_inc(sk->sk_prot->orphan_count);
978
979         /*
980          * It is the last release_sock in its life. It will remove backlog.
981          */
982         release_sock(sk);
983         /*
984          * Now socket is owned by kernel and we acquire BH lock
985          * to finish close. No need to check for user refs.
986          */
987         local_bh_disable();
988         bh_lock_sock(sk);
989         WARN_ON(sock_owned_by_user(sk));
990
991         /* Have we already been destroyed by a softirq or backlog? */
992         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
993                 goto out;
994
995         if (sk->sk_state == DCCP_CLOSED)
996                 inet_csk_destroy_sock(sk);
997
998         /* Otherwise, socket is reprieved until protocol close. */
999
1000 out:
1001         bh_unlock_sock(sk);
1002         local_bh_enable();
1003         sock_put(sk);
1004 }
1005
1006 EXPORT_SYMBOL_GPL(dccp_close);
1007
/*
 * shutdown() handler for DCCP sockets.
 *
 * Currently a stub: it only emits a debug message with the requested @how
 * value and leaves @sk untouched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1014
1015 static inline int dccp_mib_init(void)
1016 {
1017         return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
1018 }
1019
1020 static inline void dccp_mib_exit(void)
1021 {
1022         snmp_mib_free((void**)dccp_statistics);
1023 }
1024
1025 static int thash_entries;
1026 module_param(thash_entries, int, 0444);
1027 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1028
1029 #ifdef CONFIG_IP_DCCP_DEBUG
1030 int dccp_debug;
1031 module_param(dccp_debug, bool, 0444);
1032 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1033
1034 EXPORT_SYMBOL_GPL(dccp_debug);
1035 #endif
1036
1037 static int __init dccp_init(void)
1038 {
1039         unsigned long goal;
1040         int ehash_order, bhash_order, i;
1041         int rc = -ENOBUFS;
1042
1043         BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
1044                      FIELD_SIZEOF(struct sk_buff, cb));
1045
1046         dccp_hashinfo.bind_bucket_cachep =
1047                 kmem_cache_create("dccp_bind_bucket",
1048                                   sizeof(struct inet_bind_bucket), 0,
1049                                   SLAB_HWCACHE_ALIGN, NULL);
1050         if (!dccp_hashinfo.bind_bucket_cachep)
1051                 goto out;
1052
1053         /*
1054          * Size and allocate the main established and bind bucket
1055          * hash tables.
1056          *
1057          * The methodology is similar to that of the buffer cache.
1058          */
1059         if (num_physpages >= (128 * 1024))
1060                 goal = num_physpages >> (21 - PAGE_SHIFT);
1061         else
1062                 goal = num_physpages >> (23 - PAGE_SHIFT);
1063
1064         if (thash_entries)
1065                 goal = (thash_entries *
1066                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1067         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1068                 ;
1069         do {
1070                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1071                                         sizeof(struct inet_ehash_bucket);
1072                 while (dccp_hashinfo.ehash_size &
1073                        (dccp_hashinfo.ehash_size - 1))
1074                         dccp_hashinfo.ehash_size--;
1075                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1076                         __get_free_pages(GFP_ATOMIC, ehash_order);
1077         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1078
1079         if (!dccp_hashinfo.ehash) {
1080                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1081                 goto out_free_bind_bucket_cachep;
1082         }
1083
1084         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1085                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1086                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1087         }
1088
1089         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1090                         goto out_free_dccp_ehash;
1091
1092         bhash_order = ehash_order;
1093
1094         do {
1095                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1096                                         sizeof(struct inet_bind_hashbucket);
1097                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1098                     bhash_order > 0)
1099                         continue;
1100                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1101                         __get_free_pages(GFP_ATOMIC, bhash_order);
1102         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1103
1104         if (!dccp_hashinfo.bhash) {
1105                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1106                 goto out_free_dccp_locks;
1107         }
1108
1109         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1110                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1111                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1112         }
1113
1114         rc = dccp_mib_init();
1115         if (rc)
1116                 goto out_free_dccp_bhash;
1117
1118         rc = dccp_ackvec_init();
1119         if (rc)
1120                 goto out_free_dccp_mib;
1121
1122         rc = dccp_sysctl_init();
1123         if (rc)
1124                 goto out_ackvec_exit;
1125
1126         dccp_timestamping_init();
1127 out:
1128         return rc;
1129 out_ackvec_exit:
1130         dccp_ackvec_exit();
1131 out_free_dccp_mib:
1132         dccp_mib_exit();
1133 out_free_dccp_bhash:
1134         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1135         dccp_hashinfo.bhash = NULL;
1136 out_free_dccp_locks:
1137         inet_ehash_locks_free(&dccp_hashinfo);
1138 out_free_dccp_ehash:
1139         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1140         dccp_hashinfo.ehash = NULL;
1141 out_free_bind_bucket_cachep:
1142         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1143         dccp_hashinfo.bind_bucket_cachep = NULL;
1144         goto out;
1145 }
1146
1147 static void __exit dccp_fini(void)
1148 {
1149         dccp_mib_exit();
1150         free_pages((unsigned long)dccp_hashinfo.bhash,
1151                    get_order(dccp_hashinfo.bhash_size *
1152                              sizeof(struct inet_bind_hashbucket)));
1153         free_pages((unsigned long)dccp_hashinfo.ehash,
1154                    get_order(dccp_hashinfo.ehash_size *
1155                              sizeof(struct inet_ehash_bucket)));
1156         inet_ehash_locks_free(&dccp_hashinfo);
1157         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1158         dccp_ackvec_exit();
1159         dccp_sysctl_exit();
1160 }
1161
1162 module_init(dccp_init);
1163 module_exit(dccp_fini);
1164
1165 MODULE_LICENSE("GPL");
1166 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1167 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");