dccp: Integration of dynamic feature activation - part 1 (socket setup)
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Per-CPU DCCP MIB counters, exported to the SNMP/proc interface. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of DCCP sockets disassociated from their process (orphaned). */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/*
 * Global DCCP connection hash tables; only the listening-hash fields need
 * static initialisation here, the rest is set up at module init time.
 */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
/*
 * dccp_set_state  -  Transition the socket to a new DCCP state
 * @sk:    socket being updated (caller must hold the socket lock)
 * @state: target DCCP_* state (see dccp_state_name() for names)
 *
 * Besides updating sk->sk_state, this keeps the MIB counters for
 * established/reset connections in sync and, when entering DCCP_CLOSED,
 * unhashes the socket and releases its local port binding.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		/* Client retransmits all Confirm options until entering OPEN */
		if (oldstate == DCCP_PARTOPEN)
			dccp_feat_list_purge(&dccp_sk(sk)->dccps_featneg);
		break;

	case DCCP_CLOSED:
		/* Resets of established connections are counted separately */
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		/* Keep the port only if the user explicitly locked the binding */
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		/* Leaving OPEN for any other state ends an established conn. */
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
97
/*
 * Complete a passive close initiated by the peer.  Invoked when the user
 * has consumed (or discarded) the received Close/CloseReq.  For any state
 * other than the two passive-close states this is a no-op (no default case
 * needed in the switch).
 */
static void dccp_finish_passive_close(struct sock *sk)
{
	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
		/* Node (client or server) has received Close packet. */
		dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
		dccp_set_state(sk, DCCP_CLOSED);
		break;
	case DCCP_PASSIVE_CLOSEREQ:
		/*
		 * Client received CloseReq. We set the `active' flag so that
		 * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
		 */
		dccp_send_close(sk, 1);
		dccp_set_state(sk, DCCP_CLOSING);
	}
}
115
/*
 * dccp_done  -  Final transition into DCCP_CLOSED
 * @sk: socket whose connection has terminated
 *
 * Stops all transmit timers, marks both directions shut down, and either
 * wakes up a still-attached user process or, if the socket is already
 * orphaned (SOCK_DEAD), destroys it immediately.
 */
void dccp_done(struct sock *sk)
{
	dccp_set_state(sk, DCCP_CLOSED);
	dccp_clear_xmit_timers(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (!sock_flag(sk, SOCK_DEAD))
		sk->sk_state_change(sk);
	else
		inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);
130
131 const char *dccp_packet_name(const int type)
132 {
133         static const char *dccp_packet_names[] = {
134                 [DCCP_PKT_REQUEST]  = "REQUEST",
135                 [DCCP_PKT_RESPONSE] = "RESPONSE",
136                 [DCCP_PKT_DATA]     = "DATA",
137                 [DCCP_PKT_ACK]      = "ACK",
138                 [DCCP_PKT_DATAACK]  = "DATAACK",
139                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
140                 [DCCP_PKT_CLOSE]    = "CLOSE",
141                 [DCCP_PKT_RESET]    = "RESET",
142                 [DCCP_PKT_SYNC]     = "SYNC",
143                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
144         };
145
146         if (type >= DCCP_NR_PKT_TYPES)
147                 return "INVALID";
148         else
149                 return dccp_packet_names[type];
150 }
151
152 EXPORT_SYMBOL_GPL(dccp_packet_name);
153
154 const char *dccp_state_name(const int state)
155 {
156         static char *dccp_state_names[] = {
157         [DCCP_OPEN]             = "OPEN",
158         [DCCP_REQUESTING]       = "REQUESTING",
159         [DCCP_PARTOPEN]         = "PARTOPEN",
160         [DCCP_LISTEN]           = "LISTEN",
161         [DCCP_RESPOND]          = "RESPOND",
162         [DCCP_CLOSING]          = "CLOSING",
163         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
164         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
165         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
166         [DCCP_TIME_WAIT]        = "TIME_WAIT",
167         [DCCP_CLOSED]           = "CLOSED",
168         };
169
170         if (state >= DCCP_MAX_STATES)
171                 return "INVALID STATE!";
172         else
173                 return dccp_state_names[state];
174 }
175
176 EXPORT_SYMBOL_GPL(dccp_state_name);
177
/*
 * dccp_init_sock  -  Initialise the DCCP-specific part of a new socket
 * @sk: socket to initialise
 * @ctl_sock_initialized: false only while the per-protocol control socket
 *	itself is being created; the control socket never negotiates features
 *
 * Sets protocol defaults (timers, MSS cache, role, service code) and, for
 * ordinary sockets, registers the default feature-negotiation settings.
 * Returns 0 on success or the error from dccp_feat_init().
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;	/* conservative default, cf. RFC 1122 minimum-ish MSS */
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/* control socket doesn't need feat nego */
	if (likely(ctl_sock_initialized))
		return dccp_feat_init(sk);
	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
206
/*
 * dccp_destroy_sock  -  Release all DCCP-private resources of a socket
 * @sk: socket being destroyed
 *
 * Frees the pending retransmit skb, the bound port, the service list,
 * the ack vector, both half-connection CCIDs and any outstanding
 * feature-negotiation entries.  Called from the socket destruction path.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	/* The ack vector only exists when the feature was negotiated on */
	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
241
242 static inline int dccp_listen_start(struct sock *sk, int backlog)
243 {
244         struct dccp_sock *dp = dccp_sk(sk);
245
246         dp->dccps_role = DCCP_ROLE_LISTEN;
247         /* do not start to listen if feature negotiation setup fails */
248         if (dccp_feat_finalise_settings(dp))
249                 return -EPROTO;
250         return inet_csk_listen_start(sk, backlog);
251 }
252
253 static inline int dccp_need_reset(int state)
254 {
255         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
256                state != DCCP_REQUESTING;
257 }
258
/*
 * dccp_disconnect  -  Abort the connection and return the socket to CLOSED
 * @sk:    socket to disconnect (caller holds the socket lock)
 * @flags: unused here, kept for the sk_prot->disconnect signature
 *
 * This corresponds to the ABORT function of RFC 793, sec. 3.8: depending on
 * the old state it stops listening, sends a Reset (Code 2, "Aborted"), or
 * simply tears down local state.  Queues, timers and the cached route are
 * flushed; the bound address is released unless the user pinned it.
 * Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		/* connection never completed: error, but no Reset on the wire */
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	/* Drop anything queued in either direction, incl. the retransmit skb */
	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	/* A bound port must still have its bind bucket at this point */
	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
309
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 *
 *	Returns the POLL* mask describing the socket's current readiness.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? (i.e. past the client/server handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
367
/*
 * dccp_ioctl  -  Protocol-level ioctl handler
 * @sk:  socket the ioctl applies to
 * @cmd: ioctl command; only SIOCINQ is implemented
 * @arg: user-space pointer for the result
 *
 * SIOCINQ reports the payload length of the *next* queued packet only
 * (datagram semantics: a single read never spans packets), or 0 if the
 * receive queue is empty.  Listening sockets always get -ENOTCONN.
 */
int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
	int rc = -ENOTCONN;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN)
		goto out;

	switch (cmd) {
	case SIOCINQ: {
		struct sk_buff *skb;
		unsigned long amount = 0;

		skb = skb_peek(&sk->sk_receive_queue);
		if (skb != NULL) {
			/*
			 * We will only return the amount of this packet since
			 * that is all that will be read.
			 */
			amount = skb->len;
		}
		/* amount is unsigned long but written as int: fine, since an
		 * skb length always fits in an int */
		rc = put_user(amount, (int __user *)arg);
	}
		break;
	default:
		rc = -ENOIOCTLCMD;
		break;
	}
out:
	release_sock(sk);
	return rc;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);
403
/*
 * Set the service code (and optional additional service list) for this
 * socket.  @service is the first code, already copied in by the caller;
 * any further codes are read directly from @optval.
 *
 * NOTE(review): an invalid service code inside the user-supplied list is
 * reported as -EFAULT, the same as a failed copy; -EINVAL would arguably
 * be more accurate, but changing it would alter the user-visible ABI.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	/* More than one code supplied: build the additional service list */
	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	/* Replace any previously installed list */
	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
438
/*
 * Register a minimum checksum-coverage preference (RFC 4340, 9.2.1).
 * @cscov: requested minimum coverage, 0..15 (0 = keep the default, which
 *	   means full coverage, so nothing needs to be negotiated)
 * @rx:    true to constrain the receive direction, false for transmit
 *
 * Returns 0 on success, -EINVAL for out-of-range values, -ENOBUFS on
 * allocation failure, or the error from dccp_feat_register_sp().
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	/* dccp_feat_register_sp() keeps its own copy of the list */
	kfree(list);
	return rc;
}
474
/*
 * Register a CCID preference list for one or both half-connections.
 * @type:   DCCP_SOCKOPT_CCID (both directions), _TX_CCID or _RX_CCID
 * @optval: user-space array of acceptable CCID numbers, priority first
 * @optlen: length of that array, 1..DCCP_FEAT_MAX_SP_VALS bytes
 *
 * Returns 0 on success, -EINVAL/-ENOMEM/-EFAULT for argument problems, or
 * the error from dccp_feat_register_sp().
 */
static int dccp_setsockopt_ccid(struct sock *sk, int type,
				char __user *optval, int optlen)
{
	u8 *val;
	int rc = 0;

	if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
		return -EINVAL;

	val = kmalloc(optlen, GFP_KERNEL);
	if (val == NULL)
		return -ENOMEM;

	if (copy_from_user(val, optval, optlen)) {
		kfree(val);
		return -EFAULT;
	}

	lock_sock(sk);
	/* TX direction is "local" (1), RX is "remote" (0) in feat terms */
	if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);

	if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
		rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
	release_sock(sk);

	kfree(val);
	return rc;
}
504
/*
 * Dispatch SOL_DCCP setsockopt requests.  Options that carry non-integer
 * payloads (CCID lists, service codes) are handed off before the generic
 * integer argument is read; the remainder take a single int in @optval.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	/* All remaining options expect at least an int argument */
	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE gets the first code in val plus the raw optval for the rest */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
556
557 int dccp_setsockopt(struct sock *sk, int level, int optname,
558                     char __user *optval, int optlen)
559 {
560         if (level != SOL_DCCP)
561                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
562                                                              optname, optval,
563                                                              optlen);
564         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
565 }
566
567 EXPORT_SYMBOL_GPL(dccp_setsockopt);
568
569 #ifdef CONFIG_COMPAT
570 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
571                            char __user *optval, int optlen)
572 {
573         if (level != SOL_DCCP)
574                 return inet_csk_compat_setsockopt(sk, level, optname,
575                                                   optval, optlen);
576         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
577 }
578
579 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
580 #endif
581
582 static int dccp_getsockopt_service(struct sock *sk, int len,
583                                    __be32 __user *optval,
584                                    int __user *optlen)
585 {
586         const struct dccp_sock *dp = dccp_sk(sk);
587         const struct dccp_service_list *sl;
588         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
589
590         lock_sock(sk);
591         if ((sl = dp->dccps_service_list) != NULL) {
592                 slen = sl->dccpsl_nr * sizeof(u32);
593                 total_len += slen;
594         }
595
596         err = -EINVAL;
597         if (total_len > len)
598                 goto out;
599
600         err = 0;
601         if (put_user(total_len, optlen) ||
602             put_user(dp->dccps_service, optval) ||
603             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
604                 err = -EFAULT;
605 out:
606         release_sock(sk);
607         return err;
608 }
609
/*
 * Dispatch SOL_DCCP getsockopt requests.  Most options return a single int;
 * SERVICE and AVAILABLE_CCIDS have variable-length replies and are handled
 * by dedicated helpers.  Option numbers 128-191 and 192-255 are reserved
 * for the RX and TX CCID modules respectively.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		/* negative means no TX CCID is currently active */
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	/* Common exit for all fixed-size integer options */
	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
671
672 int dccp_getsockopt(struct sock *sk, int level, int optname,
673                     char __user *optval, int __user *optlen)
674 {
675         if (level != SOL_DCCP)
676                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
677                                                              optname, optval,
678                                                              optlen);
679         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
680 }
681
682 EXPORT_SYMBOL_GPL(dccp_getsockopt);
683
684 #ifdef CONFIG_COMPAT
685 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
686                            char __user *optval, int __user *optlen)
687 {
688         if (level != SOL_DCCP)
689                 return inet_csk_compat_getsockopt(sk, level, optname,
690                                                   optval, optlen);
691         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
692 }
693
694 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
695 #endif
696
/*
 * dccp_sendmsg  -  Queue one datagram for transmission
 * @iocb: kiocb of the request (unused here)
 * @sk:   socket to send on
 * @msg:  user message; must fit into a single packet (<= current MPS)
 * @len:  payload length
 *
 * Returns @len on success or a negative error (-EMSGSIZE for oversized
 * messages, -EAGAIN when the tx queue limit is hit, or the error from
 * waiting/allocation/copy).
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	/* DCCP is datagram-oriented: no fragmentation across packets */
	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* Enforce the sysctl-configurable tx backlog limit (0 = unlimited) */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process (which
	 * presumably checks sk_write_pending — confirm against the input
	 * path) keeps working.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* NOTE(review): the socket lock is dropped around the allocation, so
	 * the socket state may change before we re-acquire it; the state is
	 * not re-checked after lock_sock() below — worth confirming this
	 * window is benign. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
752
/*
 * dccp_recvmsg  -  Receive one datagram
 * @iocb:     kiocb of the request (unused here)
 * @sk:       socket to receive from
 * @msg:      destination for the payload
 * @len:      user buffer size; excess packet data is discarded (MSG_TRUNC)
 * @nonblock: non-zero for non-blocking operation
 * @flags:    MSG_* flags; MSG_PEEK leaves the packet on the queue
 * @addr_len: unused (connection-oriented protocol)
 *
 * Returns the number of payload bytes copied, 0 on an end-of-connection
 * packet (Close/CloseReq/Reset) or shutdown, or a negative error.  Note
 * that @len doubles as the return-value holder throughout the loop.
 */
int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len, int nonblock, int flags, int *addr_len)
{
	const struct dccp_hdr *dh;
	long timeo;

	lock_sock(sk);

	if (sk->sk_state == DCCP_LISTEN) {
		len = -ENOTCONN;
		goto out;
	}

	timeo = sock_rcvtimeo(sk, nonblock);

	do {
		struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

		if (skb == NULL)
			goto verify_sock_status;

		dh = dccp_hdr(skb);

		switch (dh->dccph_type) {
		case DCCP_PKT_DATA:
		case DCCP_PKT_DATAACK:
			goto found_ok_skb;

		case DCCP_PKT_CLOSE:
		case DCCP_PKT_CLOSEREQ:
			if (!(flags & MSG_PEEK))
				dccp_finish_passive_close(sk);
			/* fall through */
		case DCCP_PKT_RESET:
			dccp_pr_debug("found fin (%s) ok!\n",
				      dccp_packet_name(dh->dccph_type));
			len = 0;
			goto found_fin_ok;
		default:
			/* Non-data packets are consumed and skipped */
			dccp_pr_debug("packet_type=%s\n",
				      dccp_packet_name(dh->dccph_type));
			sk_eat_skb(sk, skb, 0);
		}
verify_sock_status:
		/* No data available: decide whether to return or wait */
		if (sock_flag(sk, SOCK_DONE)) {
			len = 0;
			break;
		}

		if (sk->sk_err) {
			len = sock_error(sk);
			break;
		}

		if (sk->sk_shutdown & RCV_SHUTDOWN) {
			len = 0;
			break;
		}

		if (sk->sk_state == DCCP_CLOSED) {
			if (!sock_flag(sk, SOCK_DONE)) {
				/* This occurs when user tries to read
				 * from never connected socket.
				 */
				len = -ENOTCONN;
				break;
			}
			len = 0;
			break;
		}

		if (!timeo) {
			len = -EAGAIN;
			break;
		}

		if (signal_pending(current)) {
			len = sock_intr_errno(timeo);
			break;
		}

		/* Block (releasing the socket lock) until data arrives */
		sk_wait_data(sk, &timeo);
		continue;
	found_ok_skb:
		/* Copy at most one packet; flag truncation of larger packets */
		if (len > skb->len)
			len = skb->len;
		else if (len < skb->len)
			msg->msg_flags |= MSG_TRUNC;

		if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
			/* Exception. Bailout! */
			len = -EFAULT;
			break;
		}
	found_fin_ok:
		if (!(flags & MSG_PEEK))
			sk_eat_skb(sk, skb, 0);
		break;
	} while (1);
out:
	release_sock(sk);
	return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);
858
/*
 * inet_dccp_listen  -  listen() entry point for DCCP sockets
 * @sock:    BSD socket layer handle
 * @backlog: requested accept-queue length
 *
 * Moves a CLOSED socket into the listening state; on an already-listening
 * socket only the backlog is adjusted.  Returns 0 or a negative error.
 */
int inet_dccp_listen(struct socket *sock, int backlog)
{
	struct sock *sk = sock->sk;
	unsigned char old_state;
	int err;

	lock_sock(sk);

	err = -EINVAL;
	if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
		goto out;

	old_state = sk->sk_state;
	if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
		goto out;

	/* Really, if the socket is already in listen state
	 * we can only allow the backlog to be adjusted.
	 */
	if (old_state != DCCP_LISTEN) {
		/*
		 * FIXME: here it probably should be sk->sk_prot->listen_start
		 * see tcp_listen_start
		 */
		err = dccp_listen_start(sk, backlog);
		if (err)
			goto out;
	}
	sk->sk_max_ack_backlog = backlog;
	err = 0;

out:
	release_sock(sk);
	return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);
896
/*
 * Start active connection teardown, as appropriate for the current state:
 * finish a passive close already in progress, or send our own Close and
 * move to the matching closing state.  A server not using the timewait
 * option goes to ACTIVE_CLOSEREQ so the client holds the TIMEWAIT state
 * (RFC 4340, 8.3); anything else falls through to plain CLOSED.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
923
/*
 * dccp_close  -  close a DCCP socket
 * @sk:      socket being closed
 * @timeout: time (in jiffies) given to sk_stream_wait_close() for queued
 *           data to drain before the socket is orphaned
 *
 * Analogous to tcp_close(): flushes any unread receive data (sending a
 * Reset if data was discarded), initiates connection termination where
 * needed, then orphans the socket and destroys it immediately if it has
 * already reached DCCP_CLOSED; otherwise destruction is deferred until
 * the protocol finishes closing.
 */
void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        u32 data_was_unread = 0;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                data_was_unread += skb->len;
                __kfree_skb(skb);
        }

        if (data_was_unread) {
                /* Unread data was tossed, send an appropriate Reset Code */
                DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
                dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
                dccp_set_state(sk, DCCP_CLOSED);
        } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (sk->sk_state != DCCP_CLOSED) {
                dccp_terminate_connection(sk);
        }

        /* Wait (bounded by @timeout) for pending output to be sent. */
        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        WARN_ON(sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}
1002
1003 EXPORT_SYMBOL_GPL(dccp_close);
1004
/*
 * dccp_shutdown  -  shutdown(2) handler for DCCP sockets
 * @sk:  socket being shut down
 * @how: shutdown mask (presumably RCV_SHUTDOWN/SEND_SHUTDOWN as passed
 *       by inet_shutdown() - confirm against the caller)
 *
 * Currently a stub: the request is only logged and half-close
 * semantics are not implemented.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}
1009
1010 EXPORT_SYMBOL_GPL(dccp_shutdown);
1011
/* Allocate the per-CPU DCCP MIB (SNMP) counter arrays; 0 on success. */
static inline int dccp_mib_init(void)
{
        return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1016
/* Release the counter arrays allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
        snmp_mib_free((void**)dccp_statistics);
}
1021
/* Optional override of the auto-sized established-hash bucket count,
 * consumed by dccp_init(); 0 (the default) means "size from memory". */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime toggle for dccp_pr_debug() output (writable module param). */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1033
/*
 * dccp_init  -  module initialisation
 *
 * Sizes and allocates the established ("ehash") and bind ("bhash")
 * hash tables relative to available memory (overridable through the
 * thash_entries module parameter), then brings up the MIB counters,
 * ack-vector support and sysctl entries. Failure paths unwind, via
 * gotos, exactly the steps completed so far, in reverse order.
 */
static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        /* struct dccp_skb_cb must fit into the skb control buffer */
        BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
                     FIELD_SIZEOF(struct sk_buff, cb));

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        /* smallest page order whose span reaches the goal */
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        /* retry at successively smaller orders if allocation fails */
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                /* round the bucket count down to a power of two */
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

        if (inet_ehash_locks_alloc(&dccp_hashinfo))
                        goto out_free_dccp_ehash;

        /* the bind hash starts from the same order as the ehash ... */
        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                /* ... but is capped near 64K buckets while it can still
                 * shrink (the continue re-evaluates with --bhash_order) */
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_locks;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;

        dccp_timestamping_init();
out:
        return rc;
        /* error unwinding: reverse order of the setup steps above */
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
        inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}
1143
/*
 * dccp_fini  -  module unload: tear down everything dccp_init() set up
 *
 * NOTE(review): the orders passed to free_pages() are recomputed here
 * from the stored bucket counts; this assumes get_order() reproduces
 * the order used at allocation time in dccp_init() - confirm for
 * bucket-struct sizes that are not a power of two.
 */
static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        inet_ehash_locks_free(&dccp_hashinfo);
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}
1158
1159 module_init(dccp_init);
1160 module_exit(dccp_fini);
1161
1162 MODULE_LICENSE("GPL");
1163 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1164 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");