dccp: Increase the scope of variable-length htonl/ntohl functions
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <linux/spinlock.h>
31 #include <linux/timer.h>
32 #include <linux/delay.h>
33 #include <linux/poll.h>
34
35 #include "ccid.h"
36 #include "dccp.h"
37 #include "feat.h"
38
/* Per-CPU DCCP MIB counters (SNMP statistics) for this module. */
DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

/* Count of DCCP sockets closed by the user but not yet destroyed. */
atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

/* Global hash tables for DCCP sockets (shared inet_hashinfo layout). */
struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
	.lhash_lock	= RW_LOCK_UNLOCKED,
	.lhash_users	= ATOMIC_INIT(0),
	.lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;
57
/*
 * dccp_set_state  -  Move a socket to a new DCCP state
 * @sk:    socket whose state is changed
 * @state: new state (DCCP_OPEN, DCCP_CLOSED, ...)
 *
 * Maintains the CURRESTAB/ESTABRESETS SNMP counters and, when entering
 * DCCP_CLOSED, unhashes the socket and returns its local port (unless the
 * user pinned the port with SO_BINDPORT).  Note that DCCP_CLOSED falls
 * through into the default case so the CURRESTAB decrement also applies.
 */
void dccp_set_state(struct sock *sk, const int state)
{
	const int oldstate = sk->sk_state;

	dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
		      dccp_state_name(oldstate), dccp_state_name(state));
	WARN_ON(state == oldstate);

	switch (state) {
	case DCCP_OPEN:
		if (oldstate != DCCP_OPEN)
			DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
		break;

	case DCCP_CLOSED:
		if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
		    oldstate == DCCP_CLOSING)
			DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

		sk->sk_prot->unhash(sk);
		if (inet_csk(sk)->icsk_bind_hash != NULL &&
		    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
			inet_put_port(sk);
		/* fall through */
	default:
		if (oldstate == DCCP_OPEN)
			DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
	}

	/* Change state AFTER socket is unhashed to avoid closed
	 * socket sitting in hash tables.
	 */
	sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);
94
95 static void dccp_finish_passive_close(struct sock *sk)
96 {
97         switch (sk->sk_state) {
98         case DCCP_PASSIVE_CLOSE:
99                 /* Node (client or server) has received Close packet. */
100                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
101                 dccp_set_state(sk, DCCP_CLOSED);
102                 break;
103         case DCCP_PASSIVE_CLOSEREQ:
104                 /*
105                  * Client received CloseReq. We set the `active' flag so that
106                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
107                  */
108                 dccp_send_close(sk, 1);
109                 dccp_set_state(sk, DCCP_CLOSING);
110         }
111 }
112
113 void dccp_done(struct sock *sk)
114 {
115         dccp_set_state(sk, DCCP_CLOSED);
116         dccp_clear_xmit_timers(sk);
117
118         sk->sk_shutdown = SHUTDOWN_MASK;
119
120         if (!sock_flag(sk, SOCK_DEAD))
121                 sk->sk_state_change(sk);
122         else
123                 inet_csk_destroy_sock(sk);
124 }
125
126 EXPORT_SYMBOL_GPL(dccp_done);
127
128 const char *dccp_packet_name(const int type)
129 {
130         static const char *dccp_packet_names[] = {
131                 [DCCP_PKT_REQUEST]  = "REQUEST",
132                 [DCCP_PKT_RESPONSE] = "RESPONSE",
133                 [DCCP_PKT_DATA]     = "DATA",
134                 [DCCP_PKT_ACK]      = "ACK",
135                 [DCCP_PKT_DATAACK]  = "DATAACK",
136                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
137                 [DCCP_PKT_CLOSE]    = "CLOSE",
138                 [DCCP_PKT_RESET]    = "RESET",
139                 [DCCP_PKT_SYNC]     = "SYNC",
140                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
141         };
142
143         if (type >= DCCP_NR_PKT_TYPES)
144                 return "INVALID";
145         else
146                 return dccp_packet_names[type];
147 }
148
149 EXPORT_SYMBOL_GPL(dccp_packet_name);
150
151 const char *dccp_state_name(const int state)
152 {
153         static char *dccp_state_names[] = {
154         [DCCP_OPEN]             = "OPEN",
155         [DCCP_REQUESTING]       = "REQUESTING",
156         [DCCP_PARTOPEN]         = "PARTOPEN",
157         [DCCP_LISTEN]           = "LISTEN",
158         [DCCP_RESPOND]          = "RESPOND",
159         [DCCP_CLOSING]          = "CLOSING",
160         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
161         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
162         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
163         [DCCP_TIME_WAIT]        = "TIME_WAIT",
164         [DCCP_CLOSED]           = "CLOSED",
165         };
166
167         if (state >= DCCP_MAX_STATES)
168                 return "INVALID STATE!";
169         else
170                 return dccp_state_names[state];
171 }
172
173 EXPORT_SYMBOL_GPL(dccp_state_name);
174
/*
 * dccp_init_sock  -  Initialise the DCCP-specific parts of a fresh socket
 * @sk:                   socket to initialise
 * @ctl_sock_initialized: zero only while the control socket itself is being
 *                        set up; that socket skips feature negotiation and
 *                        CCID allocation
 *
 * Sets protocol defaults (RTO, MSS cache, role, service code, ack ratios),
 * starts the transmit timers, and for normal sockets runs feature-
 * negotiation init and allocates the RX ack vector plus both CCID control
 * blocks.  Returns 0 on success or a negative errno.
 */
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);

	dccp_minisock_init(&dp->dccps_minisock);

	icsk->icsk_rto		= DCCP_TIMEOUT_INIT;
	icsk->icsk_syn_retries	= sysctl_dccp_request_retries;
	sk->sk_state		= DCCP_CLOSED;
	sk->sk_write_space	= dccp_write_space;
	icsk->icsk_sync_mss	= dccp_sync_mss;
	dp->dccps_mss_cache	= 536;
	dp->dccps_rate_last	= jiffies;
	dp->dccps_role		= DCCP_ROLE_UNDEFINED;
	dp->dccps_service	= DCCP_SERVICE_CODE_IS_ABSENT;
	dp->dccps_l_ack_ratio	= dp->dccps_r_ack_ratio = 1;

	dccp_init_xmit_timers(sk);

	INIT_LIST_HEAD(&dp->dccps_featneg);
	/*
	 * FIXME: We're hardcoding the CCID, and doing this at this point makes
	 * the listening (master) sock get CCID control blocks, which is not
	 * necessary, but for now, to not mess with the test userspace apps,
	 * lets leave it here, later the real solution is to do this in a
	 * setsockopt(CCIDs-I-want/accept). -acme
	 */
	if (likely(ctl_sock_initialized)) {
		int rc = dccp_feat_init(sk);

		if (rc)
			return rc;

		if (dmsk->dccpms_send_ack_vector) {
			dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
			if (dp->dccps_hc_rx_ackvec == NULL)
				return -ENOMEM;
		}
		dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
						      sk, GFP_KERNEL);
		dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
						      sk, GFP_KERNEL);
		if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
			     dp->dccps_hc_tx_ccid == NULL)) {
			/* Unwind: one of the two CCID pointers may be NULL
			 * here, so the delete helpers are presumably
			 * NULL-tolerant — confirm in ccid.h. */
			ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
			ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
			if (dmsk->dccpms_send_ack_vector) {
				dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
				dp->dccps_hc_rx_ackvec = NULL;
			}
			dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
			return -ENOMEM;
		}
	} else {
		/* control socket doesn't need feat nego */
		INIT_LIST_HEAD(&dmsk->dccpms_pending);
		INIT_LIST_HEAD(&dmsk->dccpms_conf);
	}

	return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);
240
/*
 * dccp_destroy_sock  -  Release all DCCP-private resources of a socket
 * @sk: socket being torn down
 *
 * Frees the pending retransmit skb, releases the bound port and the
 * service list, frees the RX ack vector and both CCID control blocks,
 * and purges any remaining feature-negotiation entries.
 */
void dccp_destroy_sock(struct sock *sk)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_minisock *dmsk = dccp_msk(sk);

	/*
	 * DCCP doesn't use sk_write_queue, just sk_send_head
	 * for retransmissions
	 */
	if (sk->sk_send_head != NULL) {
		kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	/* Clean up a referenced DCCP bind bucket. */
	if (inet_csk(sk)->icsk_bind_hash != NULL)
		inet_put_port(sk);

	kfree(dp->dccps_service_list);
	dp->dccps_service_list = NULL;

	if (dmsk->dccpms_send_ack_vector) {
		dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
		dp->dccps_hc_rx_ackvec = NULL;
	}
	ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
	ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
	dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

	/* clean up feature negotiation state */
	dccp_feat_list_purge(&dp->dccps_featneg);
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);
275
276 static inline int dccp_listen_start(struct sock *sk, int backlog)
277 {
278         struct dccp_sock *dp = dccp_sk(sk);
279
280         dp->dccps_role = DCCP_ROLE_LISTEN;
281         /* do not start to listen if feature negotiation setup fails */
282         if (dccp_feat_finalise_settings(dp))
283                 return -EPROTO;
284         return inet_csk_listen_start(sk, backlog);
285 }
286
287 static inline int dccp_need_reset(int state)
288 {
289         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
290                state != DCCP_REQUESTING;
291 }
292
/*
 * dccp_disconnect  -  Abort the connection and reset the socket
 * @sk:    socket to disconnect
 * @flags: unused here; kept for the protocol-ops signature
 *
 * Sends a Reset with Code 2, "Aborted", where the state requires one,
 * purges all queues and timers, clears the destination, and leaves the
 * socket in a clean CLOSED state (the local port binding is retained).
 * Always returns 0.
 */
int dccp_disconnect(struct sock *sk, int flags)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct inet_sock *inet = inet_sk(sk);
	int err = 0;
	const int old_state = sk->sk_state;

	if (old_state != DCCP_CLOSED)
		dccp_set_state(sk, DCCP_CLOSED);

	/*
	 * This corresponds to the ABORT function of RFC793, sec. 3.8
	 * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
	 */
	if (old_state == DCCP_LISTEN) {
		inet_csk_listen_stop(sk);
	} else if (dccp_need_reset(old_state)) {
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		sk->sk_err = ECONNRESET;
	} else if (old_state == DCCP_REQUESTING)
		sk->sk_err = ECONNRESET;

	dccp_clear_xmit_timers(sk);

	__skb_queue_purge(&sk->sk_receive_queue);
	__skb_queue_purge(&sk->sk_write_queue);
	if (sk->sk_send_head != NULL) {
		__kfree_skb(sk->sk_send_head);
		sk->sk_send_head = NULL;
	}

	inet->dport = 0;

	/* Reset the source address unless the user pinned it via bind(). */
	if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
		inet_reset_saddr(sk);

	sk->sk_shutdown = 0;
	sock_reset_flag(sk, SOCK_DONE);

	icsk->icsk_backoff = 0;
	inet_csk_delack_init(sk);
	__sk_dst_reset(sk);

	WARN_ON(inet->num && !icsk->icsk_bind_hash);

	sk->sk_error_report(sk);
	return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);
343
/*
 *	Wait for a DCCP event.
 *
 *	Note that we don't need to lock the socket, as the upper poll layers
 *	take care of normal races (between the test and the event) and we don't
 *	go look at any of the socket buffers directly.
 *
 *	Returns the POLL* event mask currently applicable to the socket.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
		       poll_table *wait)
{
	unsigned int mask;
	struct sock *sk = sock->sk;

	poll_wait(file, sk->sk_sleep, wait);
	if (sk->sk_state == DCCP_LISTEN)
		return inet_csk_listen_poll(sk);

	/* Socket is not locked. We are protected from async events
	   by poll logic and correct handling of state changes
	   made by another threads is impossible in any case.
	 */

	mask = 0;
	if (sk->sk_err)
		mask = POLLERR;

	if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
		mask |= POLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= POLLIN | POLLRDNORM | POLLRDHUP;

	/* Connected? (i.e. past REQUESTING/RESPOND handshake states) */
	if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
		if (atomic_read(&sk->sk_rmem_alloc) > 0)
			mask |= POLLIN | POLLRDNORM;

		if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
			if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
				mask |= POLLOUT | POLLWRNORM;
			} else {  /* send SIGIO later */
				set_bit(SOCK_ASYNC_NOSPACE,
					&sk->sk_socket->flags);
				set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

				/* Race breaker. If space is freed after
				 * wspace test but before the flags are set,
				 * IO signal will be lost.
				 */
				if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
					mask |= POLLOUT | POLLWRNORM;
			}
		}
	}
	return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);
401
402 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
403 {
404         int rc = -ENOTCONN;
405
406         lock_sock(sk);
407
408         if (sk->sk_state == DCCP_LISTEN)
409                 goto out;
410
411         switch (cmd) {
412         case SIOCINQ: {
413                 struct sk_buff *skb;
414                 unsigned long amount = 0;
415
416                 skb = skb_peek(&sk->sk_receive_queue);
417                 if (skb != NULL) {
418                         /*
419                          * We will only return the amount of this packet since
420                          * that is all that will be read.
421                          */
422                         amount = skb->len;
423                 }
424                 rc = put_user(amount, (int __user *)arg);
425         }
426                 break;
427         default:
428                 rc = -ENOIOCTLCMD;
429                 break;
430         }
431 out:
432         release_sock(sk);
433         return rc;
434 }
435
436 EXPORT_SYMBOL_GPL(dccp_ioctl);
437
/*
 * dccp_setsockopt_service  -  Install the service code (plus optional list)
 * @sk:      socket being configured
 * @service: primary service code (first __be32 of optval, already copied in)
 * @optval:  user buffer; may hold further service codes after the first
 * @optlen:  total length of the user buffer in bytes
 *
 * Extra codes, if present, are copied into a freshly allocated
 * dccp_service_list which atomically replaces the previous one under the
 * socket lock.
 *
 * NOTE(review): a list containing DCCP_SERVICE_INVALID_VALUE is rejected
 * with -EFAULT rather than -EINVAL — confirm this is the intended errno.
 */
static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
				   char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct dccp_service_list *sl = NULL;

	if (service == DCCP_SERVICE_INVALID_VALUE ||
	    optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
		return -EINVAL;

	if (optlen > sizeof(service)) {
		sl = kmalloc(optlen, GFP_KERNEL);
		if (sl == NULL)
			return -ENOMEM;

		/* Number of service codes following the primary one. */
		sl->dccpsl_nr = optlen / sizeof(u32) - 1;
		if (copy_from_user(sl->dccpsl_list,
				   optval + sizeof(service),
				   optlen - sizeof(service)) ||
		    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
			kfree(sl);
			return -EFAULT;
		}
	}

	lock_sock(sk);
	dp->dccps_service = service;

	kfree(dp->dccps_service_list);

	dp->dccps_service_list = sl;
	release_sock(sk);
	return 0;
}
472
/*
 * dccp_setsockopt_cscov  -  Register preferred minimum checksum coverage
 * @sk:    socket
 * @cscov: requested coverage, 0..15 (0 is a no-op: keep the default)
 * @rx:    true for the receive direction, false for transmit
 *
 * NOTE(review): the population loop below advances @cscov to 16, and that
 * advanced value is what is stored in dccps_pcrlen/dccps_pcslen on
 * success — confirm this is intentional and not meant to be the original
 * requested value.
 */
static int dccp_setsockopt_cscov(struct sock *sk, int cscov, bool rx)
{
	u8 *list, len;
	int i, rc;

	if (cscov < 0 || cscov > 15)
		return -EINVAL;
	/*
	 * Populate a list of permissible values, in the range cscov...15. This
	 * is necessary since feature negotiation of single values only works if
	 * both sides incidentally choose the same value. Since the list starts
	 * lowest-value first, negotiation will pick the smallest shared value.
	 */
	if (cscov == 0)
		return 0;
	len = 16 - cscov;

	list = kmalloc(len, GFP_KERNEL);
	if (list == NULL)
		return -ENOBUFS;

	for (i = 0; i < len; i++)
		list[i] = cscov++;

	rc = dccp_feat_register_sp(sk, DCCPF_MIN_CSUM_COVER, rx, list, len);

	if (rc == 0) {
		if (rx)
			dccp_sk(sk)->dccps_pcrlen = cscov;
		else
			dccp_sk(sk)->dccps_pcslen = cscov;
	}
	kfree(list);
	return rc;
}
508
509 static int dccp_setsockopt_ccid(struct sock *sk, int type,
510                                 char __user *optval, int optlen)
511 {
512         u8 *val;
513         int rc = 0;
514
515         if (optlen < 1 || optlen > DCCP_FEAT_MAX_SP_VALS)
516                 return -EINVAL;
517
518         val = kmalloc(optlen, GFP_KERNEL);
519         if (val == NULL)
520                 return -ENOMEM;
521
522         if (copy_from_user(val, optval, optlen)) {
523                 kfree(val);
524                 return -EFAULT;
525         }
526
527         lock_sock(sk);
528         if (type == DCCP_SOCKOPT_TX_CCID || type == DCCP_SOCKOPT_CCID)
529                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 1, val, optlen);
530
531         if (!rc && (type == DCCP_SOCKOPT_RX_CCID || type == DCCP_SOCKOPT_CCID))
532                 rc = dccp_feat_register_sp(sk, DCCPF_CCID, 0, val, optlen);
533         release_sock(sk);
534
535         kfree(val);
536         return rc;
537 }
538
/*
 * do_dccp_setsockopt  -  Handle SOL_DCCP options on the setsockopt path
 *
 * Deprecated options merely warn and return success.  The CCID options
 * are dispatched before the generic integer parsing below because they
 * take a byte array, not an int.
 */
static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
		char __user *optval, int optlen)
{
	struct dccp_sock *dp = dccp_sk(sk);
	int val, err = 0;

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CHANGE_L:
	case DCCP_SOCKOPT_CHANGE_R:
		DCCP_WARN("sockopt(CHANGE_L/R) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_CCID:
	case DCCP_SOCKOPT_RX_CCID:
	case DCCP_SOCKOPT_TX_CCID:
		return dccp_setsockopt_ccid(sk, optname, optval, optlen);
	}

	/* All remaining options take (at least) a single int argument. */
	if (optlen < (int)sizeof(int))
		return -EINVAL;

	if (get_user(val, (int __user *)optval))
		return -EFAULT;

	/* SERVICE may carry further service codes after the first int. */
	if (optname == DCCP_SOCKOPT_SERVICE)
		return dccp_setsockopt_service(sk, val, optval, optlen);

	lock_sock(sk);
	switch (optname) {
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		if (dp->dccps_role != DCCP_ROLE_SERVER)
			err = -EOPNOTSUPP;
		else
			dp->dccps_server_timewait = (val != 0);
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, false);
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		err = dccp_setsockopt_cscov(sk, val, true);
		break;
	default:
		err = -ENOPROTOOPT;
		break;
	}
	release_sock(sk);

	return err;
}
590
591 int dccp_setsockopt(struct sock *sk, int level, int optname,
592                     char __user *optval, int optlen)
593 {
594         if (level != SOL_DCCP)
595                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
596                                                              optname, optval,
597                                                              optlen);
598         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
599 }
600
601 EXPORT_SYMBOL_GPL(dccp_setsockopt);
602
#ifdef CONFIG_COMPAT
/*
 * 32-bit compat setsockopt entry point: only the non-DCCP levels differ
 * from the native path.
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_setsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
615
616 static int dccp_getsockopt_service(struct sock *sk, int len,
617                                    __be32 __user *optval,
618                                    int __user *optlen)
619 {
620         const struct dccp_sock *dp = dccp_sk(sk);
621         const struct dccp_service_list *sl;
622         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
623
624         lock_sock(sk);
625         if ((sl = dp->dccps_service_list) != NULL) {
626                 slen = sl->dccpsl_nr * sizeof(u32);
627                 total_len += slen;
628         }
629
630         err = -EINVAL;
631         if (total_len > len)
632                 goto out;
633
634         err = 0;
635         if (put_user(total_len, optlen) ||
636             put_user(dp->dccps_service, optval) ||
637             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
638                 err = -EFAULT;
639 out:
640         release_sock(sk);
641         return err;
642 }
643
/*
 * do_dccp_getsockopt  -  Handle SOL_DCCP options on the getsockopt path
 *
 * Option numbers 128..191 are forwarded to the RX CCID and 192..255 to
 * the TX CCID; the remaining options are answered here as a single int.
 */
static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
		    char __user *optval, int __user *optlen)
{
	struct dccp_sock *dp;
	int val, len;

	if (get_user(len, optlen))
		return -EFAULT;

	if (len < (int)sizeof(int))
		return -EINVAL;

	dp = dccp_sk(sk);

	switch (optname) {
	case DCCP_SOCKOPT_PACKET_SIZE:
		DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
		return 0;
	case DCCP_SOCKOPT_SERVICE:
		return dccp_getsockopt_service(sk, len,
					       (__be32 __user *)optval, optlen);
	case DCCP_SOCKOPT_GET_CUR_MPS:
		val = dp->dccps_mss_cache;
		break;
	case DCCP_SOCKOPT_AVAILABLE_CCIDS:
		return ccid_getsockopt_builtin_ccids(sk, len, optval, optlen);
	case DCCP_SOCKOPT_TX_CCID:
		val = ccid_get_current_tx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_RX_CCID:
		val = ccid_get_current_rx_ccid(dp);
		if (val < 0)
			return -ENOPROTOOPT;
		break;
	case DCCP_SOCKOPT_SERVER_TIMEWAIT:
		val = dp->dccps_server_timewait;
		break;
	case DCCP_SOCKOPT_SEND_CSCOV:
		val = dp->dccps_pcslen;
		break;
	case DCCP_SOCKOPT_RECV_CSCOV:
		val = dp->dccps_pcrlen;
		break;
	case 128 ... 191:
		/* CCID-specific options, receive half-connection */
		return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	case 192 ... 255:
		/* CCID-specific options, transmit half-connection */
		return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
					     len, (u32 __user *)optval, optlen);
	default:
		return -ENOPROTOOPT;
	}

	len = sizeof(val);
	if (put_user(len, optlen) || copy_to_user(optval, &val, len))
		return -EFAULT;

	return 0;
}
705
706 int dccp_getsockopt(struct sock *sk, int level, int optname,
707                     char __user *optval, int __user *optlen)
708 {
709         if (level != SOL_DCCP)
710                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
711                                                              optname, optval,
712                                                              optlen);
713         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
714 }
715
716 EXPORT_SYMBOL_GPL(dccp_getsockopt);
717
#ifdef CONFIG_COMPAT
/*
 * 32-bit compat getsockopt entry point: only the non-DCCP levels differ
 * from the native path.
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
			   char __user *optval, int __user *optlen)
{
	if (level == SOL_DCCP)
		return do_dccp_getsockopt(sk, level, optname, optval, optlen);

	return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
730
/*
 * dccp_sendmsg  -  Queue one datagram for transmission
 *
 * DCCP is datagram-oriented: a message longer than the current MPS is
 * rejected with -EMSGSIZE rather than fragmented.  The skb is appended to
 * sk_write_queue; dccp_write_xmit() decides when it may actually be sent.
 * Returns the number of bytes queued, or a negative errno.
 */
int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
		 size_t len)
{
	const struct dccp_sock *dp = dccp_sk(sk);
	const int flags = msg->msg_flags;
	const int noblock = flags & MSG_DONTWAIT;
	struct sk_buff *skb;
	int rc, size;
	long timeo;

	if (len > dp->dccps_mss_cache)
		return -EMSGSIZE;

	lock_sock(sk);

	/* Optional sysctl cap on the number of queued-but-unsent packets. */
	if (sysctl_dccp_tx_qlen &&
	    (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
		rc = -EAGAIN;
		goto out_release;
	}

	timeo = sock_sndtimeo(sk, noblock);

	/*
	 * We have to use sk_stream_wait_connect here to set sk_write_pending,
	 * so that the trick in dccp_rcv_request_sent_state_process works.
	 */
	/* Wait for a connection to finish. */
	if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
		if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
			goto out_release;

	size = sk->sk_prot->max_header + len;
	/* Drop the socket lock around the (possibly sleeping) allocation. */
	release_sock(sk);
	skb = sock_alloc_send_skb(sk, size, noblock, &rc);
	lock_sock(sk);
	if (skb == NULL)
		goto out_release;

	skb_reserve(skb, sk->sk_prot->max_header);
	rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
	if (rc != 0)
		goto out_discard;

	skb_queue_tail(&sk->sk_write_queue, skb);
	dccp_write_xmit(sk,0);
out_release:
	release_sock(sk);
	return rc ? : len;	/* rc == 0 means success: report full length */
out_discard:
	kfree_skb(skb);
	goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);
786
787 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
788                  size_t len, int nonblock, int flags, int *addr_len)
789 {
790         const struct dccp_hdr *dh;
791         long timeo;
792
793         lock_sock(sk);
794
795         if (sk->sk_state == DCCP_LISTEN) {
796                 len = -ENOTCONN;
797                 goto out;
798         }
799
800         timeo = sock_rcvtimeo(sk, nonblock);
801
802         do {
803                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
804
805                 if (skb == NULL)
806                         goto verify_sock_status;
807
808                 dh = dccp_hdr(skb);
809
810                 switch (dh->dccph_type) {
811                 case DCCP_PKT_DATA:
812                 case DCCP_PKT_DATAACK:
813                         goto found_ok_skb;
814
815                 case DCCP_PKT_CLOSE:
816                 case DCCP_PKT_CLOSEREQ:
817                         if (!(flags & MSG_PEEK))
818                                 dccp_finish_passive_close(sk);
819                         /* fall through */
820                 case DCCP_PKT_RESET:
821                         dccp_pr_debug("found fin (%s) ok!\n",
822                                       dccp_packet_name(dh->dccph_type));
823                         len = 0;
824                         goto found_fin_ok;
825                 default:
826                         dccp_pr_debug("packet_type=%s\n",
827                                       dccp_packet_name(dh->dccph_type));
828                         sk_eat_skb(sk, skb, 0);
829                 }
830 verify_sock_status:
831                 if (sock_flag(sk, SOCK_DONE)) {
832                         len = 0;
833                         break;
834                 }
835
836                 if (sk->sk_err) {
837                         len = sock_error(sk);
838                         break;
839                 }
840
841                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
842                         len = 0;
843                         break;
844                 }
845
846                 if (sk->sk_state == DCCP_CLOSED) {
847                         if (!sock_flag(sk, SOCK_DONE)) {
848                                 /* This occurs when user tries to read
849                                  * from never connected socket.
850                                  */
851                                 len = -ENOTCONN;
852                                 break;
853                         }
854                         len = 0;
855                         break;
856                 }
857
858                 if (!timeo) {
859                         len = -EAGAIN;
860                         break;
861                 }
862
863                 if (signal_pending(current)) {
864                         len = sock_intr_errno(timeo);
865                         break;
866                 }
867
868                 sk_wait_data(sk, &timeo);
869                 continue;
870         found_ok_skb:
871                 if (len > skb->len)
872                         len = skb->len;
873                 else if (len < skb->len)
874                         msg->msg_flags |= MSG_TRUNC;
875
876                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
877                         /* Exception. Bailout! */
878                         len = -EFAULT;
879                         break;
880                 }
881         found_fin_ok:
882                 if (!(flags & MSG_PEEK))
883                         sk_eat_skb(sk, skb, 0);
884                 break;
885         } while (1);
886 out:
887         release_sock(sk);
888         return len;
889 }
890
/* Exported for the AF-specific (IPv4/IPv6) DCCP modules. */
EXPORT_SYMBOL_GPL(dccp_recvmsg);
892
893 int inet_dccp_listen(struct socket *sock, int backlog)
894 {
895         struct sock *sk = sock->sk;
896         unsigned char old_state;
897         int err;
898
899         lock_sock(sk);
900
901         err = -EINVAL;
902         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
903                 goto out;
904
905         old_state = sk->sk_state;
906         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
907                 goto out;
908
909         /* Really, if the socket is already in listen state
910          * we can only allow the backlog to be adjusted.
911          */
912         if (old_state != DCCP_LISTEN) {
913                 /*
914                  * FIXME: here it probably should be sk->sk_prot->listen_start
915                  * see tcp_listen_start
916                  */
917                 err = dccp_listen_start(sk, backlog);
918                 if (err)
919                         goto out;
920         }
921         sk->sk_max_ack_backlog = backlog;
922         err = 0;
923
924 out:
925         release_sock(sk);
926         return err;
927 }
928
929 EXPORT_SYMBOL_GPL(inet_dccp_listen);
930
/*
 * Drive the socket towards DCCP_CLOSED as part of an active close,
 * sending whatever termination packets the current state requires.
 */
static void dccp_terminate_connection(struct sock *sk)
{
	u8 next_state = DCCP_CLOSED;

	switch (sk->sk_state) {
	case DCCP_PASSIVE_CLOSE:
	case DCCP_PASSIVE_CLOSEREQ:
		/* The peer already asked us to close - answer its
		 * Close/CloseReq instead of starting our own handshake. */
		dccp_finish_passive_close(sk);
		break;
	case DCCP_PARTOPEN:
		dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
		inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
		/* fall through */
	case DCCP_OPEN:
		dccp_send_close(sk, 1);

		if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
		    !dccp_sk(sk)->dccps_server_timewait)
			/* server side, not holding TIMEWAIT itself */
			next_state = DCCP_ACTIVE_CLOSEREQ;
		else
			next_state = DCCP_CLOSING;
		/* fall through */
	default:
		dccp_set_state(sk, next_state);
	}
}
957
/*
 * dccp_close  -  close(2) handler / final socket teardown
 * @sk:      socket being closed
 * @timeout: how long to wait for queued data to drain (linger time)
 *
 * Flushes unread receive data (aborting the connection if any was
 * pending), kicks off the closing handshake as required by the current
 * state, and then orphans the socket so the protocol can finish the
 * handshake after the file descriptor is gone.
 */
void dccp_close(struct sock *sk, long timeout)
{
	struct dccp_sock *dp = dccp_sk(sk);
	struct sk_buff *skb;
	u32 data_was_unread = 0;
	int state;

	lock_sock(sk);

	sk->sk_shutdown = SHUTDOWN_MASK;

	if (sk->sk_state == DCCP_LISTEN) {
		dccp_set_state(sk, DCCP_CLOSED);

		/* Special case: tear down the listener and its
		 * pending-accept queue; no handshake needed. */
		inet_csk_listen_stop(sk);

		goto adjudge_to_death;
	}

	sk_stop_timer(sk, &dp->dccps_xmit_timer);

	/*
	 * We need to flush the recv. buffs.  We do this only on the
	 * descriptor close, not protocol-sourced closes, because the
	 * reader process may not have drained the data yet!
	 */
	while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		data_was_unread += skb->len;
		__kfree_skb(skb);
	}

	if (data_was_unread) {
		/* Unread data was tossed, send an appropriate Reset Code */
		DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
		dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
		dccp_set_state(sk, DCCP_CLOSED);
	} else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
		/* Check zero linger _after_ checking for unread data. */
		sk->sk_prot->disconnect(sk, 0);
	} else if (sk->sk_state != DCCP_CLOSED) {
		/* Normal path: start the closing handshake. */
		dccp_terminate_connection(sk);
	}

	/* Wait (bounded by @timeout) for transmit queues to drain. */
	sk_stream_wait_close(sk, timeout);

adjudge_to_death:
	state = sk->sk_state;
	sock_hold(sk);
	sock_orphan(sk);
	atomic_inc(sk->sk_prot->orphan_count);

	/*
	 * It is the last release_sock in its life. It will remove backlog.
	 */
	release_sock(sk);
	/*
	 * Now socket is owned by kernel and we acquire BH lock
	 * to finish close. No need to check for user refs.
	 */
	local_bh_disable();
	bh_lock_sock(sk);
	WARN_ON(sock_owned_by_user(sk));

	/* Have we already been destroyed by a softirq or backlog? */
	if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
		goto out;

	if (sk->sk_state == DCCP_CLOSED)
		inet_csk_destroy_sock(sk);

	/* Otherwise, socket is reprieved until protocol close. */

out:
	bh_unlock_sock(sk);
	local_bh_enable();
	sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);
1038
/*
 * dccp_shutdown  -  shutdown(2) handler
 * @sk:  socket
 * @how: SHUT_RD/SHUT_WR/SHUT_RDWR selector (logged in hex)
 *
 * Currently a no-op apart from the debug trace: half-close is not
 * implemented here; full teardown happens in dccp_close().
 */
void dccp_shutdown(struct sock *sk, int how)
{
	dccp_pr_debug("called shutdown(%x)\n", how);
}

EXPORT_SYMBOL_GPL(dccp_shutdown);
1045
/* Allocate the per-CPU DCCP SNMP (MIB) counters; 0 on success. */
static inline int dccp_mib_init(void)
{
	return snmp_mib_init((void**)dccp_statistics, sizeof(struct dccp_mib));
}
1050
/* Free the per-CPU SNMP counters allocated by dccp_mib_init(). */
static inline void dccp_mib_exit(void)
{
	snmp_mib_free((void**)dccp_statistics);
}
1055
/* Override for the established-hash sizing in dccp_init() (0 = auto). */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/* Runtime switch for dccp_pr_debug() output, settable via sysfs. */
int dccp_debug;
module_param(dccp_debug, bool, 0644);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1067
/*
 * dccp_init  -  module initialisation
 *
 * Creates the bind-bucket slab cache, sizes and allocates the
 * established (ehash) and bind (bhash) hash tables, then brings up the
 * MIB counters, ack-vector machinery and sysctl hooks.  On any failure
 * the goto chain at the bottom unwinds the steps already completed.
 */
static int __init dccp_init(void)
{
	unsigned long goal;
	int ehash_order, bhash_order, i;
	int rc = -ENOBUFS;

	/* dccp_skb_cb must fit in sk_buff's control buffer */
	BUILD_BUG_ON(sizeof(struct dccp_skb_cb) >
		     FIELD_SIZEOF(struct sk_buff, cb));

	dccp_hashinfo.bind_bucket_cachep =
		kmem_cache_create("dccp_bind_bucket",
				  sizeof(struct inet_bind_bucket), 0,
				  SLAB_HWCACHE_ALIGN, NULL);
	if (!dccp_hashinfo.bind_bucket_cachep)
		goto out;

	/*
	 * Size and allocate the main established and bind bucket
	 * hash tables.
	 *
	 * The methodology is similar to that of the buffer cache.
	 */
	if (num_physpages >= (128 * 1024))
		goal = num_physpages >> (21 - PAGE_SHIFT);
	else
		goal = num_physpages >> (23 - PAGE_SHIFT);

	/* the thash_entries module parameter overrides the auto-sizing */
	if (thash_entries)
		goal = (thash_entries *
			sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
	for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
		;
	/* Retry with progressively smaller orders until pages are found. */
	do {
		dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
					sizeof(struct inet_ehash_bucket);
		/* round the bucket count down to a power of two */
		while (dccp_hashinfo.ehash_size &
		       (dccp_hashinfo.ehash_size - 1))
			dccp_hashinfo.ehash_size--;
		dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
			__get_free_pages(GFP_ATOMIC, ehash_order);
	} while (!dccp_hashinfo.ehash && --ehash_order > 0);

	if (!dccp_hashinfo.ehash) {
		DCCP_CRIT("Failed to allocate DCCP established hash table");
		goto out_free_bind_bucket_cachep;
	}

	for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
		INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
	}

	if (inet_ehash_locks_alloc(&dccp_hashinfo))
			goto out_free_dccp_ehash;

	bhash_order = ehash_order;

	/* Bind table is capped at 64K buckets; shrink the order first. */
	do {
		dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
					sizeof(struct inet_bind_hashbucket);
		if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
		    bhash_order > 0)
			continue;	/* re-test with bhash_order - 1 */
		dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
			__get_free_pages(GFP_ATOMIC, bhash_order);
	} while (!dccp_hashinfo.bhash && --bhash_order >= 0);

	if (!dccp_hashinfo.bhash) {
		DCCP_CRIT("Failed to allocate DCCP bind hash table");
		goto out_free_dccp_locks;
	}

	for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
		spin_lock_init(&dccp_hashinfo.bhash[i].lock);
		INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
	}

	rc = dccp_mib_init();
	if (rc)
		goto out_free_dccp_bhash;

	rc = dccp_ackvec_init();
	if (rc)
		goto out_free_dccp_mib;

	rc = dccp_sysctl_init();
	if (rc)
		goto out_ackvec_exit;

	dccp_timestamping_init();
out:
	return rc;
	/* Error unwind: undo the steps above in reverse order. */
out_ackvec_exit:
	dccp_ackvec_exit();
out_free_dccp_mib:
	dccp_mib_exit();
out_free_dccp_bhash:
	free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
	dccp_hashinfo.bhash = NULL;
out_free_dccp_locks:
	inet_ehash_locks_free(&dccp_hashinfo);
out_free_dccp_ehash:
	free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
	dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_hashinfo.bind_bucket_cachep = NULL;
	goto out;
}
1177
/*
 * dccp_fini  -  module unload: release everything dccp_init() set up,
 * roughly in reverse order of allocation.
 */
static void __exit dccp_fini(void)
{
	dccp_mib_exit();
	/* The hash tables are page runs; recompute the allocation order
	 * from the bucket counts to free them. */
	free_pages((unsigned long)dccp_hashinfo.bhash,
		   get_order(dccp_hashinfo.bhash_size *
			     sizeof(struct inet_bind_hashbucket)));
	free_pages((unsigned long)dccp_hashinfo.ehash,
		   get_order(dccp_hashinfo.ehash_size *
			     sizeof(struct inet_ehash_bucket)));
	inet_ehash_locks_free(&dccp_hashinfo);
	kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
	dccp_ackvec_exit();
	dccp_sysctl_exit();
}
1192
/* Module entry/exit points and metadata. */
module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");