Merge git://git.kernel.org/pub/scm/linux/kernel/git/wim/linux-2.6-watchdog
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
40 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41
42 EXPORT_SYMBOL_GPL(dccp_statistics);
43
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49         .lhash_lock     = RW_LOCK_UNLOCKED,
50         .lhash_users    = ATOMIC_INIT(0),
51         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
52 };
53
54 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
56 /* the maximum queue length for tx in packets. 0 is no limit */
57 int sysctl_dccp_tx_qlen __read_mostly = 5;
58
59 void dccp_set_state(struct sock *sk, const int state)
60 {
61         const int oldstate = sk->sk_state;
62
63         dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
64                       dccp_role(sk), sk,
65                       dccp_state_name(oldstate), dccp_state_name(state));
66         WARN_ON(state == oldstate);
67
68         switch (state) {
69         case DCCP_OPEN:
70                 if (oldstate != DCCP_OPEN)
71                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
72                 break;
73
74         case DCCP_CLOSED:
75                 if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
76                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77
78                 sk->sk_prot->unhash(sk);
79                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81                         inet_put_port(&dccp_hashinfo, sk);
82                 /* fall through */
83         default:
84                 if (oldstate == DCCP_OPEN)
85                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
86         }
87
88         /* Change state AFTER socket is unhashed to avoid closed
89          * socket sitting in hash tables.
90          */
91         sk->sk_state = state;
92 }
93
94 EXPORT_SYMBOL_GPL(dccp_set_state);
95
96 void dccp_done(struct sock *sk)
97 {
98         dccp_set_state(sk, DCCP_CLOSED);
99         dccp_clear_xmit_timers(sk);
100
101         sk->sk_shutdown = SHUTDOWN_MASK;
102
103         if (!sock_flag(sk, SOCK_DEAD))
104                 sk->sk_state_change(sk);
105         else
106                 inet_csk_destroy_sock(sk);
107 }
108
109 EXPORT_SYMBOL_GPL(dccp_done);
110
111 const char *dccp_packet_name(const int type)
112 {
113         static const char *dccp_packet_names[] = {
114                 [DCCP_PKT_REQUEST]  = "REQUEST",
115                 [DCCP_PKT_RESPONSE] = "RESPONSE",
116                 [DCCP_PKT_DATA]     = "DATA",
117                 [DCCP_PKT_ACK]      = "ACK",
118                 [DCCP_PKT_DATAACK]  = "DATAACK",
119                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
120                 [DCCP_PKT_CLOSE]    = "CLOSE",
121                 [DCCP_PKT_RESET]    = "RESET",
122                 [DCCP_PKT_SYNC]     = "SYNC",
123                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
124         };
125
126         if (type >= DCCP_NR_PKT_TYPES)
127                 return "INVALID";
128         else
129                 return dccp_packet_names[type];
130 }
131
132 EXPORT_SYMBOL_GPL(dccp_packet_name);
133
134 const char *dccp_state_name(const int state)
135 {
136         static char *dccp_state_names[] = {
137         [DCCP_OPEN]       = "OPEN",
138         [DCCP_REQUESTING] = "REQUESTING",
139         [DCCP_PARTOPEN]   = "PARTOPEN",
140         [DCCP_LISTEN]     = "LISTEN",
141         [DCCP_RESPOND]    = "RESPOND",
142         [DCCP_CLOSING]    = "CLOSING",
143         [DCCP_TIME_WAIT]  = "TIME_WAIT",
144         [DCCP_CLOSED]     = "CLOSED",
145         };
146
147         if (state >= DCCP_MAX_STATES)
148                 return "INVALID STATE!";
149         else
150                 return dccp_state_names[state];
151 }
152
153 EXPORT_SYMBOL_GPL(dccp_state_name);
154
155 void dccp_hash(struct sock *sk)
156 {
157         inet_hash(&dccp_hashinfo, sk);
158 }
159
160 EXPORT_SYMBOL_GPL(dccp_hash);
161
162 void dccp_unhash(struct sock *sk)
163 {
164         inet_unhash(&dccp_hashinfo, sk);
165 }
166
167 EXPORT_SYMBOL_GPL(dccp_unhash);
168
169 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
170 {
171         struct dccp_sock *dp = dccp_sk(sk);
172         struct dccp_minisock *dmsk = dccp_msk(sk);
173         struct inet_connection_sock *icsk = inet_csk(sk);
174
175         dccp_minisock_init(&dp->dccps_minisock);
176
177         /*
178          * FIXME: We're hardcoding the CCID, and doing this at this point makes
179          * the listening (master) sock get CCID control blocks, which is not
180          * necessary, but for now, to not mess with the test userspace apps,
181          * lets leave it here, later the real solution is to do this in a
182          * setsockopt(CCIDs-I-want/accept). -acme
183          */
184         if (likely(ctl_sock_initialized)) {
185                 int rc = dccp_feat_init(dmsk);
186
187                 if (rc)
188                         return rc;
189
190                 if (dmsk->dccpms_send_ack_vector) {
191                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
192                         if (dp->dccps_hc_rx_ackvec == NULL)
193                                 return -ENOMEM;
194                 }
195                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
196                                                       sk, GFP_KERNEL);
197                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
198                                                       sk, GFP_KERNEL);
199                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
200                              dp->dccps_hc_tx_ccid == NULL)) {
201                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
202                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
203                         if (dmsk->dccpms_send_ack_vector) {
204                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
205                                 dp->dccps_hc_rx_ackvec = NULL;
206                         }
207                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
208                         return -ENOMEM;
209                 }
210         } else {
211                 /* control socket doesn't need feat nego */
212                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
213                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
214         }
215
216         dccp_init_xmit_timers(sk);
217         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
218         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
219         sk->sk_state            = DCCP_CLOSED;
220         sk->sk_write_space      = dccp_write_space;
221         icsk->icsk_sync_mss     = dccp_sync_mss;
222         dp->dccps_mss_cache     = 536;
223         dp->dccps_rate_last     = jiffies;
224         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
225         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
226         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
227
228         return 0;
229 }
230
231 EXPORT_SYMBOL_GPL(dccp_init_sock);
232
233 int dccp_destroy_sock(struct sock *sk)
234 {
235         struct dccp_sock *dp = dccp_sk(sk);
236         struct dccp_minisock *dmsk = dccp_msk(sk);
237
238         /*
239          * DCCP doesn't use sk_write_queue, just sk_send_head
240          * for retransmissions
241          */
242         if (sk->sk_send_head != NULL) {
243                 kfree_skb(sk->sk_send_head);
244                 sk->sk_send_head = NULL;
245         }
246
247         /* Clean up a referenced DCCP bind bucket. */
248         if (inet_csk(sk)->icsk_bind_hash != NULL)
249                 inet_put_port(&dccp_hashinfo, sk);
250
251         kfree(dp->dccps_service_list);
252         dp->dccps_service_list = NULL;
253
254         if (dmsk->dccpms_send_ack_vector) {
255                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
256                 dp->dccps_hc_rx_ackvec = NULL;
257         }
258         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
259         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
260         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
261
262         /* clean up feature negotiation state */
263         dccp_feat_clean(dmsk);
264
265         return 0;
266 }
267
268 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
269
270 static inline int dccp_listen_start(struct sock *sk, int backlog)
271 {
272         struct dccp_sock *dp = dccp_sk(sk);
273
274         dp->dccps_role = DCCP_ROLE_LISTEN;
275         return inet_csk_listen_start(sk, backlog);
276 }
277
278 int dccp_disconnect(struct sock *sk, int flags)
279 {
280         struct inet_connection_sock *icsk = inet_csk(sk);
281         struct inet_sock *inet = inet_sk(sk);
282         int err = 0;
283         const int old_state = sk->sk_state;
284
285         if (old_state != DCCP_CLOSED)
286                 dccp_set_state(sk, DCCP_CLOSED);
287
288         /* ABORT function of RFC793 */
289         if (old_state == DCCP_LISTEN) {
290                 inet_csk_listen_stop(sk);
291         /* FIXME: do the active reset thing */
292         } else if (old_state == DCCP_REQUESTING)
293                 sk->sk_err = ECONNRESET;
294
295         dccp_clear_xmit_timers(sk);
296         __skb_queue_purge(&sk->sk_receive_queue);
297         if (sk->sk_send_head != NULL) {
298                 __kfree_skb(sk->sk_send_head);
299                 sk->sk_send_head = NULL;
300         }
301
302         inet->dport = 0;
303
304         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
305                 inet_reset_saddr(sk);
306
307         sk->sk_shutdown = 0;
308         sock_reset_flag(sk, SOCK_DONE);
309
310         icsk->icsk_backoff = 0;
311         inet_csk_delack_init(sk);
312         __sk_dst_reset(sk);
313
314         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
315
316         sk->sk_error_report(sk);
317         return err;
318 }
319
320 EXPORT_SYMBOL_GPL(dccp_disconnect);
321
322 /*
323  *      Wait for a DCCP event.
324  *
325  *      Note that we don't need to lock the socket, as the upper poll layers
326  *      take care of normal races (between the test and the event) and we don't
327  *      go look at any of the socket buffers directly.
328  */
329 unsigned int dccp_poll(struct file *file, struct socket *sock,
330                        poll_table *wait)
331 {
332         unsigned int mask;
333         struct sock *sk = sock->sk;
334
335         poll_wait(file, sk->sk_sleep, wait);
336         if (sk->sk_state == DCCP_LISTEN)
337                 return inet_csk_listen_poll(sk);
338
339         /* Socket is not locked. We are protected from async events
340            by poll logic and correct handling of state changes
341            made by another threads is impossible in any case.
342          */
343
344         mask = 0;
345         if (sk->sk_err)
346                 mask = POLLERR;
347
348         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
349                 mask |= POLLHUP;
350         if (sk->sk_shutdown & RCV_SHUTDOWN)
351                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
352
353         /* Connected? */
354         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
355                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
356                         mask |= POLLIN | POLLRDNORM;
357
358                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
359                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
360                                 mask |= POLLOUT | POLLWRNORM;
361                         } else {  /* send SIGIO later */
362                                 set_bit(SOCK_ASYNC_NOSPACE,
363                                         &sk->sk_socket->flags);
364                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
365
366                                 /* Race breaker. If space is freed after
367                                  * wspace test but before the flags are set,
368                                  * IO signal will be lost.
369                                  */
370                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
371                                         mask |= POLLOUT | POLLWRNORM;
372                         }
373                 }
374         }
375         return mask;
376 }
377
378 EXPORT_SYMBOL_GPL(dccp_poll);
379
380 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
381 {
382         int rc = -ENOTCONN;
383
384         lock_sock(sk);
385
386         if (sk->sk_state == DCCP_LISTEN)
387                 goto out;
388
389         switch (cmd) {
390         case SIOCINQ: {
391                 struct sk_buff *skb;
392                 unsigned long amount = 0;
393
394                 skb = skb_peek(&sk->sk_receive_queue);
395                 if (skb != NULL) {
396                         /*
397                          * We will only return the amount of this packet since
398                          * that is all that will be read.
399                          */
400                         amount = skb->len;
401                 }
402                 rc = put_user(amount, (int __user *)arg);
403         }
404                 break;
405         default:
406                 rc = -ENOIOCTLCMD;
407                 break;
408         }
409 out:
410         release_sock(sk);
411         return rc;
412 }
413
414 EXPORT_SYMBOL_GPL(dccp_ioctl);
415
416 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
417                                    char __user *optval, int optlen)
418 {
419         struct dccp_sock *dp = dccp_sk(sk);
420         struct dccp_service_list *sl = NULL;
421
422         if (service == DCCP_SERVICE_INVALID_VALUE ||
423             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
424                 return -EINVAL;
425
426         if (optlen > sizeof(service)) {
427                 sl = kmalloc(optlen, GFP_KERNEL);
428                 if (sl == NULL)
429                         return -ENOMEM;
430
431                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
432                 if (copy_from_user(sl->dccpsl_list,
433                                    optval + sizeof(service),
434                                    optlen - sizeof(service)) ||
435                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
436                         kfree(sl);
437                         return -EFAULT;
438                 }
439         }
440
441         lock_sock(sk);
442         dp->dccps_service = service;
443
444         kfree(dp->dccps_service_list);
445
446         dp->dccps_service_list = sl;
447         release_sock(sk);
448         return 0;
449 }
450
451 /* byte 1 is feature.  the rest is the preference list */
452 static int dccp_setsockopt_change(struct sock *sk, int type,
453                                   struct dccp_so_feat __user *optval)
454 {
455         struct dccp_so_feat opt;
456         u8 *val;
457         int rc;
458
459         if (copy_from_user(&opt, optval, sizeof(opt)))
460                 return -EFAULT;
461
462         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
463         if (!val)
464                 return -ENOMEM;
465
466         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
467                 rc = -EFAULT;
468                 goto out_free_val;
469         }
470
471         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
472                               val, opt.dccpsf_len, GFP_KERNEL);
473         if (rc)
474                 goto out_free_val;
475
476 out:
477         return rc;
478
479 out_free_val:
480         kfree(val);
481         goto out;
482 }
483
484 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
485                 char __user *optval, int optlen)
486 {
487         struct dccp_sock *dp = dccp_sk(sk);
488         int val, err = 0;
489
490         if (optlen < sizeof(int))
491                 return -EINVAL;
492
493         if (get_user(val, (int __user *)optval))
494                 return -EFAULT;
495
496         if (optname == DCCP_SOCKOPT_SERVICE)
497                 return dccp_setsockopt_service(sk, val, optval, optlen);
498
499         lock_sock(sk);
500         switch (optname) {
501         case DCCP_SOCKOPT_PACKET_SIZE:
502                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
503                 err = 0;
504                 break;
505         case DCCP_SOCKOPT_CHANGE_L:
506                 if (optlen != sizeof(struct dccp_so_feat))
507                         err = -EINVAL;
508                 else
509                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
510                                                      (struct dccp_so_feat __user *)
511                                                      optval);
512                 break;
513         case DCCP_SOCKOPT_CHANGE_R:
514                 if (optlen != sizeof(struct dccp_so_feat))
515                         err = -EINVAL;
516                 else
517                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
518                                                      (struct dccp_so_feat __user *)
519                                                      optval);
520                 break;
521         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
522                 if (val < 0 || val > 15)
523                         err = -EINVAL;
524                 else
525                         dp->dccps_pcslen = val;
526                 break;
527         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
528                 if (val < 0 || val > 15)
529                         err = -EINVAL;
530                 else {
531                         dp->dccps_pcrlen = val;
532                         /* FIXME: add feature negotiation,
533                          * ChangeL(MinimumChecksumCoverage, val) */
534                 }
535                 break;
536         default:
537                 err = -ENOPROTOOPT;
538                 break;
539         }
540
541         release_sock(sk);
542         return err;
543 }
544
545 int dccp_setsockopt(struct sock *sk, int level, int optname,
546                     char __user *optval, int optlen)
547 {
548         if (level != SOL_DCCP)
549                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
550                                                              optname, optval,
551                                                              optlen);
552         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
553 }
554
555 EXPORT_SYMBOL_GPL(dccp_setsockopt);
556
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_setsockopt - compat variant of dccp_setsockopt()
 *
 * SOL_DCCP options go to do_dccp_setsockopt(); every other level is handed
 * to inet_csk_compat_setsockopt().
 */
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_setsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif
569
570 static int dccp_getsockopt_service(struct sock *sk, int len,
571                                    __be32 __user *optval,
572                                    int __user *optlen)
573 {
574         const struct dccp_sock *dp = dccp_sk(sk);
575         const struct dccp_service_list *sl;
576         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
577
578         lock_sock(sk);
579         if ((sl = dp->dccps_service_list) != NULL) {
580                 slen = sl->dccpsl_nr * sizeof(u32);
581                 total_len += slen;
582         }
583
584         err = -EINVAL;
585         if (total_len > len)
586                 goto out;
587
588         err = 0;
589         if (put_user(total_len, optlen) ||
590             put_user(dp->dccps_service, optval) ||
591             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
592                 err = -EFAULT;
593 out:
594         release_sock(sk);
595         return err;
596 }
597
598 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
599                     char __user *optval, int __user *optlen)
600 {
601         struct dccp_sock *dp;
602         int val, len;
603
604         if (get_user(len, optlen))
605                 return -EFAULT;
606
607         if (len < (int)sizeof(int))
608                 return -EINVAL;
609
610         dp = dccp_sk(sk);
611
612         switch (optname) {
613         case DCCP_SOCKOPT_PACKET_SIZE:
614                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
615                 return 0;
616         case DCCP_SOCKOPT_SERVICE:
617                 return dccp_getsockopt_service(sk, len,
618                                                (__be32 __user *)optval, optlen);
619         case DCCP_SOCKOPT_GET_CUR_MPS:
620                 val = dp->dccps_mss_cache;
621                 len = sizeof(val);
622                 break;
623         case DCCP_SOCKOPT_SEND_CSCOV:
624                 val = dp->dccps_pcslen;
625                 len = sizeof(val);
626                 break;
627         case DCCP_SOCKOPT_RECV_CSCOV:
628                 val = dp->dccps_pcrlen;
629                 len = sizeof(val);
630                 break;
631         case 128 ... 191:
632                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
633                                              len, (u32 __user *)optval, optlen);
634         case 192 ... 255:
635                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
636                                              len, (u32 __user *)optval, optlen);
637         default:
638                 return -ENOPROTOOPT;
639         }
640
641         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
642                 return -EFAULT;
643
644         return 0;
645 }
646
647 int dccp_getsockopt(struct sock *sk, int level, int optname,
648                     char __user *optval, int __user *optlen)
649 {
650         if (level != SOL_DCCP)
651                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
652                                                              optname, optval,
653                                                              optlen);
654         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
655 }
656
657 EXPORT_SYMBOL_GPL(dccp_getsockopt);
658
#ifdef CONFIG_COMPAT
/*
 * compat_dccp_getsockopt - compat variant of dccp_getsockopt()
 *
 * SOL_DCCP options go to do_dccp_getsockopt(); every other level is handed
 * to inet_csk_compat_getsockopt().
 */
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level == SOL_DCCP)
                return do_dccp_getsockopt(sk, level, optname, optval, optlen);

        return inet_csk_compat_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif
671
672 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
673                  size_t len)
674 {
675         const struct dccp_sock *dp = dccp_sk(sk);
676         const int flags = msg->msg_flags;
677         const int noblock = flags & MSG_DONTWAIT;
678         struct sk_buff *skb;
679         int rc, size;
680         long timeo;
681
682         if (len > dp->dccps_mss_cache)
683                 return -EMSGSIZE;
684
685         lock_sock(sk);
686
687         if (sysctl_dccp_tx_qlen &&
688             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
689                 rc = -EAGAIN;
690                 goto out_release;
691         }
692
693         timeo = sock_sndtimeo(sk, noblock);
694
695         /*
696          * We have to use sk_stream_wait_connect here to set sk_write_pending,
697          * so that the trick in dccp_rcv_request_sent_state_process.
698          */
699         /* Wait for a connection to finish. */
700         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
701                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
702                         goto out_release;
703
704         size = sk->sk_prot->max_header + len;
705         release_sock(sk);
706         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
707         lock_sock(sk);
708         if (skb == NULL)
709                 goto out_release;
710
711         skb_reserve(skb, sk->sk_prot->max_header);
712         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
713         if (rc != 0)
714                 goto out_discard;
715
716         skb_queue_tail(&sk->sk_write_queue, skb);
717         dccp_write_xmit(sk,0);
718 out_release:
719         release_sock(sk);
720         return rc ? : len;
721 out_discard:
722         kfree_skb(skb);
723         goto out_release;
724 }
725
726 EXPORT_SYMBOL_GPL(dccp_sendmsg);
727
728 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
729                  size_t len, int nonblock, int flags, int *addr_len)
730 {
731         const struct dccp_hdr *dh;
732         long timeo;
733
734         lock_sock(sk);
735
736         if (sk->sk_state == DCCP_LISTEN) {
737                 len = -ENOTCONN;
738                 goto out;
739         }
740
741         timeo = sock_rcvtimeo(sk, nonblock);
742
743         do {
744                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
745
746                 if (skb == NULL)
747                         goto verify_sock_status;
748
749                 dh = dccp_hdr(skb);
750
751                 if (dh->dccph_type == DCCP_PKT_DATA ||
752                     dh->dccph_type == DCCP_PKT_DATAACK)
753                         goto found_ok_skb;
754
755                 if (dh->dccph_type == DCCP_PKT_RESET ||
756                     dh->dccph_type == DCCP_PKT_CLOSE) {
757                         dccp_pr_debug("found fin ok!\n");
758                         len = 0;
759                         goto found_fin_ok;
760                 }
761                 dccp_pr_debug("packet_type=%s\n",
762                               dccp_packet_name(dh->dccph_type));
763                 sk_eat_skb(sk, skb, 0);
764 verify_sock_status:
765                 if (sock_flag(sk, SOCK_DONE)) {
766                         len = 0;
767                         break;
768                 }
769
770                 if (sk->sk_err) {
771                         len = sock_error(sk);
772                         break;
773                 }
774
775                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
776                         len = 0;
777                         break;
778                 }
779
780                 if (sk->sk_state == DCCP_CLOSED) {
781                         if (!sock_flag(sk, SOCK_DONE)) {
782                                 /* This occurs when user tries to read
783                                  * from never connected socket.
784                                  */
785                                 len = -ENOTCONN;
786                                 break;
787                         }
788                         len = 0;
789                         break;
790                 }
791
792                 if (!timeo) {
793                         len = -EAGAIN;
794                         break;
795                 }
796
797                 if (signal_pending(current)) {
798                         len = sock_intr_errno(timeo);
799                         break;
800                 }
801
802                 sk_wait_data(sk, &timeo);
803                 continue;
804         found_ok_skb:
805                 if (len > skb->len)
806                         len = skb->len;
807                 else if (len < skb->len)
808                         msg->msg_flags |= MSG_TRUNC;
809
810                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
811                         /* Exception. Bailout! */
812                         len = -EFAULT;
813                         break;
814                 }
815         found_fin_ok:
816                 if (!(flags & MSG_PEEK))
817                         sk_eat_skb(sk, skb, 0);
818                 break;
819         } while (1);
820 out:
821         release_sock(sk);
822         return len;
823 }
824
825 EXPORT_SYMBOL_GPL(dccp_recvmsg);
826
827 int inet_dccp_listen(struct socket *sock, int backlog)
828 {
829         struct sock *sk = sock->sk;
830         unsigned char old_state;
831         int err;
832
833         lock_sock(sk);
834
835         err = -EINVAL;
836         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
837                 goto out;
838
839         old_state = sk->sk_state;
840         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
841                 goto out;
842
843         /* Really, if the socket is already in listen state
844          * we can only allow the backlog to be adjusted.
845          */
846         if (old_state != DCCP_LISTEN) {
847                 /*
848                  * FIXME: here it probably should be sk->sk_prot->listen_start
849                  * see tcp_listen_start
850                  */
851                 err = dccp_listen_start(sk, backlog);
852                 if (err)
853                         goto out;
854         }
855         sk->sk_max_ack_backlog = backlog;
856         err = 0;
857
858 out:
859         release_sock(sk);
860         return err;
861 }
862
863 EXPORT_SYMBOL_GPL(inet_dccp_listen);
864
865 static const unsigned char dccp_new_state[] = {
866         /* current state:   new state:      action:     */
867         [0]               = DCCP_CLOSED,
868         [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
869         [DCCP_REQUESTING] = DCCP_CLOSED,
870         [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
871         [DCCP_LISTEN]     = DCCP_CLOSED,
872         [DCCP_RESPOND]    = DCCP_CLOSED,
873         [DCCP_CLOSING]    = DCCP_CLOSED,
874         [DCCP_TIME_WAIT]  = DCCP_CLOSED,
875         [DCCP_CLOSED]     = DCCP_CLOSED,
876 };
877
878 static int dccp_close_state(struct sock *sk)
879 {
880         const int next = dccp_new_state[sk->sk_state];
881         const int ns = next & DCCP_STATE_MASK;
882
883         if (ns != sk->sk_state)
884                 dccp_set_state(sk, ns);
885
886         return next & DCCP_ACTION_FIN;
887 }
888
889 void dccp_close(struct sock *sk, long timeout)
890 {
891         struct dccp_sock *dp = dccp_sk(sk);
892         struct sk_buff *skb;
893         int state;
894
895         lock_sock(sk);
896
897         sk->sk_shutdown = SHUTDOWN_MASK;
898
899         if (sk->sk_state == DCCP_LISTEN) {
900                 dccp_set_state(sk, DCCP_CLOSED);
901
902                 /* Special case. */
903                 inet_csk_listen_stop(sk);
904
905                 goto adjudge_to_death;
906         }
907
908         sk_stop_timer(sk, &dp->dccps_xmit_timer);
909
910         /*
911          * We need to flush the recv. buffs.  We do this only on the
912          * descriptor close, not protocol-sourced closes, because the
913           *reader process may not have drained the data yet!
914          */
915         /* FIXME: check for unread data */
916         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
917                 __kfree_skb(skb);
918         }
919
920         if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
921                 /* Check zero linger _after_ checking for unread data. */
922                 sk->sk_prot->disconnect(sk, 0);
923         } else if (dccp_close_state(sk)) {
924                 dccp_send_close(sk, 1);
925         }
926
927         sk_stream_wait_close(sk, timeout);
928
929 adjudge_to_death:
930         state = sk->sk_state;
931         sock_hold(sk);
932         sock_orphan(sk);
933         atomic_inc(sk->sk_prot->orphan_count);
934
935         /*
936          * It is the last release_sock in its life. It will remove backlog.
937          */
938         release_sock(sk);
939         /*
940          * Now socket is owned by kernel and we acquire BH lock
941          * to finish close. No need to check for user refs.
942          */
943         local_bh_disable();
944         bh_lock_sock(sk);
945         BUG_TRAP(!sock_owned_by_user(sk));
946
947         /* Have we already been destroyed by a softirq or backlog? */
948         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
949                 goto out;
950
951         /*
952          * The last release_sock may have processed the CLOSE or RESET
953          * packet moving sock to CLOSED state, if not we have to fire
954          * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
955          * in draft-ietf-dccp-spec-11. -acme
956          */
957         if (sk->sk_state == DCCP_CLOSING) {
958                 /* FIXME: should start at 2 * RTT */
959                 /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
960                 inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
961                                           inet_csk(sk)->icsk_rto,
962                                           DCCP_RTO_MAX);
963 #if 0
964                 /* Yeah, we should use sk->sk_prot->orphan_count, etc */
965                 dccp_set_state(sk, DCCP_CLOSED);
966 #endif
967         }
968
969         if (sk->sk_state == DCCP_CLOSED)
970                 inet_csk_destroy_sock(sk);
971
972         /* Otherwise, socket is reprieved until protocol close. */
973
974 out:
975         bh_unlock_sock(sk);
976         local_bh_enable();
977         sock_put(sk);
978 }
979
980 EXPORT_SYMBOL_GPL(dccp_close);
981
/*
 * dccp_shutdown - stub implementation
 *
 * Only emits a debug message; neither @sk nor @how is used.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}
986
987 EXPORT_SYMBOL_GPL(dccp_shutdown);
988
989 static int __init dccp_mib_init(void)
990 {
991         int rc = -ENOMEM;
992
993         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
994         if (dccp_statistics[0] == NULL)
995                 goto out;
996
997         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
998         if (dccp_statistics[1] == NULL)
999                 goto out_free_one;
1000
1001         rc = 0;
1002 out:
1003         return rc;
1004 out_free_one:
1005         free_percpu(dccp_statistics[0]);
1006         dccp_statistics[0] = NULL;
1007         goto out;
1008
1009 }
1010
1011 static void dccp_mib_exit(void)
1012 {
1013         free_percpu(dccp_statistics[0]);
1014         free_percpu(dccp_statistics[1]);
1015         dccp_statistics[0] = dccp_statistics[1] = NULL;
1016 }
1017
/*
 * Module parameter (mode 0444).  When non-zero, dccp_init() derives the
 * size of the established hash table from it instead of from the amount
 * of physical memory.
 */
static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
/*
 * Debug-message switch, only present when CONFIG_IP_DCCP_DEBUG is set.
 * It is a module parameter (mode 0444) and is exported (GPL-only).
 */
int dccp_debug;
module_param(dccp_debug, bool, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif
1029
1030 static int __init dccp_init(void)
1031 {
1032         unsigned long goal;
1033         int ehash_order, bhash_order, i;
1034         int rc = -ENOBUFS;
1035
1036         dccp_hashinfo.bind_bucket_cachep =
1037                 kmem_cache_create("dccp_bind_bucket",
1038                                   sizeof(struct inet_bind_bucket), 0,
1039                                   SLAB_HWCACHE_ALIGN, NULL);
1040         if (!dccp_hashinfo.bind_bucket_cachep)
1041                 goto out;
1042
1043         /*
1044          * Size and allocate the main established and bind bucket
1045          * hash tables.
1046          *
1047          * The methodology is similar to that of the buffer cache.
1048          */
1049         if (num_physpages >= (128 * 1024))
1050                 goal = num_physpages >> (21 - PAGE_SHIFT);
1051         else
1052                 goal = num_physpages >> (23 - PAGE_SHIFT);
1053
1054         if (thash_entries)
1055                 goal = (thash_entries *
1056                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1057         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1058                 ;
1059         do {
1060                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1061                                         sizeof(struct inet_ehash_bucket);
1062                 while (dccp_hashinfo.ehash_size &
1063                        (dccp_hashinfo.ehash_size - 1))
1064                         dccp_hashinfo.ehash_size--;
1065                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1066                         __get_free_pages(GFP_ATOMIC, ehash_order);
1067         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1068
1069         if (!dccp_hashinfo.ehash) {
1070                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1071                 goto out_free_bind_bucket_cachep;
1072         }
1073
1074         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1075                 rwlock_init(&dccp_hashinfo.ehash[i].lock);
1076                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1077                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1078         }
1079
1080         bhash_order = ehash_order;
1081
1082         do {
1083                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1084                                         sizeof(struct inet_bind_hashbucket);
1085                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1086                     bhash_order > 0)
1087                         continue;
1088                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1089                         __get_free_pages(GFP_ATOMIC, bhash_order);
1090         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1091
1092         if (!dccp_hashinfo.bhash) {
1093                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1094                 goto out_free_dccp_ehash;
1095         }
1096
1097         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1098                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1099                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1100         }
1101
1102         rc = dccp_mib_init();
1103         if (rc)
1104                 goto out_free_dccp_bhash;
1105
1106         rc = dccp_ackvec_init();
1107         if (rc)
1108                 goto out_free_dccp_mib;
1109
1110         rc = dccp_sysctl_init();
1111         if (rc)
1112                 goto out_ackvec_exit;
1113
1114         dccp_timestamping_init();
1115 out:
1116         return rc;
1117 out_ackvec_exit:
1118         dccp_ackvec_exit();
1119 out_free_dccp_mib:
1120         dccp_mib_exit();
1121 out_free_dccp_bhash:
1122         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1123         dccp_hashinfo.bhash = NULL;
1124 out_free_dccp_ehash:
1125         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1126         dccp_hashinfo.ehash = NULL;
1127 out_free_bind_bucket_cachep:
1128         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1129         dccp_hashinfo.bind_bucket_cachep = NULL;
1130         goto out;
1131 }
1132
1133 static void __exit dccp_fini(void)
1134 {
1135         dccp_mib_exit();
1136         free_pages((unsigned long)dccp_hashinfo.bhash,
1137                    get_order(dccp_hashinfo.bhash_size *
1138                              sizeof(struct inet_bind_hashbucket)));
1139         free_pages((unsigned long)dccp_hashinfo.ehash,
1140                    get_order(dccp_hashinfo.ehash_size *
1141                              sizeof(struct inet_ehash_bucket)));
1142         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1143         dccp_ackvec_exit();
1144         dccp_sysctl_exit();
1145 }
1146
/* Register dccp_init()/dccp_fini() as the module's entry and exit points. */
module_init(dccp_init);
module_exit(dccp_fini);

/* Module metadata. */
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");