/*
 *  net/dccp/proto.c
 *
 *  An implementation of the DCCP protocol
 *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
 *
 *      This program is free software; you can redistribute it and/or modify it
 *      under the terms of the GNU General Public License version 2 as
 *      published by the Free Software Foundation.
 */

#include <linux/dccp.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/skbuff.h>
#include <linux/netdevice.h>
#include <linux/in.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/random.h>
#include <net/checksum.h>

#include <net/inet_sock.h>
#include <net/sock.h>
#include <net/xfrm.h>

#include <asm/semaphore.h>
#include <linux/spinlock.h>
#include <linux/timer.h>
#include <linux/delay.h>
#include <linux/poll.h>

#include "ccid.h"
#include "dccp.h"
#include "feat.h"

DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;

EXPORT_SYMBOL_GPL(dccp_statistics);

atomic_t dccp_orphan_count = ATOMIC_INIT(0);

EXPORT_SYMBOL_GPL(dccp_orphan_count);

struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
        .lhash_lock     = RW_LOCK_UNLOCKED,
        .lhash_users    = ATOMIC_INIT(0),
        .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
};

EXPORT_SYMBOL_GPL(dccp_hashinfo);

/* the maximum queue length for tx in packets. 0 is no limit */
int sysctl_dccp_tx_qlen __read_mostly = 5;

void dccp_set_state(struct sock *sk, const int state)
{
        const int oldstate = sk->sk_state;

        dccp_pr_debug("%s(%p) %-10.10s -> %s\n",
                      dccp_role(sk), sk,
                      dccp_state_name(oldstate), dccp_state_name(state));
        WARN_ON(state == oldstate);

        switch (state) {
        case DCCP_OPEN:
                if (oldstate != DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
                break;

        case DCCP_CLOSED:
                if (oldstate == DCCP_CLOSING || oldstate == DCCP_OPEN)
                        DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);

                sk->sk_prot->unhash(sk);
                if (inet_csk(sk)->icsk_bind_hash != NULL &&
                    !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
                        inet_put_port(&dccp_hashinfo, sk);
                /* fall through */
        default:
                if (oldstate == DCCP_OPEN)
                        DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
        }

        /* Change state AFTER socket is unhashed to avoid closed
         * socket sitting in hash tables.
         */
        sk->sk_state = state;
}

EXPORT_SYMBOL_GPL(dccp_set_state);

void dccp_done(struct sock *sk)
{
        dccp_set_state(sk, DCCP_CLOSED);
        dccp_clear_xmit_timers(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (!sock_flag(sk, SOCK_DEAD))
                sk->sk_state_change(sk);
        else
                inet_csk_destroy_sock(sk);
}

EXPORT_SYMBOL_GPL(dccp_done);

const char *dccp_packet_name(const int type)
{
        static const char *dccp_packet_names[] = {
                [DCCP_PKT_REQUEST]  = "REQUEST",
                [DCCP_PKT_RESPONSE] = "RESPONSE",
                [DCCP_PKT_DATA]     = "DATA",
                [DCCP_PKT_ACK]      = "ACK",
                [DCCP_PKT_DATAACK]  = "DATAACK",
                [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
                [DCCP_PKT_CLOSE]    = "CLOSE",
                [DCCP_PKT_RESET]    = "RESET",
                [DCCP_PKT_SYNC]     = "SYNC",
                [DCCP_PKT_SYNCACK]  = "SYNCACK",
        };

        if (type >= DCCP_NR_PKT_TYPES)
                return "INVALID";
        else
                return dccp_packet_names[type];
}

EXPORT_SYMBOL_GPL(dccp_packet_name);

const char *dccp_state_name(const int state)
{
        static char *dccp_state_names[] = {
        [DCCP_OPEN]       = "OPEN",
        [DCCP_REQUESTING] = "REQUESTING",
        [DCCP_PARTOPEN]   = "PARTOPEN",
        [DCCP_LISTEN]     = "LISTEN",
        [DCCP_RESPOND]    = "RESPOND",
        [DCCP_CLOSING]    = "CLOSING",
        [DCCP_TIME_WAIT]  = "TIME_WAIT",
        [DCCP_CLOSED]     = "CLOSED",
        };

        if (state >= DCCP_MAX_STATES)
                return "INVALID STATE!";
        else
                return dccp_state_names[state];
}

EXPORT_SYMBOL_GPL(dccp_state_name);

void dccp_hash(struct sock *sk)
{
        inet_hash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_hash);

void dccp_unhash(struct sock *sk)
{
        inet_unhash(&dccp_hashinfo, sk);
}

EXPORT_SYMBOL_GPL(dccp_unhash);

int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);
        struct inet_connection_sock *icsk = inet_csk(sk);

        dccp_minisock_init(&dp->dccps_minisock);

        /*
         * FIXME: We're hardcoding the CCID, and doing this at this point makes
         * the listening (master) sock get CCID control blocks, which is not
         * necessary, but for now, to not mess with the test userspace apps,
         * let's leave it here; later the real solution is to do this in a
         * setsockopt(CCIDs-I-want/accept). -acme
         */
        if (likely(ctl_sock_initialized)) {
                int rc = dccp_feat_init(dmsk);

                if (rc)
                        return rc;

                if (dmsk->dccpms_send_ack_vector) {
                        dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
                        if (dp->dccps_hc_rx_ackvec == NULL)
                                return -ENOMEM;
                }
                dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
                                                      sk, GFP_KERNEL);
                dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
                                                      sk, GFP_KERNEL);
                if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
                             dp->dccps_hc_tx_ccid == NULL)) {
                        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
                        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
                        if (dmsk->dccpms_send_ack_vector) {
                                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                                dp->dccps_hc_rx_ackvec = NULL;
                        }
                        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
                        return -ENOMEM;
                }
        } else {
                /* control socket doesn't need feat nego */
                INIT_LIST_HEAD(&dmsk->dccpms_pending);
                INIT_LIST_HEAD(&dmsk->dccpms_conf);
        }

        dccp_init_xmit_timers(sk);
        icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
        icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
        sk->sk_state            = DCCP_CLOSED;
        sk->sk_write_space      = dccp_write_space;
        icsk->icsk_sync_mss     = dccp_sync_mss;
        dp->dccps_mss_cache     = 536;
        dp->dccps_role          = DCCP_ROLE_UNDEFINED;
        dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
        dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_init_sock);

int dccp_destroy_sock(struct sock *sk)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_minisock *dmsk = dccp_msk(sk);

        /*
         * DCCP doesn't use sk_write_queue, just sk_send_head
         * for retransmissions
         */
        if (sk->sk_send_head != NULL) {
                kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        /* Clean up a referenced DCCP bind bucket. */
        if (inet_csk(sk)->icsk_bind_hash != NULL)
                inet_put_port(&dccp_hashinfo, sk);

        kfree(dp->dccps_service_list);
        dp->dccps_service_list = NULL;

        if (dmsk->dccpms_send_ack_vector) {
                dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
                dp->dccps_hc_rx_ackvec = NULL;
        }
        ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
        ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
        dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;

        /* clean up feature negotiation state */
        dccp_feat_clean(dmsk);

        return 0;
}

EXPORT_SYMBOL_GPL(dccp_destroy_sock);

static inline int dccp_listen_start(struct sock *sk, int backlog)
{
        struct dccp_sock *dp = dccp_sk(sk);

        dp->dccps_role = DCCP_ROLE_LISTEN;
        return inet_csk_listen_start(sk, backlog);
}

int dccp_disconnect(struct sock *sk, int flags)
{
        struct inet_connection_sock *icsk = inet_csk(sk);
        struct inet_sock *inet = inet_sk(sk);
        int err = 0;
        const int old_state = sk->sk_state;

        if (old_state != DCCP_CLOSED)
                dccp_set_state(sk, DCCP_CLOSED);

        /* ABORT function of RFC793 */
        if (old_state == DCCP_LISTEN) {
                inet_csk_listen_stop(sk);
        /* FIXME: do the active reset thing */
        } else if (old_state == DCCP_REQUESTING)
                sk->sk_err = ECONNRESET;

        dccp_clear_xmit_timers(sk);
        __skb_queue_purge(&sk->sk_receive_queue);
        if (sk->sk_send_head != NULL) {
                __kfree_skb(sk->sk_send_head);
                sk->sk_send_head = NULL;
        }

        inet->dport = 0;

        if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
                inet_reset_saddr(sk);

        sk->sk_shutdown = 0;
        sock_reset_flag(sk, SOCK_DONE);

        icsk->icsk_backoff = 0;
        inet_csk_delack_init(sk);
        __sk_dst_reset(sk);

        BUG_TRAP(!inet->num || icsk->icsk_bind_hash);

        sk->sk_error_report(sk);
        return err;
}

EXPORT_SYMBOL_GPL(dccp_disconnect);

/*
 *      Wait for a DCCP event.
 *
 *      Note that we don't need to lock the socket, as the upper poll layers
 *      take care of normal races (between the test and the event) and we don't
 *      go look at any of the socket buffers directly.
 */
unsigned int dccp_poll(struct file *file, struct socket *sock,
                       poll_table *wait)
{
        unsigned int mask;
        struct sock *sk = sock->sk;

        poll_wait(file, sk->sk_sleep, wait);
        if (sk->sk_state == DCCP_LISTEN)
                return inet_csk_listen_poll(sk);

        /* Socket is not locked. We are protected from async events
         * by the poll logic, and correct handling of state changes
         * made by other threads is impossible in any case.
         */

        mask = 0;
        if (sk->sk_err)
                mask = POLLERR;

        if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
                mask |= POLLHUP;
        if (sk->sk_shutdown & RCV_SHUTDOWN)
                mask |= POLLIN | POLLRDNORM | POLLRDHUP;

        /* Connected? */
        if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
                if (atomic_read(&sk->sk_rmem_alloc) > 0)
                        mask |= POLLIN | POLLRDNORM;

                if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
                        if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
                                mask |= POLLOUT | POLLWRNORM;
                        } else {  /* send SIGIO later */
                                set_bit(SOCK_ASYNC_NOSPACE,
                                        &sk->sk_socket->flags);
                                set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);

                                /* Race breaker. If space is freed after
                                 * wspace test but before the flags are set,
                                 * IO signal will be lost.
                                 */
                                if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
                                        mask |= POLLOUT | POLLWRNORM;
                        }
                }
        }
        return mask;
}

EXPORT_SYMBOL_GPL(dccp_poll);

int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
{
        dccp_pr_debug("entry\n");
        return -ENOIOCTLCMD;
}

EXPORT_SYMBOL_GPL(dccp_ioctl);

static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
                                   char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct dccp_service_list *sl = NULL;

        if (service == DCCP_SERVICE_INVALID_VALUE ||
            optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
                return -EINVAL;

        if (optlen > sizeof(service)) {
                sl = kmalloc(optlen, GFP_KERNEL);
                if (sl == NULL)
                        return -ENOMEM;

                sl->dccpsl_nr = optlen / sizeof(u32) - 1;
                if (copy_from_user(sl->dccpsl_list,
                                   optval + sizeof(service),
                                   optlen - sizeof(service)) ||
                    dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
                        kfree(sl);
                        return -EFAULT;
                }
        }

        lock_sock(sk);
        dp->dccps_service = service;

        kfree(dp->dccps_service_list);

        dp->dccps_service_list = sl;
        release_sock(sk);
        return 0;
}

/* Byte 1 is the feature; the rest is the preference list. */
static int dccp_setsockopt_change(struct sock *sk, int type,
                                  struct dccp_so_feat __user *optval)
{
        struct dccp_so_feat opt;
        u8 *val;
        int rc;

        if (copy_from_user(&opt, optval, sizeof(opt)))
                return -EFAULT;

        val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
        if (!val)
                return -ENOMEM;

        if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
                rc = -EFAULT;
                goto out_free_val;
        }

        rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
                              val, opt.dccpsf_len, GFP_KERNEL);
        if (rc)
                goto out_free_val;

out:
        return rc;

out_free_val:
        kfree(val);
        goto out;
}

static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
                char __user *optval, int optlen)
{
        struct dccp_sock *dp = dccp_sk(sk);
        int val, err = 0;

        if (optlen < sizeof(int))
                return -EINVAL;

        if (get_user(val, (int __user *)optval))
                return -EFAULT;

        if (optname == DCCP_SOCKOPT_SERVICE)
                return dccp_setsockopt_service(sk, val, optval, optlen);

        lock_sock(sk);
        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                err = 0;
                break;
        case DCCP_SOCKOPT_CHANGE_L:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_CHANGE_R:
                if (optlen != sizeof(struct dccp_so_feat))
                        err = -EINVAL;
                else
                        err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
                                                     (struct dccp_so_feat __user *)
                                                     optval);
                break;
        case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else
                        dp->dccps_pcslen = val;
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
                if (val < 0 || val > 15)
                        err = -EINVAL;
                else {
                        dp->dccps_pcrlen = val;
                        /* FIXME: add feature negotiation,
                         * ChangeL(MinimumChecksumCoverage, val) */
                }
                break;
        default:
                err = -ENOPROTOOPT;
                break;
        }

        release_sock(sk);
        return err;
}

int dccp_setsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_setsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_setsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_setsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
#endif

static int dccp_getsockopt_service(struct sock *sk, int len,
                                   __be32 __user *optval,
                                   int __user *optlen)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const struct dccp_service_list *sl;
        int err = -ENOENT, slen = 0, total_len = sizeof(u32);

        lock_sock(sk);
        if ((sl = dp->dccps_service_list) != NULL) {
                slen = sl->dccpsl_nr * sizeof(u32);
                total_len += slen;
        }

        err = -EINVAL;
        if (total_len > len)
                goto out;

        err = 0;
        if (put_user(total_len, optlen) ||
            put_user(dp->dccps_service, optval) ||
            (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
                err = -EFAULT;
out:
        release_sock(sk);
        return err;
}

static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        struct dccp_sock *dp;
        int val, len;

        if (get_user(len, optlen))
                return -EFAULT;

        if (len < (int)sizeof(int))
                return -EINVAL;

        dp = dccp_sk(sk);

        switch (optname) {
        case DCCP_SOCKOPT_PACKET_SIZE:
                DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
                return 0;
        case DCCP_SOCKOPT_SERVICE:
                return dccp_getsockopt_service(sk, len,
                                               (__be32 __user *)optval, optlen);
        case DCCP_SOCKOPT_SEND_CSCOV:
                val = dp->dccps_pcslen;
                len = sizeof(val);
                break;
        case DCCP_SOCKOPT_RECV_CSCOV:
                val = dp->dccps_pcrlen;
                len = sizeof(val);
                break;
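        /*
         * Socket options 128..191 are CCID-specific and are handled by the
         * RX CCID; options 192..255 go to the TX CCID.
         */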
        case 128 ... 191:
                return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        case 192 ... 255:
                return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
                                             len, (u32 __user *)optval, optlen);
        default:
                return -ENOPROTOOPT;
        }

        if (put_user(len, optlen) || copy_to_user(optval, &val, len))
                return -EFAULT;

        return 0;
}

int dccp_getsockopt(struct sock *sk, int level, int optname,
                    char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
                                                             optname, optval,
                                                             optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(dccp_getsockopt);

#ifdef CONFIG_COMPAT
int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
                           char __user *optval, int __user *optlen)
{
        if (level != SOL_DCCP)
                return inet_csk_compat_getsockopt(sk, level, optname,
                                                  optval, optlen);
        return do_dccp_getsockopt(sk, level, optname, optval, optlen);
}

EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
#endif

int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len)
{
        const struct dccp_sock *dp = dccp_sk(sk);
        const int flags = msg->msg_flags;
        const int noblock = flags & MSG_DONTWAIT;
        struct sk_buff *skb;
        int rc, size;
        long timeo;

        if (len > dp->dccps_mss_cache)
                return -EMSGSIZE;

        lock_sock(sk);

        if (sysctl_dccp_tx_qlen &&
            (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
                rc = -EAGAIN;
                goto out_release;
        }

        timeo = sock_sndtimeo(sk, noblock);

        /*
         * We have to use sk_stream_wait_connect here to set sk_write_pending,
         * so that the trick in dccp_rcv_request_sent_state_process works.
         */
        /* Wait for a connection to finish. */
        if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN | DCCPF_CLOSING))
                if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
                        goto out_release;

        size = sk->sk_prot->max_header + len;
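        /*
         * sock_alloc_send_skb() may block waiting for send buffer space, so
         * drop the socket lock around the allocation and retake it afterwards.
         */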
        release_sock(sk);
        skb = sock_alloc_send_skb(sk, size, noblock, &rc);
        lock_sock(sk);
        if (skb == NULL)
                goto out_release;

        skb_reserve(skb, sk->sk_prot->max_header);
        rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
        if (rc != 0)
                goto out_discard;

        skb_queue_tail(&sk->sk_write_queue, skb);
        dccp_write_xmit(sk, 0);
out_release:
        release_sock(sk);
        return rc ? : len;
out_discard:
        kfree_skb(skb);
        goto out_release;
}

EXPORT_SYMBOL_GPL(dccp_sendmsg);

int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
                 size_t len, int nonblock, int flags, int *addr_len)
{
        const struct dccp_hdr *dh;
        long timeo;

        lock_sock(sk);

        if (sk->sk_state == DCCP_LISTEN) {
                len = -ENOTCONN;
                goto out;
        }

        timeo = sock_rcvtimeo(sk, nonblock);

        do {
                struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);

                if (skb == NULL)
                        goto verify_sock_status;

                dh = dccp_hdr(skb);

                if (dh->dccph_type == DCCP_PKT_DATA ||
                    dh->dccph_type == DCCP_PKT_DATAACK)
                        goto found_ok_skb;

                if (dh->dccph_type == DCCP_PKT_RESET ||
                    dh->dccph_type == DCCP_PKT_CLOSE) {
                        dccp_pr_debug("found fin ok!\n");
                        len = 0;
                        goto found_fin_ok;
                }
                dccp_pr_debug("packet_type=%s\n",
                              dccp_packet_name(dh->dccph_type));
                sk_eat_skb(sk, skb, 0);
verify_sock_status:
                if (sock_flag(sk, SOCK_DONE)) {
                        len = 0;
                        break;
                }

                if (sk->sk_err) {
                        len = sock_error(sk);
                        break;
                }

                if (sk->sk_shutdown & RCV_SHUTDOWN) {
                        len = 0;
                        break;
                }

                if (sk->sk_state == DCCP_CLOSED) {
                        if (!sock_flag(sk, SOCK_DONE)) {
                                /* This occurs when the user tries to read
                                 * from a socket that was never connected.
                                 */
                                len = -ENOTCONN;
                                break;
                        }
                        len = 0;
                        break;
                }

                if (!timeo) {
                        len = -EAGAIN;
                        break;
                }

                if (signal_pending(current)) {
                        len = sock_intr_errno(timeo);
                        break;
                }

                sk_wait_data(sk, &timeo);
                continue;
        found_ok_skb:
                if (len > skb->len)
                        len = skb->len;
                else if (len < skb->len)
                        msg->msg_flags |= MSG_TRUNC;

                if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
                        /* Exception. Bailout! */
                        len = -EFAULT;
                        break;
                }
        found_fin_ok:
                if (!(flags & MSG_PEEK))
                        sk_eat_skb(sk, skb, 0);
                break;
        } while (1);
out:
        release_sock(sk);
        return len;
}

EXPORT_SYMBOL_GPL(dccp_recvmsg);

int inet_dccp_listen(struct socket *sock, int backlog)
{
        struct sock *sk = sock->sk;
        unsigned char old_state;
        int err;

        lock_sock(sk);

        err = -EINVAL;
        if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
                goto out;

        old_state = sk->sk_state;
        if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
                goto out;

        /* Really, if the socket is already in listen state
         * we can only allow the backlog to be adjusted.
         */
        if (old_state != DCCP_LISTEN) {
                /*
                 * FIXME: here it probably should be sk->sk_prot->listen_start
                 * see tcp_listen_start
                 */
                err = dccp_listen_start(sk, backlog);
                if (err)
                        goto out;
        }
        sk->sk_max_ack_backlog = backlog;
        err = 0;

out:
        release_sock(sk);
        return err;
}

EXPORT_SYMBOL_GPL(inet_dccp_listen);

static const unsigned char dccp_new_state[] = {
        /* current state:   new state:      action:     */
        [0]               = DCCP_CLOSED,
        [DCCP_OPEN]       = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_REQUESTING] = DCCP_CLOSED,
        [DCCP_PARTOPEN]   = DCCP_CLOSING | DCCP_ACTION_FIN,
        [DCCP_LISTEN]     = DCCP_CLOSED,
        [DCCP_RESPOND]    = DCCP_CLOSED,
        [DCCP_CLOSING]    = DCCP_CLOSED,
        [DCCP_TIME_WAIT]  = DCCP_CLOSED,
        [DCCP_CLOSED]     = DCCP_CLOSED,
};

static int dccp_close_state(struct sock *sk)
{
        const int next = dccp_new_state[sk->sk_state];
        const int ns = next & DCCP_STATE_MASK;

        if (ns != sk->sk_state)
                dccp_set_state(sk, ns);

        return next & DCCP_ACTION_FIN;
}

void dccp_close(struct sock *sk, long timeout)
{
        struct dccp_sock *dp = dccp_sk(sk);
        struct sk_buff *skb;
        int state;

        lock_sock(sk);

        sk->sk_shutdown = SHUTDOWN_MASK;

        if (sk->sk_state == DCCP_LISTEN) {
                dccp_set_state(sk, DCCP_CLOSED);

                /* Special case. */
                inet_csk_listen_stop(sk);

                goto adjudge_to_death;
        }

        sk_stop_timer(sk, &dp->dccps_xmit_timer);

        /*
         * We need to flush the recv. buffs.  We do this only on the
         * descriptor close, not protocol-sourced closes, because the
         * reader process may not have drained the data yet!
         */
        /* FIXME: check for unread data */
        while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                __kfree_skb(skb);
        }

        if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
                /* Check zero linger _after_ checking for unread data. */
                sk->sk_prot->disconnect(sk, 0);
        } else if (dccp_close_state(sk)) {
                dccp_send_close(sk, 1);
        }

        sk_stream_wait_close(sk, timeout);

adjudge_to_death:
        state = sk->sk_state;
        sock_hold(sk);
        sock_orphan(sk);
        atomic_inc(sk->sk_prot->orphan_count);

        /*
         * It is the last release_sock in its life. It will remove backlog.
         */
        release_sock(sk);
        /*
         * Now socket is owned by kernel and we acquire BH lock
         * to finish close. No need to check for user refs.
         */
        local_bh_disable();
        bh_lock_sock(sk);
        BUG_TRAP(!sock_owned_by_user(sk));

        /* Have we already been destroyed by a softirq or backlog? */
        if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
                goto out;

        /*
         * The last release_sock may have processed the CLOSE or RESET
         * packet moving sock to CLOSED state, if not we have to fire
         * the CLOSE/CLOSEREQ retransmission timer, see "8.3. Termination"
         * in draft-ietf-dccp-spec-11. -acme
         */
        if (sk->sk_state == DCCP_CLOSING) {
                /* FIXME: should start at 2 * RTT */
                /* Timer for repeating the CLOSE/CLOSEREQ until an answer. */
                inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
                                          inet_csk(sk)->icsk_rto,
                                          DCCP_RTO_MAX);
#if 0
                /* Yeah, we should use sk->sk_prot->orphan_count, etc */
                dccp_set_state(sk, DCCP_CLOSED);
#endif
        }

        if (sk->sk_state == DCCP_CLOSED)
                inet_csk_destroy_sock(sk);

        /* Otherwise, socket is reprieved until protocol close. */

out:
        bh_unlock_sock(sk);
        local_bh_enable();
        sock_put(sk);
}

EXPORT_SYMBOL_GPL(dccp_close);

void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("entry\n");
}

EXPORT_SYMBOL_GPL(dccp_shutdown);

static int __init dccp_mib_init(void)
{
        int rc = -ENOMEM;

        dccp_statistics[0] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[0] == NULL)
                goto out;

        dccp_statistics[1] = alloc_percpu(struct dccp_mib);
        if (dccp_statistics[1] == NULL)
                goto out_free_one;

        rc = 0;
out:
        return rc;
out_free_one:
        free_percpu(dccp_statistics[0]);
        dccp_statistics[0] = NULL;
        goto out;

}

static void dccp_mib_exit(void)
{
        free_percpu(dccp_statistics[0]);
        free_percpu(dccp_statistics[1]);
        dccp_statistics[0] = dccp_statistics[1] = NULL;
}

static int thash_entries;
module_param(thash_entries, int, 0444);
MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");

#ifdef CONFIG_IP_DCCP_DEBUG
int dccp_debug;
module_param(dccp_debug, int, 0444);
MODULE_PARM_DESC(dccp_debug, "Enable debug messages");

EXPORT_SYMBOL_GPL(dccp_debug);
#endif

static int __init dccp_init(void)
{
        unsigned long goal;
        int ehash_order, bhash_order, i;
        int rc = -ENOBUFS;

        dccp_hashinfo.bind_bucket_cachep =
                kmem_cache_create("dccp_bind_bucket",
                                  sizeof(struct inet_bind_bucket), 0,
                                  SLAB_HWCACHE_ALIGN, NULL);
        if (!dccp_hashinfo.bind_bucket_cachep)
                goto out;

        /*
         * Size and allocate the main established and bind bucket
         * hash tables.
         *
         * The methodology is similar to that of the buffer cache.
         */
        if (num_physpages >= (128 * 1024))
                goal = num_physpages >> (21 - PAGE_SHIFT);
        else
                goal = num_physpages >> (23 - PAGE_SHIFT);

        if (thash_entries)
                goal = (thash_entries *
                        sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
        for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
                ;
        do {
                dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
                                        sizeof(struct inet_ehash_bucket);
                while (dccp_hashinfo.ehash_size &
                       (dccp_hashinfo.ehash_size - 1))
                        dccp_hashinfo.ehash_size--;
                dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
                        __get_free_pages(GFP_ATOMIC, ehash_order);
        } while (!dccp_hashinfo.ehash && --ehash_order > 0);

        if (!dccp_hashinfo.ehash) {
                DCCP_CRIT("Failed to allocate DCCP established hash table");
                goto out_free_bind_bucket_cachep;
        }

        for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
                rwlock_init(&dccp_hashinfo.ehash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
                INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
        }

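        /*
         * Size the bind hash starting from the same order as the established
         * hash, lowering the order while the table would exceed 64K buckets
         * or the allocation fails.
         */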
        bhash_order = ehash_order;

        do {
                dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
                                        sizeof(struct inet_bind_hashbucket);
                if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
                    bhash_order > 0)
                        continue;
                dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
                        __get_free_pages(GFP_ATOMIC, bhash_order);
        } while (!dccp_hashinfo.bhash && --bhash_order >= 0);

        if (!dccp_hashinfo.bhash) {
                DCCP_CRIT("Failed to allocate DCCP bind hash table");
                goto out_free_dccp_ehash;
        }

        for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
                spin_lock_init(&dccp_hashinfo.bhash[i].lock);
                INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
        }

        rc = dccp_mib_init();
        if (rc)
                goto out_free_dccp_bhash;

        rc = dccp_ackvec_init();
        if (rc)
                goto out_free_dccp_mib;

        rc = dccp_sysctl_init();
        if (rc)
                goto out_ackvec_exit;
out:
        return rc;
out_ackvec_exit:
        dccp_ackvec_exit();
out_free_dccp_mib:
        dccp_mib_exit();
out_free_dccp_bhash:
        free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
        dccp_hashinfo.bhash = NULL;
out_free_dccp_ehash:
        free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
        dccp_hashinfo.ehash = NULL;
out_free_bind_bucket_cachep:
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_hashinfo.bind_bucket_cachep = NULL;
        goto out;
}

static void __exit dccp_fini(void)
{
        dccp_mib_exit();
        free_pages((unsigned long)dccp_hashinfo.bhash,
                   get_order(dccp_hashinfo.bhash_size *
                             sizeof(struct inet_bind_hashbucket)));
        free_pages((unsigned long)dccp_hashinfo.ehash,
                   get_order(dccp_hashinfo.ehash_size *
                             sizeof(struct inet_ehash_bucket)));
        kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
        dccp_ackvec_exit();
        dccp_sysctl_exit();
}

module_init(dccp_init);
module_exit(dccp_fini);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");