Merge branch 'linux-2.6'
[linux-2.6] / net / dccp / proto.c
1 /*
2  *  net/dccp/proto.c
3  *
4  *  An implementation of the DCCP protocol
5  *  Arnaldo Carvalho de Melo <acme@conectiva.com.br>
6  *
7  *      This program is free software; you can redistribute it and/or modify it
8  *      under the terms of the GNU General Public License version 2 as
9  *      published by the Free Software Foundation.
10  */
11
12 #include <linux/dccp.h>
13 #include <linux/module.h>
14 #include <linux/types.h>
15 #include <linux/sched.h>
16 #include <linux/kernel.h>
17 #include <linux/skbuff.h>
18 #include <linux/netdevice.h>
19 #include <linux/in.h>
20 #include <linux/if_arp.h>
21 #include <linux/init.h>
22 #include <linux/random.h>
23 #include <net/checksum.h>
24
25 #include <net/inet_sock.h>
26 #include <net/sock.h>
27 #include <net/xfrm.h>
28
29 #include <asm/ioctls.h>
30 #include <asm/semaphore.h>
31 #include <linux/spinlock.h>
32 #include <linux/timer.h>
33 #include <linux/delay.h>
34 #include <linux/poll.h>
35
36 #include "ccid.h"
37 #include "dccp.h"
38 #include "feat.h"
39
40 DEFINE_SNMP_STAT(struct dccp_mib, dccp_statistics) __read_mostly;
41
42 EXPORT_SYMBOL_GPL(dccp_statistics);
43
44 atomic_t dccp_orphan_count = ATOMIC_INIT(0);
45
46 EXPORT_SYMBOL_GPL(dccp_orphan_count);
47
48 struct inet_hashinfo __cacheline_aligned dccp_hashinfo = {
49         .lhash_lock     = RW_LOCK_UNLOCKED,
50         .lhash_users    = ATOMIC_INIT(0),
51         .lhash_wait = __WAIT_QUEUE_HEAD_INITIALIZER(dccp_hashinfo.lhash_wait),
52 };
53
54 EXPORT_SYMBOL_GPL(dccp_hashinfo);
55
56 /* the maximum queue length for tx in packets. 0 is no limit */
57 int sysctl_dccp_tx_qlen __read_mostly = 5;
58
59 void dccp_set_state(struct sock *sk, const int state)
60 {
61         const int oldstate = sk->sk_state;
62
63         dccp_pr_debug("%s(%p)  %s  -->  %s\n", dccp_role(sk), sk,
64                       dccp_state_name(oldstate), dccp_state_name(state));
65         WARN_ON(state == oldstate);
66
67         switch (state) {
68         case DCCP_OPEN:
69                 if (oldstate != DCCP_OPEN)
70                         DCCP_INC_STATS(DCCP_MIB_CURRESTAB);
71                 break;
72
73         case DCCP_CLOSED:
74                 if (oldstate == DCCP_OPEN || oldstate == DCCP_ACTIVE_CLOSEREQ ||
75                     oldstate == DCCP_CLOSING)
76                         DCCP_INC_STATS(DCCP_MIB_ESTABRESETS);
77
78                 sk->sk_prot->unhash(sk);
79                 if (inet_csk(sk)->icsk_bind_hash != NULL &&
80                     !(sk->sk_userlocks & SOCK_BINDPORT_LOCK))
81                         inet_put_port(&dccp_hashinfo, sk);
82                 /* fall through */
83         default:
84                 if (oldstate == DCCP_OPEN)
85                         DCCP_DEC_STATS(DCCP_MIB_CURRESTAB);
86         }
87
88         /* Change state AFTER socket is unhashed to avoid closed
89          * socket sitting in hash tables.
90          */
91         sk->sk_state = state;
92 }
93
94 EXPORT_SYMBOL_GPL(dccp_set_state);
95
96 static void dccp_finish_passive_close(struct sock *sk)
97 {
98         switch (sk->sk_state) {
99         case DCCP_PASSIVE_CLOSE:
100                 /* Node (client or server) has received Close packet. */
101                 dccp_send_reset(sk, DCCP_RESET_CODE_CLOSED);
102                 dccp_set_state(sk, DCCP_CLOSED);
103                 break;
104         case DCCP_PASSIVE_CLOSEREQ:
105                 /*
106                  * Client received CloseReq. We set the `active' flag so that
107                  * dccp_send_close() retransmits the Close as per RFC 4340, 8.3.
108                  */
109                 dccp_send_close(sk, 1);
110                 dccp_set_state(sk, DCCP_CLOSING);
111         }
112 }
113
114 void dccp_done(struct sock *sk)
115 {
116         dccp_set_state(sk, DCCP_CLOSED);
117         dccp_clear_xmit_timers(sk);
118
119         sk->sk_shutdown = SHUTDOWN_MASK;
120
121         if (!sock_flag(sk, SOCK_DEAD))
122                 sk->sk_state_change(sk);
123         else
124                 inet_csk_destroy_sock(sk);
125 }
126
127 EXPORT_SYMBOL_GPL(dccp_done);
128
129 const char *dccp_packet_name(const int type)
130 {
131         static const char *dccp_packet_names[] = {
132                 [DCCP_PKT_REQUEST]  = "REQUEST",
133                 [DCCP_PKT_RESPONSE] = "RESPONSE",
134                 [DCCP_PKT_DATA]     = "DATA",
135                 [DCCP_PKT_ACK]      = "ACK",
136                 [DCCP_PKT_DATAACK]  = "DATAACK",
137                 [DCCP_PKT_CLOSEREQ] = "CLOSEREQ",
138                 [DCCP_PKT_CLOSE]    = "CLOSE",
139                 [DCCP_PKT_RESET]    = "RESET",
140                 [DCCP_PKT_SYNC]     = "SYNC",
141                 [DCCP_PKT_SYNCACK]  = "SYNCACK",
142         };
143
144         if (type >= DCCP_NR_PKT_TYPES)
145                 return "INVALID";
146         else
147                 return dccp_packet_names[type];
148 }
149
150 EXPORT_SYMBOL_GPL(dccp_packet_name);
151
152 const char *dccp_state_name(const int state)
153 {
154         static char *dccp_state_names[] = {
155         [DCCP_OPEN]             = "OPEN",
156         [DCCP_REQUESTING]       = "REQUESTING",
157         [DCCP_PARTOPEN]         = "PARTOPEN",
158         [DCCP_LISTEN]           = "LISTEN",
159         [DCCP_RESPOND]          = "RESPOND",
160         [DCCP_CLOSING]          = "CLOSING",
161         [DCCP_ACTIVE_CLOSEREQ]  = "CLOSEREQ",
162         [DCCP_PASSIVE_CLOSE]    = "PASSIVE_CLOSE",
163         [DCCP_PASSIVE_CLOSEREQ] = "PASSIVE_CLOSEREQ",
164         [DCCP_TIME_WAIT]        = "TIME_WAIT",
165         [DCCP_CLOSED]           = "CLOSED",
166         };
167
168         if (state >= DCCP_MAX_STATES)
169                 return "INVALID STATE!";
170         else
171                 return dccp_state_names[state];
172 }
173
174 EXPORT_SYMBOL_GPL(dccp_state_name);
175
176 void dccp_hash(struct sock *sk)
177 {
178         inet_hash(&dccp_hashinfo, sk);
179 }
180
181 EXPORT_SYMBOL_GPL(dccp_hash);
182
183 void dccp_unhash(struct sock *sk)
184 {
185         inet_unhash(&dccp_hashinfo, sk);
186 }
187
188 EXPORT_SYMBOL_GPL(dccp_unhash);
189
190 int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
191 {
192         struct dccp_sock *dp = dccp_sk(sk);
193         struct dccp_minisock *dmsk = dccp_msk(sk);
194         struct inet_connection_sock *icsk = inet_csk(sk);
195
196         dccp_minisock_init(&dp->dccps_minisock);
197
198         icsk->icsk_rto          = DCCP_TIMEOUT_INIT;
199         icsk->icsk_syn_retries  = sysctl_dccp_request_retries;
200         sk->sk_state            = DCCP_CLOSED;
201         sk->sk_write_space      = dccp_write_space;
202         icsk->icsk_sync_mss     = dccp_sync_mss;
203         dp->dccps_mss_cache     = 536;
204         dp->dccps_rate_last     = jiffies;
205         dp->dccps_role          = DCCP_ROLE_UNDEFINED;
206         dp->dccps_service       = DCCP_SERVICE_CODE_IS_ABSENT;
207         dp->dccps_l_ack_ratio   = dp->dccps_r_ack_ratio = 1;
208
209         dccp_init_xmit_timers(sk);
210
211         /*
212          * FIXME: We're hardcoding the CCID, and doing this at this point makes
213          * the listening (master) sock get CCID control blocks, which is not
214          * necessary, but for now, to not mess with the test userspace apps,
215          * lets leave it here, later the real solution is to do this in a
216          * setsockopt(CCIDs-I-want/accept). -acme
217          */
218         if (likely(ctl_sock_initialized)) {
219                 int rc = dccp_feat_init(dmsk);
220
221                 if (rc)
222                         return rc;
223
224                 if (dmsk->dccpms_send_ack_vector) {
225                         dp->dccps_hc_rx_ackvec = dccp_ackvec_alloc(GFP_KERNEL);
226                         if (dp->dccps_hc_rx_ackvec == NULL)
227                                 return -ENOMEM;
228                 }
229                 dp->dccps_hc_rx_ccid = ccid_hc_rx_new(dmsk->dccpms_rx_ccid,
230                                                       sk, GFP_KERNEL);
231                 dp->dccps_hc_tx_ccid = ccid_hc_tx_new(dmsk->dccpms_tx_ccid,
232                                                       sk, GFP_KERNEL);
233                 if (unlikely(dp->dccps_hc_rx_ccid == NULL ||
234                              dp->dccps_hc_tx_ccid == NULL)) {
235                         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
236                         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
237                         if (dmsk->dccpms_send_ack_vector) {
238                                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
239                                 dp->dccps_hc_rx_ackvec = NULL;
240                         }
241                         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
242                         return -ENOMEM;
243                 }
244         } else {
245                 /* control socket doesn't need feat nego */
246                 INIT_LIST_HEAD(&dmsk->dccpms_pending);
247                 INIT_LIST_HEAD(&dmsk->dccpms_conf);
248         }
249
250         return 0;
251 }
252
253 EXPORT_SYMBOL_GPL(dccp_init_sock);
254
255 int dccp_destroy_sock(struct sock *sk)
256 {
257         struct dccp_sock *dp = dccp_sk(sk);
258         struct dccp_minisock *dmsk = dccp_msk(sk);
259
260         /*
261          * DCCP doesn't use sk_write_queue, just sk_send_head
262          * for retransmissions
263          */
264         if (sk->sk_send_head != NULL) {
265                 kfree_skb(sk->sk_send_head);
266                 sk->sk_send_head = NULL;
267         }
268
269         /* Clean up a referenced DCCP bind bucket. */
270         if (inet_csk(sk)->icsk_bind_hash != NULL)
271                 inet_put_port(&dccp_hashinfo, sk);
272
273         kfree(dp->dccps_service_list);
274         dp->dccps_service_list = NULL;
275
276         if (dmsk->dccpms_send_ack_vector) {
277                 dccp_ackvec_free(dp->dccps_hc_rx_ackvec);
278                 dp->dccps_hc_rx_ackvec = NULL;
279         }
280         ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
281         ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
282         dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
283
284         /* clean up feature negotiation state */
285         dccp_feat_clean(dmsk);
286
287         return 0;
288 }
289
290 EXPORT_SYMBOL_GPL(dccp_destroy_sock);
291
292 static inline int dccp_listen_start(struct sock *sk, int backlog)
293 {
294         struct dccp_sock *dp = dccp_sk(sk);
295
296         dp->dccps_role = DCCP_ROLE_LISTEN;
297         return inet_csk_listen_start(sk, backlog);
298 }
299
300 static inline int dccp_need_reset(int state)
301 {
302         return state != DCCP_CLOSED && state != DCCP_LISTEN &&
303                state != DCCP_REQUESTING;
304 }
305
306 int dccp_disconnect(struct sock *sk, int flags)
307 {
308         struct inet_connection_sock *icsk = inet_csk(sk);
309         struct inet_sock *inet = inet_sk(sk);
310         int err = 0;
311         const int old_state = sk->sk_state;
312
313         if (old_state != DCCP_CLOSED)
314                 dccp_set_state(sk, DCCP_CLOSED);
315
316         /*
317          * This corresponds to the ABORT function of RFC793, sec. 3.8
318          * TCP uses a RST segment, DCCP a Reset packet with Code 2, "Aborted".
319          */
320         if (old_state == DCCP_LISTEN) {
321                 inet_csk_listen_stop(sk);
322         } else if (dccp_need_reset(old_state)) {
323                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
324                 sk->sk_err = ECONNRESET;
325         } else if (old_state == DCCP_REQUESTING)
326                 sk->sk_err = ECONNRESET;
327
328         dccp_clear_xmit_timers(sk);
329         __skb_queue_purge(&sk->sk_receive_queue);
330         if (sk->sk_send_head != NULL) {
331                 __kfree_skb(sk->sk_send_head);
332                 sk->sk_send_head = NULL;
333         }
334
335         inet->dport = 0;
336
337         if (!(sk->sk_userlocks & SOCK_BINDADDR_LOCK))
338                 inet_reset_saddr(sk);
339
340         sk->sk_shutdown = 0;
341         sock_reset_flag(sk, SOCK_DONE);
342
343         icsk->icsk_backoff = 0;
344         inet_csk_delack_init(sk);
345         __sk_dst_reset(sk);
346
347         BUG_TRAP(!inet->num || icsk->icsk_bind_hash);
348
349         sk->sk_error_report(sk);
350         return err;
351 }
352
353 EXPORT_SYMBOL_GPL(dccp_disconnect);
354
355 /*
356  *      Wait for a DCCP event.
357  *
358  *      Note that we don't need to lock the socket, as the upper poll layers
359  *      take care of normal races (between the test and the event) and we don't
360  *      go look at any of the socket buffers directly.
361  */
362 unsigned int dccp_poll(struct file *file, struct socket *sock,
363                        poll_table *wait)
364 {
365         unsigned int mask;
366         struct sock *sk = sock->sk;
367
368         poll_wait(file, sk->sk_sleep, wait);
369         if (sk->sk_state == DCCP_LISTEN)
370                 return inet_csk_listen_poll(sk);
371
372         /* Socket is not locked. We are protected from async events
373            by poll logic and correct handling of state changes
374            made by another threads is impossible in any case.
375          */
376
377         mask = 0;
378         if (sk->sk_err)
379                 mask = POLLERR;
380
381         if (sk->sk_shutdown == SHUTDOWN_MASK || sk->sk_state == DCCP_CLOSED)
382                 mask |= POLLHUP;
383         if (sk->sk_shutdown & RCV_SHUTDOWN)
384                 mask |= POLLIN | POLLRDNORM | POLLRDHUP;
385
386         /* Connected? */
387         if ((1 << sk->sk_state) & ~(DCCPF_REQUESTING | DCCPF_RESPOND)) {
388                 if (atomic_read(&sk->sk_rmem_alloc) > 0)
389                         mask |= POLLIN | POLLRDNORM;
390
391                 if (!(sk->sk_shutdown & SEND_SHUTDOWN)) {
392                         if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk)) {
393                                 mask |= POLLOUT | POLLWRNORM;
394                         } else {  /* send SIGIO later */
395                                 set_bit(SOCK_ASYNC_NOSPACE,
396                                         &sk->sk_socket->flags);
397                                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
398
399                                 /* Race breaker. If space is freed after
400                                  * wspace test but before the flags are set,
401                                  * IO signal will be lost.
402                                  */
403                                 if (sk_stream_wspace(sk) >= sk_stream_min_wspace(sk))
404                                         mask |= POLLOUT | POLLWRNORM;
405                         }
406                 }
407         }
408         return mask;
409 }
410
411 EXPORT_SYMBOL_GPL(dccp_poll);
412
413 int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
414 {
415         int rc = -ENOTCONN;
416
417         lock_sock(sk);
418
419         if (sk->sk_state == DCCP_LISTEN)
420                 goto out;
421
422         switch (cmd) {
423         case SIOCINQ: {
424                 struct sk_buff *skb;
425                 unsigned long amount = 0;
426
427                 skb = skb_peek(&sk->sk_receive_queue);
428                 if (skb != NULL) {
429                         /*
430                          * We will only return the amount of this packet since
431                          * that is all that will be read.
432                          */
433                         amount = skb->len;
434                 }
435                 rc = put_user(amount, (int __user *)arg);
436         }
437                 break;
438         default:
439                 rc = -ENOIOCTLCMD;
440                 break;
441         }
442 out:
443         release_sock(sk);
444         return rc;
445 }
446
447 EXPORT_SYMBOL_GPL(dccp_ioctl);
448
449 static int dccp_setsockopt_service(struct sock *sk, const __be32 service,
450                                    char __user *optval, int optlen)
451 {
452         struct dccp_sock *dp = dccp_sk(sk);
453         struct dccp_service_list *sl = NULL;
454
455         if (service == DCCP_SERVICE_INVALID_VALUE ||
456             optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
457                 return -EINVAL;
458
459         if (optlen > sizeof(service)) {
460                 sl = kmalloc(optlen, GFP_KERNEL);
461                 if (sl == NULL)
462                         return -ENOMEM;
463
464                 sl->dccpsl_nr = optlen / sizeof(u32) - 1;
465                 if (copy_from_user(sl->dccpsl_list,
466                                    optval + sizeof(service),
467                                    optlen - sizeof(service)) ||
468                     dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
469                         kfree(sl);
470                         return -EFAULT;
471                 }
472         }
473
474         lock_sock(sk);
475         dp->dccps_service = service;
476
477         kfree(dp->dccps_service_list);
478
479         dp->dccps_service_list = sl;
480         release_sock(sk);
481         return 0;
482 }
483
484 /* byte 1 is feature.  the rest is the preference list */
485 static int dccp_setsockopt_change(struct sock *sk, int type,
486                                   struct dccp_so_feat __user *optval)
487 {
488         struct dccp_so_feat opt;
489         u8 *val;
490         int rc;
491
492         if (copy_from_user(&opt, optval, sizeof(opt)))
493                 return -EFAULT;
494
495         val = kmalloc(opt.dccpsf_len, GFP_KERNEL);
496         if (!val)
497                 return -ENOMEM;
498
499         if (copy_from_user(val, opt.dccpsf_val, opt.dccpsf_len)) {
500                 rc = -EFAULT;
501                 goto out_free_val;
502         }
503
504         rc = dccp_feat_change(dccp_msk(sk), type, opt.dccpsf_feat,
505                               val, opt.dccpsf_len, GFP_KERNEL);
506         if (rc)
507                 goto out_free_val;
508
509 out:
510         return rc;
511
512 out_free_val:
513         kfree(val);
514         goto out;
515 }
516
517 static int do_dccp_setsockopt(struct sock *sk, int level, int optname,
518                 char __user *optval, int optlen)
519 {
520         struct dccp_sock *dp = dccp_sk(sk);
521         int val, err = 0;
522
523         if (optlen < sizeof(int))
524                 return -EINVAL;
525
526         if (get_user(val, (int __user *)optval))
527                 return -EFAULT;
528
529         if (optname == DCCP_SOCKOPT_SERVICE)
530                 return dccp_setsockopt_service(sk, val, optval, optlen);
531
532         lock_sock(sk);
533         switch (optname) {
534         case DCCP_SOCKOPT_PACKET_SIZE:
535                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
536                 err = 0;
537                 break;
538         case DCCP_SOCKOPT_CHANGE_L:
539                 if (optlen != sizeof(struct dccp_so_feat))
540                         err = -EINVAL;
541                 else
542                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_L,
543                                                      (struct dccp_so_feat __user *)
544                                                      optval);
545                 break;
546         case DCCP_SOCKOPT_CHANGE_R:
547                 if (optlen != sizeof(struct dccp_so_feat))
548                         err = -EINVAL;
549                 else
550                         err = dccp_setsockopt_change(sk, DCCPO_CHANGE_R,
551                                                      (struct dccp_so_feat __user *)
552                                                      optval);
553                 break;
554         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
555                 if (dp->dccps_role != DCCP_ROLE_SERVER)
556                         err = -EOPNOTSUPP;
557                 else
558                         dp->dccps_server_timewait = (val != 0);
559                 break;
560         case DCCP_SOCKOPT_SEND_CSCOV:   /* sender side, RFC 4340, sec. 9.2 */
561                 if (val < 0 || val > 15)
562                         err = -EINVAL;
563                 else
564                         dp->dccps_pcslen = val;
565                 break;
566         case DCCP_SOCKOPT_RECV_CSCOV:   /* receiver side, RFC 4340 sec. 9.2.1 */
567                 if (val < 0 || val > 15)
568                         err = -EINVAL;
569                 else {
570                         dp->dccps_pcrlen = val;
571                         /* FIXME: add feature negotiation,
572                          * ChangeL(MinimumChecksumCoverage, val) */
573                 }
574                 break;
575         default:
576                 err = -ENOPROTOOPT;
577                 break;
578         }
579
580         release_sock(sk);
581         return err;
582 }
583
584 int dccp_setsockopt(struct sock *sk, int level, int optname,
585                     char __user *optval, int optlen)
586 {
587         if (level != SOL_DCCP)
588                 return inet_csk(sk)->icsk_af_ops->setsockopt(sk, level,
589                                                              optname, optval,
590                                                              optlen);
591         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
592 }
593
594 EXPORT_SYMBOL_GPL(dccp_setsockopt);
595
596 #ifdef CONFIG_COMPAT
597 int compat_dccp_setsockopt(struct sock *sk, int level, int optname,
598                            char __user *optval, int optlen)
599 {
600         if (level != SOL_DCCP)
601                 return inet_csk_compat_setsockopt(sk, level, optname,
602                                                   optval, optlen);
603         return do_dccp_setsockopt(sk, level, optname, optval, optlen);
604 }
605
606 EXPORT_SYMBOL_GPL(compat_dccp_setsockopt);
607 #endif
608
609 static int dccp_getsockopt_service(struct sock *sk, int len,
610                                    __be32 __user *optval,
611                                    int __user *optlen)
612 {
613         const struct dccp_sock *dp = dccp_sk(sk);
614         const struct dccp_service_list *sl;
615         int err = -ENOENT, slen = 0, total_len = sizeof(u32);
616
617         lock_sock(sk);
618         if ((sl = dp->dccps_service_list) != NULL) {
619                 slen = sl->dccpsl_nr * sizeof(u32);
620                 total_len += slen;
621         }
622
623         err = -EINVAL;
624         if (total_len > len)
625                 goto out;
626
627         err = 0;
628         if (put_user(total_len, optlen) ||
629             put_user(dp->dccps_service, optval) ||
630             (sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
631                 err = -EFAULT;
632 out:
633         release_sock(sk);
634         return err;
635 }
636
637 static int do_dccp_getsockopt(struct sock *sk, int level, int optname,
638                     char __user *optval, int __user *optlen)
639 {
640         struct dccp_sock *dp;
641         int val, len;
642
643         if (get_user(len, optlen))
644                 return -EFAULT;
645
646         if (len < (int)sizeof(int))
647                 return -EINVAL;
648
649         dp = dccp_sk(sk);
650
651         switch (optname) {
652         case DCCP_SOCKOPT_PACKET_SIZE:
653                 DCCP_WARN("sockopt(PACKET_SIZE) is deprecated: fix your app\n");
654                 return 0;
655         case DCCP_SOCKOPT_SERVICE:
656                 return dccp_getsockopt_service(sk, len,
657                                                (__be32 __user *)optval, optlen);
658         case DCCP_SOCKOPT_GET_CUR_MPS:
659                 val = dp->dccps_mss_cache;
660                 break;
661         case DCCP_SOCKOPT_SERVER_TIMEWAIT:
662                 val = dp->dccps_server_timewait;
663                 break;
664         case DCCP_SOCKOPT_SEND_CSCOV:
665                 val = dp->dccps_pcslen;
666                 break;
667         case DCCP_SOCKOPT_RECV_CSCOV:
668                 val = dp->dccps_pcrlen;
669                 break;
670         case 128 ... 191:
671                 return ccid_hc_rx_getsockopt(dp->dccps_hc_rx_ccid, sk, optname,
672                                              len, (u32 __user *)optval, optlen);
673         case 192 ... 255:
674                 return ccid_hc_tx_getsockopt(dp->dccps_hc_tx_ccid, sk, optname,
675                                              len, (u32 __user *)optval, optlen);
676         default:
677                 return -ENOPROTOOPT;
678         }
679
680         len = sizeof(val);
681         if (put_user(len, optlen) || copy_to_user(optval, &val, len))
682                 return -EFAULT;
683
684         return 0;
685 }
686
687 int dccp_getsockopt(struct sock *sk, int level, int optname,
688                     char __user *optval, int __user *optlen)
689 {
690         if (level != SOL_DCCP)
691                 return inet_csk(sk)->icsk_af_ops->getsockopt(sk, level,
692                                                              optname, optval,
693                                                              optlen);
694         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
695 }
696
697 EXPORT_SYMBOL_GPL(dccp_getsockopt);
698
699 #ifdef CONFIG_COMPAT
700 int compat_dccp_getsockopt(struct sock *sk, int level, int optname,
701                            char __user *optval, int __user *optlen)
702 {
703         if (level != SOL_DCCP)
704                 return inet_csk_compat_getsockopt(sk, level, optname,
705                                                   optval, optlen);
706         return do_dccp_getsockopt(sk, level, optname, optval, optlen);
707 }
708
709 EXPORT_SYMBOL_GPL(compat_dccp_getsockopt);
710 #endif
711
712 int dccp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
713                  size_t len)
714 {
715         const struct dccp_sock *dp = dccp_sk(sk);
716         const int flags = msg->msg_flags;
717         const int noblock = flags & MSG_DONTWAIT;
718         struct sk_buff *skb;
719         int rc, size;
720         long timeo;
721
722         if (len > dp->dccps_mss_cache)
723                 return -EMSGSIZE;
724
725         lock_sock(sk);
726
727         if (sysctl_dccp_tx_qlen &&
728             (sk->sk_write_queue.qlen >= sysctl_dccp_tx_qlen)) {
729                 rc = -EAGAIN;
730                 goto out_release;
731         }
732
733         timeo = sock_sndtimeo(sk, noblock);
734
735         /*
736          * We have to use sk_stream_wait_connect here to set sk_write_pending,
737          * so that the trick in dccp_rcv_request_sent_state_process.
738          */
739         /* Wait for a connection to finish. */
740         if ((1 << sk->sk_state) & ~(DCCPF_OPEN | DCCPF_PARTOPEN))
741                 if ((rc = sk_stream_wait_connect(sk, &timeo)) != 0)
742                         goto out_release;
743
744         size = sk->sk_prot->max_header + len;
745         release_sock(sk);
746         skb = sock_alloc_send_skb(sk, size, noblock, &rc);
747         lock_sock(sk);
748         if (skb == NULL)
749                 goto out_release;
750
751         skb_reserve(skb, sk->sk_prot->max_header);
752         rc = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len);
753         if (rc != 0)
754                 goto out_discard;
755
756         skb_queue_tail(&sk->sk_write_queue, skb);
757         dccp_write_xmit(sk,0);
758 out_release:
759         release_sock(sk);
760         return rc ? : len;
761 out_discard:
762         kfree_skb(skb);
763         goto out_release;
764 }
765
766 EXPORT_SYMBOL_GPL(dccp_sendmsg);
767
768 int dccp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
769                  size_t len, int nonblock, int flags, int *addr_len)
770 {
771         const struct dccp_hdr *dh;
772         long timeo;
773
774         lock_sock(sk);
775
776         if (sk->sk_state == DCCP_LISTEN) {
777                 len = -ENOTCONN;
778                 goto out;
779         }
780
781         timeo = sock_rcvtimeo(sk, nonblock);
782
783         do {
784                 struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
785
786                 if (skb == NULL)
787                         goto verify_sock_status;
788
789                 dh = dccp_hdr(skb);
790
791                 switch (dh->dccph_type) {
792                 case DCCP_PKT_DATA:
793                 case DCCP_PKT_DATAACK:
794                         goto found_ok_skb;
795
796                 case DCCP_PKT_CLOSE:
797                 case DCCP_PKT_CLOSEREQ:
798                         if (!(flags & MSG_PEEK))
799                                 dccp_finish_passive_close(sk);
800                         /* fall through */
801                 case DCCP_PKT_RESET:
802                         dccp_pr_debug("found fin (%s) ok!\n",
803                                       dccp_packet_name(dh->dccph_type));
804                         len = 0;
805                         goto found_fin_ok;
806                 default:
807                         dccp_pr_debug("packet_type=%s\n",
808                                       dccp_packet_name(dh->dccph_type));
809                         sk_eat_skb(sk, skb, 0);
810                 }
811 verify_sock_status:
812                 if (sock_flag(sk, SOCK_DONE)) {
813                         len = 0;
814                         break;
815                 }
816
817                 if (sk->sk_err) {
818                         len = sock_error(sk);
819                         break;
820                 }
821
822                 if (sk->sk_shutdown & RCV_SHUTDOWN) {
823                         len = 0;
824                         break;
825                 }
826
827                 if (sk->sk_state == DCCP_CLOSED) {
828                         if (!sock_flag(sk, SOCK_DONE)) {
829                                 /* This occurs when user tries to read
830                                  * from never connected socket.
831                                  */
832                                 len = -ENOTCONN;
833                                 break;
834                         }
835                         len = 0;
836                         break;
837                 }
838
839                 if (!timeo) {
840                         len = -EAGAIN;
841                         break;
842                 }
843
844                 if (signal_pending(current)) {
845                         len = sock_intr_errno(timeo);
846                         break;
847                 }
848
849                 sk_wait_data(sk, &timeo);
850                 continue;
851         found_ok_skb:
852                 if (len > skb->len)
853                         len = skb->len;
854                 else if (len < skb->len)
855                         msg->msg_flags |= MSG_TRUNC;
856
857                 if (skb_copy_datagram_iovec(skb, 0, msg->msg_iov, len)) {
858                         /* Exception. Bailout! */
859                         len = -EFAULT;
860                         break;
861                 }
862         found_fin_ok:
863                 if (!(flags & MSG_PEEK))
864                         sk_eat_skb(sk, skb, 0);
865                 break;
866         } while (1);
867 out:
868         release_sock(sk);
869         return len;
870 }
871
872 EXPORT_SYMBOL_GPL(dccp_recvmsg);
873
874 int inet_dccp_listen(struct socket *sock, int backlog)
875 {
876         struct sock *sk = sock->sk;
877         unsigned char old_state;
878         int err;
879
880         lock_sock(sk);
881
882         err = -EINVAL;
883         if (sock->state != SS_UNCONNECTED || sock->type != SOCK_DCCP)
884                 goto out;
885
886         old_state = sk->sk_state;
887         if (!((1 << old_state) & (DCCPF_CLOSED | DCCPF_LISTEN)))
888                 goto out;
889
890         /* Really, if the socket is already in listen state
891          * we can only allow the backlog to be adjusted.
892          */
893         if (old_state != DCCP_LISTEN) {
894                 /*
895                  * FIXME: here it probably should be sk->sk_prot->listen_start
896                  * see tcp_listen_start
897                  */
898                 err = dccp_listen_start(sk, backlog);
899                 if (err)
900                         goto out;
901         }
902         sk->sk_max_ack_backlog = backlog;
903         err = 0;
904
905 out:
906         release_sock(sk);
907         return err;
908 }
909
910 EXPORT_SYMBOL_GPL(inet_dccp_listen);
911
912 static void dccp_terminate_connection(struct sock *sk)
913 {
914         u8 next_state = DCCP_CLOSED;
915
916         switch (sk->sk_state) {
917         case DCCP_PASSIVE_CLOSE:
918         case DCCP_PASSIVE_CLOSEREQ:
919                 dccp_finish_passive_close(sk);
920                 break;
921         case DCCP_PARTOPEN:
922                 dccp_pr_debug("Stop PARTOPEN timer (%p)\n", sk);
923                 inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
924                 /* fall through */
925         case DCCP_OPEN:
926                 dccp_send_close(sk, 1);
927
928                 if (dccp_sk(sk)->dccps_role == DCCP_ROLE_SERVER &&
929                     !dccp_sk(sk)->dccps_server_timewait)
930                         next_state = DCCP_ACTIVE_CLOSEREQ;
931                 else
932                         next_state = DCCP_CLOSING;
933                 /* fall through */
934         default:
935                 dccp_set_state(sk, next_state);
936         }
937 }
938
939 void dccp_close(struct sock *sk, long timeout)
940 {
941         struct dccp_sock *dp = dccp_sk(sk);
942         struct sk_buff *skb;
943         u32 data_was_unread = 0;
944         int state;
945
946         lock_sock(sk);
947
948         sk->sk_shutdown = SHUTDOWN_MASK;
949
950         if (sk->sk_state == DCCP_LISTEN) {
951                 dccp_set_state(sk, DCCP_CLOSED);
952
953                 /* Special case. */
954                 inet_csk_listen_stop(sk);
955
956                 goto adjudge_to_death;
957         }
958
959         sk_stop_timer(sk, &dp->dccps_xmit_timer);
960
961         /*
962          * We need to flush the recv. buffs.  We do this only on the
963          * descriptor close, not protocol-sourced closes, because the
964           *reader process may not have drained the data yet!
965          */
966         while ((skb = __skb_dequeue(&sk->sk_receive_queue)) != NULL) {
967                 data_was_unread += skb->len;
968                 __kfree_skb(skb);
969         }
970
971         if (data_was_unread) {
972                 /* Unread data was tossed, send an appropriate Reset Code */
973                 DCCP_WARN("DCCP: ABORT -- %u bytes unread\n", data_was_unread);
974                 dccp_send_reset(sk, DCCP_RESET_CODE_ABORTED);
975                 dccp_set_state(sk, DCCP_CLOSED);
976         } else if (sock_flag(sk, SOCK_LINGER) && !sk->sk_lingertime) {
977                 /* Check zero linger _after_ checking for unread data. */
978                 sk->sk_prot->disconnect(sk, 0);
979         } else if (sk->sk_state != DCCP_CLOSED) {
980                 dccp_terminate_connection(sk);
981         }
982
983         sk_stream_wait_close(sk, timeout);
984
985 adjudge_to_death:
986         state = sk->sk_state;
987         sock_hold(sk);
988         sock_orphan(sk);
989         atomic_inc(sk->sk_prot->orphan_count);
990
991         /*
992          * It is the last release_sock in its life. It will remove backlog.
993          */
994         release_sock(sk);
995         /*
996          * Now socket is owned by kernel and we acquire BH lock
997          * to finish close. No need to check for user refs.
998          */
999         local_bh_disable();
1000         bh_lock_sock(sk);
1001         BUG_TRAP(!sock_owned_by_user(sk));
1002
1003         /* Have we already been destroyed by a softirq or backlog? */
1004         if (state != DCCP_CLOSED && sk->sk_state == DCCP_CLOSED)
1005                 goto out;
1006
1007         if (sk->sk_state == DCCP_CLOSED)
1008                 inet_csk_destroy_sock(sk);
1009
1010         /* Otherwise, socket is reprieved until protocol close. */
1011
1012 out:
1013         bh_unlock_sock(sk);
1014         local_bh_enable();
1015         sock_put(sk);
1016 }
1017
1018 EXPORT_SYMBOL_GPL(dccp_close);
1019
/*
 * dccp_shutdown  -  shutdown() handler for DCCP sockets
 * @sk:  socket the request refers to (not used)
 * @how: shutdown mode as passed in by the caller, printed in hex
 *
 * Currently a stub: the request is only reported via dccp_pr_debug() and
 * the socket is left untouched.
 */
void dccp_shutdown(struct sock *sk, int how)
{
        dccp_pr_debug("called shutdown(%x)\n", how);
}
1024
1025 EXPORT_SYMBOL_GPL(dccp_shutdown);
1026
1027 static int __init dccp_mib_init(void)
1028 {
1029         int rc = -ENOMEM;
1030
1031         dccp_statistics[0] = alloc_percpu(struct dccp_mib);
1032         if (dccp_statistics[0] == NULL)
1033                 goto out;
1034
1035         dccp_statistics[1] = alloc_percpu(struct dccp_mib);
1036         if (dccp_statistics[1] == NULL)
1037                 goto out_free_one;
1038
1039         rc = 0;
1040 out:
1041         return rc;
1042 out_free_one:
1043         free_percpu(dccp_statistics[0]);
1044         dccp_statistics[0] = NULL;
1045         goto out;
1046
1047 }
1048
1049 static void dccp_mib_exit(void)
1050 {
1051         free_percpu(dccp_statistics[0]);
1052         free_percpu(dccp_statistics[1]);
1053         dccp_statistics[0] = dccp_statistics[1] = NULL;
1054 }
1055
/*
 * thash_entries: module parameter (mode 0444). When non-zero, dccp_init()
 * derives its page goal for the established hash table from this value
 * instead of from num_physpages.
 */
1056 static int thash_entries;
1057 module_param(thash_entries, int, 0444);
1058 MODULE_PARM_DESC(thash_entries, "Number of ehash buckets");
1059
/*
 * dccp_debug: module parameter (mode 0444) that only exists when the kernel
 * is built with CONFIG_IP_DCCP_DEBUG. Exported (GPL) so that other modules
 * can reference it.
 */
1060 #ifdef CONFIG_IP_DCCP_DEBUG
1061 int dccp_debug;
1062 module_param(dccp_debug, bool, 0444);
1063 MODULE_PARM_DESC(dccp_debug, "Enable debug messages");
1064
1065 EXPORT_SYMBOL_GPL(dccp_debug);
1066 #endif
1067
1068 static int __init dccp_init(void)
1069 {
1070         unsigned long goal;
1071         int ehash_order, bhash_order, i;
1072         int rc = -ENOBUFS;
1073
1074         dccp_hashinfo.bind_bucket_cachep =
1075                 kmem_cache_create("dccp_bind_bucket",
1076                                   sizeof(struct inet_bind_bucket), 0,
1077                                   SLAB_HWCACHE_ALIGN, NULL);
1078         if (!dccp_hashinfo.bind_bucket_cachep)
1079                 goto out;
1080
1081         /*
1082          * Size and allocate the main established and bind bucket
1083          * hash tables.
1084          *
1085          * The methodology is similar to that of the buffer cache.
1086          */
1087         if (num_physpages >= (128 * 1024))
1088                 goal = num_physpages >> (21 - PAGE_SHIFT);
1089         else
1090                 goal = num_physpages >> (23 - PAGE_SHIFT);
1091
1092         if (thash_entries)
1093                 goal = (thash_entries *
1094                         sizeof(struct inet_ehash_bucket)) >> PAGE_SHIFT;
1095         for (ehash_order = 0; (1UL << ehash_order) < goal; ehash_order++)
1096                 ;
1097         do {
1098                 dccp_hashinfo.ehash_size = (1UL << ehash_order) * PAGE_SIZE /
1099                                         sizeof(struct inet_ehash_bucket);
1100                 while (dccp_hashinfo.ehash_size &
1101                        (dccp_hashinfo.ehash_size - 1))
1102                         dccp_hashinfo.ehash_size--;
1103                 dccp_hashinfo.ehash = (struct inet_ehash_bucket *)
1104                         __get_free_pages(GFP_ATOMIC, ehash_order);
1105         } while (!dccp_hashinfo.ehash && --ehash_order > 0);
1106
1107         if (!dccp_hashinfo.ehash) {
1108                 DCCP_CRIT("Failed to allocate DCCP established hash table");
1109                 goto out_free_bind_bucket_cachep;
1110         }
1111
1112         for (i = 0; i < dccp_hashinfo.ehash_size; i++) {
1113                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].chain);
1114                 INIT_HLIST_HEAD(&dccp_hashinfo.ehash[i].twchain);
1115         }
1116
1117         if (inet_ehash_locks_alloc(&dccp_hashinfo))
1118                         goto out_free_dccp_ehash;
1119
1120         bhash_order = ehash_order;
1121
1122         do {
1123                 dccp_hashinfo.bhash_size = (1UL << bhash_order) * PAGE_SIZE /
1124                                         sizeof(struct inet_bind_hashbucket);
1125                 if ((dccp_hashinfo.bhash_size > (64 * 1024)) &&
1126                     bhash_order > 0)
1127                         continue;
1128                 dccp_hashinfo.bhash = (struct inet_bind_hashbucket *)
1129                         __get_free_pages(GFP_ATOMIC, bhash_order);
1130         } while (!dccp_hashinfo.bhash && --bhash_order >= 0);
1131
1132         if (!dccp_hashinfo.bhash) {
1133                 DCCP_CRIT("Failed to allocate DCCP bind hash table");
1134                 goto out_free_dccp_locks;
1135         }
1136
1137         for (i = 0; i < dccp_hashinfo.bhash_size; i++) {
1138                 spin_lock_init(&dccp_hashinfo.bhash[i].lock);
1139                 INIT_HLIST_HEAD(&dccp_hashinfo.bhash[i].chain);
1140         }
1141
1142         rc = dccp_mib_init();
1143         if (rc)
1144                 goto out_free_dccp_bhash;
1145
1146         rc = dccp_ackvec_init();
1147         if (rc)
1148                 goto out_free_dccp_mib;
1149
1150         rc = dccp_sysctl_init();
1151         if (rc)
1152                 goto out_ackvec_exit;
1153
1154         dccp_timestamping_init();
1155 out:
1156         return rc;
1157 out_ackvec_exit:
1158         dccp_ackvec_exit();
1159 out_free_dccp_mib:
1160         dccp_mib_exit();
1161 out_free_dccp_bhash:
1162         free_pages((unsigned long)dccp_hashinfo.bhash, bhash_order);
1163         dccp_hashinfo.bhash = NULL;
1164 out_free_dccp_locks:
1165         inet_ehash_locks_free(&dccp_hashinfo);
1166 out_free_dccp_ehash:
1167         free_pages((unsigned long)dccp_hashinfo.ehash, ehash_order);
1168         dccp_hashinfo.ehash = NULL;
1169 out_free_bind_bucket_cachep:
1170         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1171         dccp_hashinfo.bind_bucket_cachep = NULL;
1172         goto out;
1173 }
1174
1175 static void __exit dccp_fini(void)
1176 {
1177         dccp_mib_exit();
1178         free_pages((unsigned long)dccp_hashinfo.bhash,
1179                    get_order(dccp_hashinfo.bhash_size *
1180                              sizeof(struct inet_bind_hashbucket)));
1181         free_pages((unsigned long)dccp_hashinfo.ehash,
1182                    get_order(dccp_hashinfo.ehash_size *
1183                              sizeof(struct inet_ehash_bucket)));
1184         inet_ehash_locks_free(&dccp_hashinfo);
1185         kmem_cache_destroy(dccp_hashinfo.bind_bucket_cachep);
1186         dccp_ackvec_exit();
1187         dccp_sysctl_exit();
1188 }
1189
/* Register dccp_init() and dccp_fini() as the module's init and exit hooks. */
1190 module_init(dccp_init);
1191 module_exit(dccp_fini);
1192
/* Module metadata: licence, author and a one-line description. */
1193 MODULE_LICENSE("GPL");
1194 MODULE_AUTHOR("Arnaldo Carvalho de Melo <acme@conectiva.com.br>");
1195 MODULE_DESCRIPTION("DCCP - Datagram Congestion Controlled Protocol");