/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>
#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
int inet6_csk_bind_conflict(const struct sock *sk,
			    const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	return inet_csk_get_port(&tcp_hashinfo, sk, snum,
				 inet6_csk_bind_conflict);
}
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(&tcp_hashinfo, sk);
		local_bh_enable();
	}
}
/*
 * Open request hash tables.
 */

static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32)rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
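
/*
 * A note on the hash above (sketch of the idea, not original commentary):
 * this is the classic open-coded jhash pattern.  The 128-bit peer address
 * and 16-bit peer port are folded into (a, b, c) over two __jhash_mix()
 * rounds, seeded with the per-listener random value lopt->hash_rnd, so a
 * remote host cannot predict which of the TCP_SYNQ_HSIZE buckets a given
 * (raddr, rport) pair lands in and cannot deliberately overload one chain.
 */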
static struct request_sock *tcp_v6_search_req(const struct sock *sk,
					      struct request_sock ***prevp,
					      const __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      const int iif)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
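
/*
 * csum_ipv6_magic() folds the IPv6 pseudo-header (source address,
 * destination address, upper-layer packet length and the next-header
 * value, here IPPROTO_TCP) into the partial checksum passed in `base',
 * as RFC 2460 section 8.1 requires for TCP carried over IPv6.
 */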
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if(*((__u32 *)&(tw->tw_dport)) == ports &&
		   sk2->sk_family == PF_INET6 &&
		   ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
		   ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hash = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
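
/*
 * The TIME-WAIT branch above is the tw_reuse optimization: when the old
 * incarnation's timestamp is more than a second stale (or the caller did
 * not ask for a timewait handle at all), the new connection may take over
 * the 4-tuple.  Seeding write_seq past tw_snd_nxt keeps the new sequence
 * space disjoint from the old one, so a stray segment from the dead
 * connection cannot be mistaken for ours.
 */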
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__inet6_hash(&tcp_hashinfo, sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb   = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__inet6_hash(&tcp_hashinfo, sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
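
/*
 * The ephemeral-port walk above probes at most `range' candidate ports,
 * starting at a per-destination offset derived from
 * secure_tcpv6_port_ephemeral().  Sockets connecting to different peers
 * therefore start their search at different points of the local port
 * range, which keeps bind-bucket collisions rare under heavy connect()
 * load while still making the chosen ports hard to predict remotely.
 */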
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connecting to a link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	/*
	 *	TCP over IPv4
	 */
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	if (np->opt == NULL)
		tp->ext_header_len = 0;
	else
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
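
/*
 * For reference, the path above is what a plain blocking connect()
 * exercises from user space; a minimal sketch (assumes a listener on
 * ::1 port 8080):
 *
 *	struct sockaddr_in6 a = { .sin6_family = AF_INET6,
 *				  .sin6_port   = htons(8080),
 *				  .sin6_addr   = IN6ADDR_LOOPBACK_INIT };
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	connect(fd, (struct sockaddr *)&a, sizeof(a));
 *
 * Passing a v4-mapped address (::ffff:a.b.c.d) instead lands on the
 * IPV6_ADDR_MAPPED branch and switches the socket to the ipv6_mapped ops.
 */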
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}
		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return err;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
		    ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
			return 1;
	}
	return 0;
}
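
/*
 * In other words: an incoming skb is worth latching only if the peer
 * sent an extension header (hop-by-hop, routing or destination options)
 * or flow information that this socket asked to receive.  A sketch of
 * the matching user-space request (any of the rxopt bits tested above
 * works the same way):
 *
 *	int on = 1;
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_RECVRTHDR, &on, sizeof(on));
 */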
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */
	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if(th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
			return;
		}
	}

	kfree_skb(buff);
}
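
/*
 * The sequence numbers chosen above follow RFC 793's reset rules: if the
 * offending segment carried an ACK, the RST claims that ACK value as its
 * own sequence number; otherwise the RST goes out with SEQ=0 and an ACK
 * covering everything the segment occupied (data plus SYN/FIN flags), so
 * the peer accepts the reset as in-window.
 */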
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff,tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
			ip6_xmit(NULL, buff, &fl, NULL, 0);
			TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
			return;
		}
	}

	kfree_skb(buff);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
}
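
/*
 * reqsk_queue_hash_req() links the request into the listener's SYN table
 * under the hash computed above; inet_csk_reqsk_queue_added() then bumps
 * the qlen/young accounting that inet_csk_reqsk_queue_is_full() and
 * inet_csk_reqsk_queue_young() consult in tcp_v6_conn_request() below.
 */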
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk,skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
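
/*
 * The shape of the slow path above: a freshly allocated request sock
 * parks in the SYN table, the SYN-ACK goes out with a secure ISN, and
 * the listener itself stays untouched until the handshake's final ACK
 * arrives and tcp_v6_hnd_req() promotes the request to a full socket
 * via tcp_v6_syn_recv_sock().
 */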
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}

	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we make one more one thing there: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__inet6_hash(&tcp_hashinfo, newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr,skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, 0);

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}
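
/*
 * Two-tier verification: if the NIC already summed the payload
 * (CHECKSUM_HW), the result only has to be checked against the
 * pseudo-header.  Otherwise the pseudo-header sum is seeded into
 * skb->csum; short packets (<= 76 bytes) are verified immediately,
 * while longer ones are checked later, when the data is copied anyway.
 */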
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */
	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
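
/*
 * The xchg() dance above publishes the latched skb in np->pktoptions
 * without an extra lock: the IPV6_PKTOPTIONS getsockopt path swaps the
 * pointer out the same way, so at most one reference ever sits in the
 * socket and whichever side loses the swap frees the old skb.
 */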
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb)))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),
			    inet6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:
	/*
	 *	Discard frame
	 */
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);
		goto discard_it;
	}

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
					   skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}
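
/*
 * This is what getpeername() on a tcp6 socket reports.  A sketch of
 * the matching user-space call (names per POSIX):
 *
 *	struct sockaddr_in6 peer;
 *	socklen_t len = sizeof(peer);
 *	getpeername(fd, (struct sockaddr *)&peer, &len);
 *
 * sin6_scope_id is only meaningful for link-local peers, which is why
 * it is filled in just for IPV6_ADDR_LINKLOCAL destinations.
 */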
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
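
/*
 * The initial snd_ssthresh of 0x7fffffff above means "no slow-start
 * threshold yet": slow start runs until the first loss.  mss_cache
 * starts at the conservative RFC 1122 default of 536 bytes and is
 * corrected as soon as the route's MTU is known.
 */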
static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0,0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tcp6tw->tw_v6_daddr;
	src  = &tcp6tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}
static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};
void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}