/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
static inline int tcp_v6_bind_conflict(const struct sock *sk,
				       const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
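/*
 * In short: the walk above reports a conflict only when every escape
 * hatch fails -- the two sockets share a device (or one is not bound to
 * any device), SO_REUSEADDR does not help (one side lacks it, or the
 * existing owner is still listening), and ipv6_rcv_saddr_equal() sees
 * overlapping local addresses, which is also what catches IPv4-mapped
 * IPv6 binds clashing with plain IPv4 binds on the same port.
 */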
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
105 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
107 struct inet_bind_hashbucket *head;
108 struct inet_bind_bucket *tb;
109 struct hlist_node *node;
114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1;
117 int rover = net_random() % (high - low) + low;
120 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
121 spin_lock(&head->lock);
122 inet_bind_bucket_for_each(tb, node, &head->chain)
123 if (tb->port == rover)
127 spin_unlock(&head->lock);
130 } while (--remaining > 0);
		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
139 if (unlikely(remaining <= 0))
142 /* OK, here is the one we will use. */
145 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
146 spin_lock(&head->lock);
147 inet_bind_bucket_for_each(tb, node, &head->chain)
148 if (tb->port == snum)
154 if (tb && !hlist_empty(&tb->owners)) {
155 if (tb->fastreuse > 0 && sk->sk_reuse &&
156 sk->sk_state != TCP_LISTEN) {
160 if (tcp_v6_bind_conflict(sk, tb))
167 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
171 if (hlist_empty(&tb->owners)) {
172 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
176 } else if (tb->fastreuse &&
177 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
181 if (!inet_csk(sk)->icsk_bind_hash)
182 inet_bind_hash(sk, tb, snum);
183 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
187 spin_unlock(&head->lock);
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		inet_listen_wlock(&tcp_hashinfo);
	} else {
		unsigned int hash;
		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
		hash &= (tcp_hashinfo.ehash_size - 1);
		list = &tcp_hashinfo.ehash[hash].chain;
		lock = &tcp_hashinfo.ehash[hash].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}
219 static void tcp_v6_hash(struct sock *sk)
221 if (sk->sk_state != TCP_CLOSE) {
222 struct tcp_sock *tp = tcp_sk(sk);
224 if (tp->af_specific == &ipv6_mapped) {
/* Open request hash tables. */
static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32)rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
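/*
 * Roughly: two rounds of Jenkins' __jhash_mix() over the peer's address
 * and port, seeded with the per-listener random value (rnd is folded
 * into 'c' in the first round, the remote port into 'b' in the second).
 * A remote attacker who cannot guess hash_rnd therefore cannot aim all
 * of its bogus SYNs at a single syn_table chain.  Only the low-order
 * bits survive because TCP_SYNQ_HSIZE is a power of two.
 */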
static struct request_sock *tcp_v6_search_req(const struct sock *sk,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
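/*
 * csum_ipv6_magic() adds the IPv6 pseudo-header -- source address,
 * destination address, upper-layer length and next-header value
 * (IPPROTO_TCP) -- to 'base', which is expected to already hold the
 * sum over the TCP header and payload (RFC 2460, section 8.1).
 */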
296 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
298 if (skb->protocol == htons(ETH_P_IPV6)) {
299 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
300 skb->nh.ipv6h->saddr.s6_addr32,
304 return secure_tcp_sequence_number(skb->nh.iph->daddr,
311 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
312 struct inet_timewait_sock **twp)
314 struct inet_sock *inet = inet_sk(sk);
315 const struct ipv6_pinfo *np = inet6_sk(sk);
316 const struct in6_addr *daddr = &np->rcv_saddr;
317 const struct in6_addr *saddr = &np->daddr;
318 const int dif = sk->sk_bound_dev_if;
319 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
320 unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
321 struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
323 const struct hlist_node *node;
324 struct inet_timewait_sock *tw;
326 prefetch(head->chain.first);
327 write_lock(&head->lock);
329 /* Check TIME-WAIT sockets first. */
330 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
331 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
335 if(*((__u32 *)&(tw->tw_dport)) == ports &&
336 sk2->sk_family == PF_INET6 &&
337 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
338 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
339 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
340 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
341 struct tcp_sock *tp = tcp_sk(sk);
			if (tcptw->tw_ts_recent_stamp &&
			    (twp == NULL ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
347 /* See comment in tcp_ipv4.c */
348 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
351 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
352 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
361 /* And established part... */
362 sk_for_each(sk2, node, &head->chain) {
363 if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
368 BUG_TRAP(sk_unhashed(sk));
369 __sk_add_node(sk, &head->chain);
371 sock_prot_inc_use(sk->sk_prot);
372 write_unlock(&head->lock);
376 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
378 /* Silly. Should hash-dance instead... */
379 inet_twsk_deschedule(tw, &tcp_death_row);
380 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
387 write_unlock(&head->lock);
388 return -EADDRNOTAVAIL;
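/*
 * Summary of the TIME-WAIT branch above: an old timewait socket on the
 * same 4-tuple may be recycled when it recorded peer timestamps and
 * either no twp was asked for or net.ipv4.tcp_tw_reuse is set and its
 * last timestamp is over a second old; write_seq then starts beyond
 * tw_snd_nxt and ts_recent is inherited so PAWS keeps the two
 * incarnations apart.  Any other exact established/timewait match means
 * the 4-tuple is taken and the connect fails with -EADDRNOTAVAIL.
 */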
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32, inet->dport);
}
401 static int tcp_v6_hash_connect(struct sock *sk)
403 unsigned short snum = inet_sk(sk)->num;
404 struct inet_bind_hashbucket *head;
405 struct inet_bind_bucket *tb;
409 int low = sysctl_local_port_range[0];
410 int high = sysctl_local_port_range[1];
411 int range = high - low;
415 u32 offset = hint + tcpv6_port_offset(sk);
416 struct hlist_node *node;
417 struct inet_timewait_sock *tw = NULL;
420 for (i = 1; i <= range; i++) {
421 port = low + (i + offset) % range;
422 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
423 spin_lock(&head->lock);
		/* Does not bother with rcv_saddr checks,
		 * because the established check is already
		 * unique enough.
		 */
429 inet_bind_bucket_for_each(tb, node, &head->chain) {
430 if (tb->port == port) {
431 BUG_TRAP(!hlist_empty(&tb->owners));
432 if (tb->fastreuse >= 0)
434 if (!__tcp_v6_check_established(sk,
442 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
444 spin_unlock(&head->lock);
451 spin_unlock(&head->lock);
455 return -EADDRNOTAVAIL;
460 /* Head lock still held and bh's disabled */
461 inet_bind_hash(sk, tb, port);
462 if (sk_unhashed(sk)) {
463 inet_sk(sk)->sport = htons(port);
466 spin_unlock(&head->lock);
469 inet_twsk_deschedule(tw, &tcp_death_row);
477 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
478 tb = inet_csk(sk)->icsk_bind_hash;
479 spin_lock_bh(&head->lock);
481 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
483 spin_unlock_bh(&head->lock);
486 spin_unlock(&head->lock);
487 /* No definite answer... Walk to established hash table */
488 ret = __tcp_v6_check_established(sk, snum, NULL);
495 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
498 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
499 struct inet_sock *inet = inet_sk(sk);
500 struct ipv6_pinfo *np = inet6_sk(sk);
501 struct tcp_sock *tp = tcp_sk(sk);
502 struct in6_addr *saddr = NULL, *final_p = NULL, final;
504 struct dst_entry *dst;
508 if (addr_len < SIN6_LEN_RFC2133)
511 if (usin->sin6_family != AF_INET6)
512 return(-EAFNOSUPPORT);
514 memset(&fl, 0, sizeof(fl));
517 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
518 IP6_ECN_flow_init(fl.fl6_flowlabel);
519 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
520 struct ip6_flowlabel *flowlabel;
521 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
522 if (flowlabel == NULL)
524 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
525 fl6_sock_release(flowlabel);
	/* connect() to INADDR_ANY means loopback (BSD'ism). */
533 if(ipv6_addr_any(&usin->sin6_addr))
534 usin->sin6_addr.s6_addr[15] = 0x1;
536 addr_type = ipv6_addr_type(&usin->sin6_addr);
538 if(addr_type & IPV6_ADDR_MULTICAST)
541 if (addr_type&IPV6_ADDR_LINKLOCAL) {
542 if (addr_len >= sizeof(struct sockaddr_in6) &&
543 usin->sin6_scope_id) {
			/* If interface is set while binding, indices must coincide. */
547 if (sk->sk_bound_dev_if &&
548 sk->sk_bound_dev_if != usin->sin6_scope_id)
551 sk->sk_bound_dev_if = usin->sin6_scope_id;
554 /* Connect to link-local address requires an interface */
555 if (!sk->sk_bound_dev_if)
559 if (tp->rx_opt.ts_recent_stamp &&
560 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
561 tp->rx_opt.ts_recent = 0;
562 tp->rx_opt.ts_recent_stamp = 0;
566 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
567 np->flow_label = fl.fl6_flowlabel;
573 if (addr_type == IPV6_ADDR_MAPPED) {
574 u32 exthdrlen = tp->ext_header_len;
575 struct sockaddr_in sin;
577 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
579 if (__ipv6_only_sock(sk))
582 sin.sin_family = AF_INET;
583 sin.sin_port = usin->sin6_port;
584 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
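		/* An IPv4-mapped destination has the form ::ffff:a.b.c.d,
		 * so the real IPv4 address is simply the last 32-bit word
		 * of the IPv6 address; from here on the socket is driven
		 * by the ipv6_mapped (IPv4) af_specific operations.
		 */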
586 tp->af_specific = &ipv6_mapped;
587 sk->sk_backlog_rcv = tcp_v4_do_rcv;
589 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
592 tp->ext_header_len = exthdrlen;
593 tp->af_specific = &ipv6_specific;
594 sk->sk_backlog_rcv = tcp_v6_do_rcv;
597 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
599 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
606 if (!ipv6_addr_any(&np->rcv_saddr))
607 saddr = &np->rcv_saddr;
609 fl.proto = IPPROTO_TCP;
610 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
611 ipv6_addr_copy(&fl.fl6_src,
612 (saddr ? saddr : &np->saddr));
613 fl.oif = sk->sk_bound_dev_if;
614 fl.fl_ip_dport = usin->sin6_port;
615 fl.fl_ip_sport = inet->sport;
617 if (np->opt && np->opt->srcrt) {
618 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
619 ipv6_addr_copy(&final, &fl.fl6_dst);
620 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
624 err = ip6_dst_lookup(sk, &dst, &fl);
628 ipv6_addr_copy(&fl.fl6_dst, final_p);
630 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
635 ipv6_addr_copy(&np->rcv_saddr, saddr);
638 /* set the source address */
639 ipv6_addr_copy(&np->saddr, saddr);
640 inet->rcv_saddr = LOOPBACK4_IPV6;
642 ip6_dst_store(sk, dst, NULL);
643 sk->sk_route_caps = dst->dev->features &
644 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
646 tp->ext_header_len = 0;
648 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
650 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
652 inet->dport = usin->sin6_port;
654 tcp_set_state(sk, TCP_SYN_SENT);
655 err = tcp_v6_hash_connect(sk);
660 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
665 err = tcp_connect(sk);
672 tcp_set_state(sk, TCP_CLOSE);
676 sk->sk_route_caps = 0;
680 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
681 int type, int code, int offset, __u32 info)
683 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
684 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
685 struct ipv6_pinfo *np;
691 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
692 th->source, skb->dev->ifindex);
695 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
699 if (sk->sk_state == TCP_TIME_WAIT) {
700 inet_twsk_put((struct inet_timewait_sock *)sk);
705 if (sock_owned_by_user(sk))
706 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
708 if (sk->sk_state == TCP_CLOSE)
712 seq = ntohl(th->seq);
713 if (sk->sk_state != TCP_LISTEN &&
714 !between(seq, tp->snd_una, tp->snd_nxt)) {
715 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
721 if (type == ICMPV6_PKT_TOOBIG) {
722 struct dst_entry *dst = NULL;
724 if (sock_owned_by_user(sk))
726 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
729 /* icmp should have updated the destination cache entry */
730 dst = __sk_dst_check(sk, np->dst_cookie);
733 struct inet_sock *inet = inet_sk(sk);
			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
740 memset(&fl, 0, sizeof(fl));
741 fl.proto = IPPROTO_TCP;
742 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
743 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
744 fl.oif = sk->sk_bound_dev_if;
745 fl.fl_ip_dport = inet->dport;
746 fl.fl_ip_sport = inet->sport;
748 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
749 sk->sk_err_soft = -err;
753 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
754 sk->sk_err_soft = -err;
761 if (tp->pmtu_cookie > dst_mtu(dst)) {
762 tcp_sync_mss(sk, dst_mtu(dst));
763 tcp_simple_retransmit(sk);
764 } /* else let the usual retransmit timer handle it */
769 icmpv6_err_convert(type, code, &err);
	/* Might be for a request_sock */
772 switch (sk->sk_state) {
773 struct request_sock *req, **prev;
775 if (sock_owned_by_user(sk))
778 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
779 &hdr->saddr, inet6_iif(skb));
		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
786 BUG_TRAP(req->sk == NULL);
788 if (seq != tcp_rsk(req)->snt_isn) {
789 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
793 inet_csk_reqsk_queue_drop(sk, req, prev);
	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
799 if (!sock_owned_by_user(sk)) {
800 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
802 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
806 sk->sk_err_soft = err;
810 if (!sock_owned_by_user(sk) && np->recverr) {
812 sk->sk_error_report(sk);
814 sk->sk_err_soft = err;
822 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
823 struct dst_entry *dst)
825 struct tcp6_request_sock *treq = tcp6_rsk(req);
826 struct ipv6_pinfo *np = inet6_sk(sk);
827 struct sk_buff * skb;
828 struct ipv6_txoptions *opt = NULL;
829 struct in6_addr * final_p = NULL, final;
833 memset(&fl, 0, sizeof(fl));
834 fl.proto = IPPROTO_TCP;
835 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
836 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
837 fl.fl6_flowlabel = 0;
839 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
840 fl.fl_ip_sport = inet_sk(sk)->sport;
845 np->rxopt.bits.osrcrt == 2 &&
847 struct sk_buff *pktopts = treq->pktopts;
848 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
850 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
853 if (opt && opt->srcrt) {
854 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
855 ipv6_addr_copy(&final, &fl.fl6_dst);
856 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
860 err = ip6_dst_lookup(sk, &dst, &fl);
864 ipv6_addr_copy(&fl.fl6_dst, final_p);
865 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
869 skb = tcp_make_synack(sk, dst, req);
871 struct tcphdr *th = skb->h.th;
873 th->check = tcp_v6_check(th, skb->len,
874 &treq->loc_addr, &treq->rmt_addr,
875 csum_partial((char *)th, skb->len, skb->csum));
877 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
878 err = ip6_xmit(sk, skb, &fl, opt, 0);
879 if (err == NET_XMIT_CN)
884 if (opt && opt != np->opt)
885 sock_kfree_s(sk, opt, opt->tot_len);
889 static void tcp_v6_reqsk_destructor(struct request_sock *req)
891 if (tcp6_rsk(req)->pktopts)
892 kfree_skb(tcp6_rsk(req)->pktopts);
895 static struct request_sock_ops tcp6_request_sock_ops = {
897 .obj_size = sizeof(struct tcp6_request_sock),
898 .rtx_syn_ack = tcp_v6_send_synack,
899 .send_ack = tcp_v6_reqsk_send_ack,
900 .destructor = tcp_v6_reqsk_destructor,
901 .send_reset = tcp_v6_send_reset
904 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
906 struct ipv6_pinfo *np = inet6_sk(sk);
907 struct inet6_skb_parm *opt = IP6CB(skb);
910 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
911 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
912 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
913 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}
936 static void tcp_v6_send_reset(struct sk_buff *skb)
938 struct tcphdr *th = skb->h.th, *t1;
939 struct sk_buff *buff;
	if (!ipv6_unicast_destination(skb))
		return;
	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */
953 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
958 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
960 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
962 /* Swap the send and the receive. */
963 memset(t1, 0, sizeof(*t1));
964 t1->dest = th->source;
965 t1->source = th->dest;
966 t1->doff = sizeof(*t1)/4;
970 t1->seq = th->ack_seq;
973 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
974 + skb->len - (th->doff<<2));
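		/* RFC 793 reset generation: if the offending segment had an
		 * ACK, the RST reuses that acknowledgment number as its own
		 * sequence number; otherwise we send seq 0 and acknowledge
		 * everything the segment occupied (SYN and FIN each count
		 * as one sequence number).
		 */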
977 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
979 memset(&fl, 0, sizeof(fl));
980 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
981 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
983 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
984 sizeof(*t1), IPPROTO_TCP,
987 fl.proto = IPPROTO_TCP;
988 fl.oif = inet6_iif(skb);
989 fl.fl_ip_dport = t1->dest;
990 fl.fl_ip_sport = t1->source;
992 /* sk = NULL, but it is safe for now. RST socket required. */
993 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
995 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
998 ip6_xmit(NULL, buff, &fl, NULL, 0);
999 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1000 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1007 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1009 struct tcphdr *th = skb->h.th, *t1;
1010 struct sk_buff *buff;
1012 int tot_len = sizeof(struct tcphdr);
1017 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1022 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1024 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1026 /* Swap the send and the receive. */
1027 memset(t1, 0, sizeof(*t1));
1028 t1->dest = th->source;
1029 t1->source = th->dest;
1030 t1->doff = tot_len/4;
1031 t1->seq = htonl(seq);
1032 t1->ack_seq = htonl(ack);
1034 t1->window = htons(win);
1037 u32 *ptr = (u32*)(t1 + 1);
1038 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1039 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1040 *ptr++ = htonl(tcp_time_stamp);
1044 buff->csum = csum_partial((char *)t1, tot_len, 0);
1046 memset(&fl, 0, sizeof(fl));
1047 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1048 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1050 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1051 tot_len, IPPROTO_TCP,
1054 fl.proto = IPPROTO_TCP;
1055 fl.oif = inet6_iif(skb);
1056 fl.fl_ip_dport = t1->dest;
1057 fl.fl_ip_sport = t1->source;
1059 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1060 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1062 ip6_xmit(NULL, buff, &fl, NULL, 0);
1063 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1070 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1072 struct inet_timewait_sock *tw = inet_twsk(sk);
1073 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1075 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1076 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1077 tcptw->tw_ts_recent);
1082 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1084 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1088 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1090 struct request_sock *req, **prev;
1091 const struct tcphdr *th = skb->h.th;
1094 /* Find possible connection requests. */
1095 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1096 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1098 return tcp_check_req(sk, skb, req, prev);
1100 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1101 th->source, &skb->nh.ipv6h->daddr,
1102 ntohs(th->dest), inet6_iif(skb));
1105 if (nsk->sk_state != TCP_TIME_WAIT) {
1109 inet_twsk_put((struct inet_timewait_sock *)nsk);
1113 #if 0 /*def CONFIG_SYN_COOKIES*/
1114 if (!th->rst && !th->syn && th->ack)
1115 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1120 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1122 struct inet_connection_sock *icsk = inet_csk(sk);
1123 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1124 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1126 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1127 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
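	/* The request now sits in the listener's syn_table bucket chosen
	 * above with a TCP_TIMEOUT_INIT expiry for SYN-ACK retransmission,
	 * and inet_csk_reqsk_queue_added() bumps the queue length so the
	 * accept-backlog and synflood checks in tcp_v6_conn_request() can
	 * see the pending request.
	 */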
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
1134 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1136 struct tcp6_request_sock *treq;
1137 struct ipv6_pinfo *np = inet6_sk(sk);
1138 struct tcp_options_received tmp_opt;
1139 struct tcp_sock *tp = tcp_sk(sk);
1140 struct request_sock *req = NULL;
1141 __u32 isn = TCP_SKB_CB(skb)->when;
1143 if (skb->protocol == htons(ETH_P_IP))
1144 return tcp_v4_conn_request(sk, skb);
1146 if (!ipv6_unicast_destination(skb))
	/* There are no SYN attacks on IPv6, yet... */
1152 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1153 if (net_ratelimit())
1154 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1158 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1161 req = reqsk_alloc(&tcp6_request_sock_ops);
1165 tcp_clear_options(&tmp_opt);
1166 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1167 tmp_opt.user_mss = tp->rx_opt.user_mss;
1169 tcp_parse_options(skb, &tmp_opt, 0);
1171 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1172 tcp_openreq_init(req, &tmp_opt, skb);
1174 treq = tcp6_rsk(req);
1175 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1176 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1177 TCP_ECN_create_request(req, skb->h.th);
1178 treq->pktopts = NULL;
1179 if (ipv6_opt_accepted(sk, skb) ||
1180 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1181 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1182 atomic_inc(&skb->users);
1183 treq->pktopts = skb;
1185 treq->iif = sk->sk_bound_dev_if;
1187 /* So that link locals have meaning */
1188 if (!sk->sk_bound_dev_if &&
1189 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1190 treq->iif = inet6_iif(skb);
1193 isn = tcp_v6_init_sequence(sk,skb);
1195 tcp_rsk(req)->snt_isn = isn;
1197 if (tcp_v6_send_synack(sk, req, NULL))
1200 tcp_v6_synq_add(sk, req);
1208 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1209 return 0; /* don't send reset */
1212 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1213 struct request_sock *req,
1214 struct dst_entry *dst)
1216 struct tcp6_request_sock *treq = tcp6_rsk(req);
1217 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1218 struct tcp6_sock *newtcp6sk;
1219 struct inet_sock *newinet;
1220 struct tcp_sock *newtp;
1222 struct ipv6_txoptions *opt;
1224 if (skb->protocol == htons(ETH_P_IP)) {
1229 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1234 newtcp6sk = (struct tcp6_sock *)newsk;
1235 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1237 newinet = inet_sk(newsk);
1238 newnp = inet6_sk(newsk);
1239 newtp = tcp_sk(newsk);
1241 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1243 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1246 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1249 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1251 newtp->af_specific = &ipv6_mapped;
1252 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1253 newnp->pktoptions = NULL;
1255 newnp->mcast_oif = inet6_iif(skb);
1256 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */
		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
1268 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1275 if (sk_acceptq_is_full(sk))
1278 if (np->rxopt.bits.osrcrt == 2 &&
1279 opt == NULL && treq->pktopts) {
1280 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1282 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1286 struct in6_addr *final_p = NULL, final;
1289 memset(&fl, 0, sizeof(fl));
1290 fl.proto = IPPROTO_TCP;
1291 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1292 if (opt && opt->srcrt) {
1293 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1294 ipv6_addr_copy(&final, &fl.fl6_dst);
1295 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1298 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1299 fl.oif = sk->sk_bound_dev_if;
1300 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1301 fl.fl_ip_sport = inet_sk(sk)->sport;
1303 if (ip6_dst_lookup(sk, &dst, &fl))
1307 ipv6_addr_copy(&fl.fl6_dst, final_p);
1309 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1313 newsk = tcp_create_openreq_child(sk, req, skb);
	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */
1323 ip6_dst_store(newsk, dst, NULL);
1324 newsk->sk_route_caps = dst->dev->features &
1325 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1327 newtcp6sk = (struct tcp6_sock *)newsk;
1328 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1330 newtp = tcp_sk(newsk);
1331 newinet = inet_sk(newsk);
1332 newnp = inet6_sk(newsk);
1334 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1336 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1337 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1338 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1339 newsk->sk_bound_dev_if = treq->iif;
	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
1345 newinet->opt = NULL;
1348 newnp->rxopt.all = np->rxopt.all;
1350 /* Clone pktoptions received with SYN */
1351 newnp->pktoptions = NULL;
1352 if (treq->pktopts != NULL) {
1353 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1354 kfree_skb(treq->pktopts);
1355 treq->pktopts = NULL;
1356 if (newnp->pktoptions)
1357 skb_set_owner_r(newnp->pktoptions, newsk);
1360 newnp->mcast_oif = inet6_iif(skb);
1361 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
	/* Clone native IPv6 options from the listening socket (if any).

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
1370 newnp->opt = ipv6_dup_options(newsk, opt);
1372 sock_kfree_s(sk, opt, opt->tot_len);
1375 newtp->ext_header_len = 0;
1377 newtp->ext_header_len = newnp->opt->opt_nflen +
1378 newnp->opt->opt_flen;
1380 tcp_sync_mss(newsk, dst_mtu(dst));
1381 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1382 tcp_initialize_rcv_mss(newsk);
1384 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1386 __tcp_v6_hash(newsk);
1387 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1392 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1394 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1395 if (opt && opt != np->opt)
1396 sock_kfree_s(sk, opt, opt->tot_len);
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr,
				 skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}
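/*
 * Rough behaviour of the helper above: a hardware-verified checksum is
 * cross-checked against the pseudo-header and, if it holds, the packet
 * is accepted as CHECKSUM_UNNECESSARY.  Short packets (<= 76 bytes) are
 * cheap enough to verify completely in software right away; for longer
 * ones only the pseudo-header sum is seeded into skb->csum and the full
 * verification is left to the later copy/checksum step in the receive
 * path.
 */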
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
1430 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1432 struct ipv6_pinfo *np = inet6_sk(sk);
1433 struct tcp_sock *tp;
1434 struct sk_buff *opt_skb = NULL;
	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to the IPv4 receive handler and is backlogged.
	   From the backlog it always comes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */
1444 if (skb->protocol == htons(ETH_P_IP))
1445 return tcp_v4_do_rcv(sk, skb);
1447 if (sk_filter(sk, skb, 0))
	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */
	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
1469 opt_skb = skb_clone(skb, GFP_ATOMIC);
1471 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1472 TCP_CHECK_TIMER(sk);
1473 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1475 TCP_CHECK_TIMER(sk);
1477 goto ipv6_pktoptions;
1481 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1484 if (sk->sk_state == TCP_LISTEN) {
1485 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		/* Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket.
		 */
1495 if (tcp_child_process(sk, nsk, skb))
1498 __kfree_skb(opt_skb);
1503 TCP_CHECK_TIMER(sk);
1504 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1506 TCP_CHECK_TIMER(sk);
1508 goto ipv6_pktoptions;
1512 tcp_v6_send_reset(skb);
1515 __kfree_skb(opt_skb);
1519 TCP_INC_STATS_BH(TCP_MIB_INERRS);
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
1532 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1533 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1534 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1535 np->mcast_oif = inet6_iif(opt_skb);
1536 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1537 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1538 if (ipv6_opt_accepted(sk, opt_skb)) {
1539 skb_set_owner_r(opt_skb, sk);
1540 opt_skb = xchg(&np->pktoptions, opt_skb);
1542 __kfree_skb(opt_skb);
1543 opt_skb = xchg(&np->pktoptions, NULL);
1552 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1554 struct sk_buff *skb = *pskb;
1559 if (skb->pkt_type != PACKET_HOST)
	/* Count it even if it's bad. */
1565 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1567 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1572 if (th->doff < sizeof(struct tcphdr)/4)
1574 if (!pskb_may_pull(skb, th->doff*4))
1577 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1578 tcp_v6_checksum_init(skb) < 0))
1582 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1583 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1584 skb->len - th->doff*4);
1585 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1586 TCP_SKB_CB(skb)->when = 0;
1587 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1588 TCP_SKB_CB(skb)->sacked = 0;
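	/* Stash the parsed header into the skb's TCP control block: start
	 * and end sequence numbers (SYN and FIN each consume one sequence
	 * number), the peer's ACK number, the ECN/DSCP bits taken from the
	 * IPv6 traffic class, and cleared timestamp ('when') and SACK
	 * state.
	 */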
1590 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1591 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1598 if (sk->sk_state == TCP_TIME_WAIT)
1601 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1602 goto discard_and_relse;
1604 if (sk_filter(sk, skb, 0))
1605 goto discard_and_relse;
1611 if (!sock_owned_by_user(sk)) {
1612 if (!tcp_prequeue(sk, skb))
1613 ret = tcp_v6_do_rcv(sk, skb);
1615 sk_add_backlog(sk, skb);
1619 return ret ? -1 : 0;
1622 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1625 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1627 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1629 tcp_v6_send_reset(skb);
1646 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1647 inet_twsk_put((struct inet_timewait_sock *)sk);
1651 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1652 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1653 inet_twsk_put((struct inet_timewait_sock *)sk);
1657 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1663 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1664 &skb->nh.ipv6h->daddr,
1665 ntohs(th->dest), inet6_iif(skb));
1667 struct inet_timewait_sock *tw = inet_twsk(sk);
1668 inet_twsk_deschedule(tw, &tcp_death_row);
1673 /* Fall through to ACK */
1676 tcp_v6_timewait_ack(sk, skb);
1680 case TCP_TW_SUCCESS:;
1685 static int tcp_v6_rebuild_header(struct sock *sk)
1688 struct dst_entry *dst;
1689 struct ipv6_pinfo *np = inet6_sk(sk);
1691 dst = __sk_dst_check(sk, np->dst_cookie);
1694 struct inet_sock *inet = inet_sk(sk);
1695 struct in6_addr *final_p = NULL, final;
1698 memset(&fl, 0, sizeof(fl));
1699 fl.proto = IPPROTO_TCP;
1700 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1701 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1702 fl.fl6_flowlabel = np->flow_label;
1703 fl.oif = sk->sk_bound_dev_if;
1704 fl.fl_ip_dport = inet->dport;
1705 fl.fl_ip_sport = inet->sport;
1707 if (np->opt && np->opt->srcrt) {
1708 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1709 ipv6_addr_copy(&final, &fl.fl6_dst);
1710 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1714 err = ip6_dst_lookup(sk, &dst, &fl);
1716 sk->sk_route_caps = 0;
1720 ipv6_addr_copy(&fl.fl6_dst, final_p);
1722 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1723 sk->sk_err_soft = -err;
1727 ip6_dst_store(sk, dst, NULL);
1728 sk->sk_route_caps = dst->dev->features &
1729 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1735 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1737 struct sock *sk = skb->sk;
1738 struct inet_sock *inet = inet_sk(sk);
1739 struct ipv6_pinfo *np = inet6_sk(sk);
1741 struct dst_entry *dst;
1742 struct in6_addr *final_p = NULL, final;
1744 memset(&fl, 0, sizeof(fl));
1745 fl.proto = IPPROTO_TCP;
1746 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1747 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1748 fl.fl6_flowlabel = np->flow_label;
1749 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1750 fl.oif = sk->sk_bound_dev_if;
1751 fl.fl_ip_sport = inet->sport;
1752 fl.fl_ip_dport = inet->dport;
1754 if (np->opt && np->opt->srcrt) {
1755 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1756 ipv6_addr_copy(&final, &fl.fl6_dst);
1757 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1761 dst = __sk_dst_check(sk, np->dst_cookie);
1764 int err = ip6_dst_lookup(sk, &dst, &fl);
1767 sk->sk_err_soft = -err;
1772 ipv6_addr_copy(&fl.fl6_dst, final_p);
1774 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1775 sk->sk_route_caps = 0;
1779 ip6_dst_store(sk, dst, NULL);
1780 sk->sk_route_caps = dst->dev->features &
1781 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1784 skb->dst = dst_clone(dst);
1786 /* Restore final destination back after routing done */
1787 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1789 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1792 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1794 struct ipv6_pinfo *np = inet6_sk(sk);
1795 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1797 sin6->sin6_family = AF_INET6;
1798 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1799 sin6->sin6_port = inet_sk(sk)->dport;
1800 /* We do not store received flowlabel for TCP */
1801 sin6->sin6_flowinfo = 0;
1802 sin6->sin6_scope_id = 0;
1803 if (sk->sk_bound_dev_if &&
1804 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1805 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1808 static int tcp_v6_remember_stamp(struct sock *sk)
1810 /* Alas, not yet... */
1814 static struct tcp_func ipv6_specific = {
1815 .queue_xmit = tcp_v6_xmit,
1816 .send_check = tcp_v6_send_check,
1817 .rebuild_header = tcp_v6_rebuild_header,
1818 .conn_request = tcp_v6_conn_request,
1819 .syn_recv_sock = tcp_v6_syn_recv_sock,
1820 .remember_stamp = tcp_v6_remember_stamp,
1821 .net_header_len = sizeof(struct ipv6hdr),
1823 .setsockopt = ipv6_setsockopt,
1824 .getsockopt = ipv6_getsockopt,
1825 .addr2sockaddr = v6_addr2sockaddr,
1826 .sockaddr_len = sizeof(struct sockaddr_in6)
/* TCP over IPv4 via INET6 API */
1833 static struct tcp_func ipv6_mapped = {
1834 .queue_xmit = ip_queue_xmit,
1835 .send_check = tcp_v4_send_check,
1836 .rebuild_header = inet_sk_rebuild_header,
1837 .conn_request = tcp_v6_conn_request,
1838 .syn_recv_sock = tcp_v6_syn_recv_sock,
1839 .remember_stamp = tcp_v4_remember_stamp,
1840 .net_header_len = sizeof(struct iphdr),
1842 .setsockopt = ipv6_setsockopt,
1843 .getsockopt = ipv6_getsockopt,
1844 .addr2sockaddr = v6_addr2sockaddr,
1845 .sockaddr_len = sizeof(struct sockaddr_in6)
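/*
 * A TCP socket created over AF_INET6 starts out with ipv6_specific and
 * flips to ipv6_mapped once it connects to (or was accepted from) an
 * IPv4-mapped peer: transmission then goes through ip_queue_xmit and the
 * IPv4 helpers, while setsockopt/getsockopt and address reporting keep
 * the IPv6 formats that userspace asked for.
 */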
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
1853 static int tcp_v6_init_sock(struct sock *sk)
1855 struct inet_connection_sock *icsk = inet_csk(sk);
1856 struct tcp_sock *tp = tcp_sk(sk);
1858 skb_queue_head_init(&tp->out_of_order_queue);
1859 tcp_init_xmit_timers(sk);
1860 tcp_prequeue_init(tp);
1862 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1863 tp->mdev = TCP_TIMEOUT_INIT;
	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;
	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
1875 tp->snd_ssthresh = 0x7fffffff;
1876 tp->snd_cwnd_clamp = ~0;
1877 tp->mss_cache = 536;
1879 tp->reordering = sysctl_tcp_reordering;
1881 sk->sk_state = TCP_CLOSE;
1883 tp->af_specific = &ipv6_specific;
1884 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1885 sk->sk_write_space = sk_stream_write_space;
1886 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1888 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1889 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1891 atomic_inc(&tcp_sockets_allocated);
1896 static int tcp_v6_destroy_sock(struct sock *sk)
1898 tcp_v4_destroy_sock(sk);
1899 return inet6_destroy_sock(sk);
1902 /* Proc filesystem TCPv6 sock list dumping. */
1903 static void get_openreq6(struct seq_file *seq,
1904 struct sock *sk, struct request_sock *req, int i, int uid)
1906 struct in6_addr *dest, *src;
1907 int ttd = req->expires - jiffies;
1912 src = &tcp6_rsk(req)->loc_addr;
1913 dest = &tcp6_rsk(req)->rmt_addr;
1915 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1916 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1918 src->s6_addr32[0], src->s6_addr32[1],
1919 src->s6_addr32[2], src->s6_addr32[3],
1920 ntohs(inet_sk(sk)->sport),
1921 dest->s6_addr32[0], dest->s6_addr32[1],
1922 dest->s6_addr32[2], dest->s6_addr32[3],
1923 ntohs(inet_rsk(req)->rmt_port),
1925 0,0, /* could print option size, but that is af dependent. */
1926 1, /* timers active (only the expire timer) */
1927 jiffies_to_clock_t(ttd),
1930 0, /* non standard timer */
1931 0, /* open_requests have no inode */
1935 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1937 struct in6_addr *dest, *src;
1940 unsigned long timer_expires;
1941 struct inet_sock *inet = inet_sk(sp);
1942 struct tcp_sock *tp = tcp_sk(sp);
1943 const struct inet_connection_sock *icsk = inet_csk(sp);
1944 struct ipv6_pinfo *np = inet6_sk(sp);
1947 src = &np->rcv_saddr;
1948 destp = ntohs(inet->dport);
1949 srcp = ntohs(inet->sport);
1951 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1953 timer_expires = icsk->icsk_timeout;
1954 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1956 timer_expires = icsk->icsk_timeout;
1957 } else if (timer_pending(&sp->sk_timer)) {
1959 timer_expires = sp->sk_timer.expires;
1962 timer_expires = jiffies;
1966 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1967 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1969 src->s6_addr32[0], src->s6_addr32[1],
1970 src->s6_addr32[2], src->s6_addr32[3], srcp,
1971 dest->s6_addr32[0], dest->s6_addr32[1],
1972 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1974 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1976 jiffies_to_clock_t(timer_expires - jiffies),
1977 icsk->icsk_retransmits,
1979 icsk->icsk_probes_out,
1981 atomic_read(&sp->sk_refcnt), sp,
1984 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1985 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1989 static void get_timewait6_sock(struct seq_file *seq,
1990 struct inet_timewait_sock *tw, int i)
1992 struct in6_addr *dest, *src;
1994 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
1995 int ttd = tw->tw_ttd - jiffies;
2000 dest = &tcp6tw->tw_v6_daddr;
2001 src = &tcp6tw->tw_v6_rcv_saddr;
2002 destp = ntohs(tw->tw_dport);
2003 srcp = ntohs(tw->tw_sport);
2006 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2007 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2009 src->s6_addr32[0], src->s6_addr32[1],
2010 src->s6_addr32[2], src->s6_addr32[3], srcp,
2011 dest->s6_addr32[0], dest->s6_addr32[1],
2012 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2013 tw->tw_substate, 0, 0,
2014 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2015 atomic_read(&tw->tw_refcnt), tw);
2018 #ifdef CONFIG_PROC_FS
2019 static int tcp6_seq_show(struct seq_file *seq, void *v)
2021 struct tcp_iter_state *st;
2023 if (v == SEQ_START_TOKEN) {
2028 "st tx_queue rx_queue tr tm->when retrnsmt"
2029 " uid timeout inode\n");
2034 switch (st->state) {
2035 case TCP_SEQ_STATE_LISTENING:
2036 case TCP_SEQ_STATE_ESTABLISHED:
2037 get_tcp6_sock(seq, v, st->num);
2039 case TCP_SEQ_STATE_OPENREQ:
2040 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2042 case TCP_SEQ_STATE_TIME_WAIT:
2043 get_timewait6_sock(seq, v, st->num);
2050 static struct file_operations tcp6_seq_fops;
2051 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2052 .owner = THIS_MODULE,
2055 .seq_show = tcp6_seq_show,
2056 .seq_fops = &tcp6_seq_fops,
2059 int __init tcp6_proc_init(void)
2061 return tcp_proc_register(&tcp6_seq_afinfo);
2064 void tcp6_proc_exit(void)
2066 tcp_proc_unregister(&tcp6_seq_afinfo);
2070 struct proto tcpv6_prot = {
2072 .owner = THIS_MODULE,
2074 .connect = tcp_v6_connect,
2075 .disconnect = tcp_disconnect,
2076 .accept = inet_csk_accept,
2078 .init = tcp_v6_init_sock,
2079 .destroy = tcp_v6_destroy_sock,
2080 .shutdown = tcp_shutdown,
2081 .setsockopt = tcp_setsockopt,
2082 .getsockopt = tcp_getsockopt,
2083 .sendmsg = tcp_sendmsg,
2084 .recvmsg = tcp_recvmsg,
2085 .backlog_rcv = tcp_v6_do_rcv,
2086 .hash = tcp_v6_hash,
2087 .unhash = tcp_unhash,
2088 .get_port = tcp_v6_get_port,
2089 .enter_memory_pressure = tcp_enter_memory_pressure,
2090 .sockets_allocated = &tcp_sockets_allocated,
2091 .memory_allocated = &tcp_memory_allocated,
2092 .memory_pressure = &tcp_memory_pressure,
2093 .orphan_count = &tcp_orphan_count,
2094 .sysctl_mem = sysctl_tcp_mem,
2095 .sysctl_wmem = sysctl_tcp_wmem,
2096 .sysctl_rmem = sysctl_tcp_rmem,
2097 .max_header = MAX_TCP_HEADER,
2098 .obj_size = sizeof(struct tcp6_sock),
2099 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2100 .rsk_prot = &tcp6_request_sock_ops,
2103 static struct inet6_protocol tcpv6_protocol = {
2104 .handler = tcp_v6_rcv,
2105 .err_handler = tcp_v6_err,
2106 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2109 static struct inet_protosw tcpv6_protosw = {
2110 .type = SOCK_STREAM,
2111 .protocol = IPPROTO_TCP,
2112 .prot = &tcpv6_prot,
2113 .ops = &inet6_stream_ops,
2116 .flags = INET_PROTOSW_PERMANENT,
2119 void __init tcpv6_init(void)
2121 /* register inet6 protocol */
2122 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2123 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2124 inet6_register_protosw(&tcpv6_protosw);