/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
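
/*
 * Note: ipv6_specific carries the address-family specific TCP operations
 * used by native IPv6 sockets, while ipv6_mapped is swapped in when an
 * AF_INET6 socket actually talks IPv4 through a v4-mapped address
 * (::ffff:a.b.c.d); see tcp_v6_connect() below.
 */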
static inline int tcp_v6_bind_conflict(const struct sock *sk,
				       const struct inet_bind_bucket *tb)
{
	const struct sock *sk2;
	const struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
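/*
 * Bind a local port.  With snum == 0 an ephemeral port is chosen by
 * scanning the sysctl_local_port_range starting at the shared port_rover;
 * otherwise the requested port's bind bucket is looked up and checked for
 * conflicts with its current owners via tcp_v6_bind_conflict().
 */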
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	struct hlist_node *node;
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;

		spin_lock(&tcp_hashinfo.portalloc_lock);
		if (tcp_hashinfo.port_rover < low)
			rover = low;
		else
			rover = tcp_hashinfo.port_rover;

			head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);
			inet_bind_bucket_for_each(tb, node, &head->chain)
				if (tb->port == rover)

			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_hashinfo.port_rover = rover;
		spin_unlock(&tcp_hashinfo.portalloc_lock);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		if (unlikely(remaining <= 0))

		/* OK, here is the one we will use. */

		head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
		spin_lock(&head->lock);
		inet_bind_bucket_for_each(tb, node, &head->chain)
			if (tb->port == snum)

	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {

		if (tcp_v6_bind_conflict(sk, tb))

	tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);

	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)

	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))

	if (!inet_csk(sk)->icsk_bind_hash)
		inet_bind_hash(sk, tb, snum);
	BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);

	spin_unlock(&head->lock);
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
		lock = &tcp_hashinfo.lhash_lock;
		inet_listen_wlock(&tcp_hashinfo);
	} else {
		unsigned int hash;
		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
		hash &= (tcp_hashinfo.ehash_size - 1);
		list = &tcp_hashinfo.ehash[hash].chain;
		lock = &tcp_hashinfo.ehash[hash].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}

/*
 * Open request hash tables.
 */
static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
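
/*
 * Walk the listener's SYN queue (lopt->syn_table) looking for a pending
 * request_sock matching the peer address/port and local address and, when
 * the request is bound to an interface, the incoming ifindex.  *prevp is
 * set so the caller can unlink the entry without rescanning the chain.
 */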
static struct request_sock *tcp_v6_search_req(const struct sock *sk,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	const struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6))
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest, skb->h.th->source);

	return secure_tcp_sequence_number(skb->nh.iph->daddr, skb->nh.iph->saddr,
					  skb->h.th->dest, skb->h.th->source);
}
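
/*
 * Called while picking an ephemeral port at connect() time, with the
 * destination ehash bucket write-locked: check the TIME-WAIT and
 * established chains for a clashing 4-tuple.  A recyclable TIME-WAIT
 * entry may donate its timestamps (and is then killed); any other match
 * means this port cannot be used for the connection.
 */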
static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
				      struct inet_timewait_sock **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	const struct in6_addr *daddr = &np->rcv_saddr;
	const struct in6_addr *saddr = &np->daddr;
	const int dif = sk->sk_bound_dev_if;
	const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
	unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport);
	struct inet_ehash_bucket *head = inet_ehash_bucket(&tcp_hashinfo, hash);
	struct sock *sk2;
	const struct hlist_node *node;
	struct inet_timewait_sock *tw;

	prefetch(head->chain.first);
	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
		const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);

		tw = inet_twsk(sk2);

		if (*((__u32 *)&(tw->tw_dport)) == ports &&
		    sk2->sk_family == PF_INET6 &&
		    ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
		    ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
		    sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
			struct tcp_sock *tp = tcp_sk(sk);

			if (tcptw->tw_ts_recent_stamp &&
			    (!twp ||
			     (sysctl_tcp_tw_reuse &&
			      xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if (INET6_MATCH(sk2, hash, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hash = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		inet_twsk_deschedule(tw, &tcp_death_row);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		inet_twsk_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
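
/*
 * Connect-time source port selection.  If the socket is not already bound
 * (snum == 0), candidate ports are probed starting at an offset derived
 * from secure_tcpv6_port_ephemeral(); a port is accepted once
 * __tcp_v6_check_established() confirms the 4-tuple is unique, possibly
 * recycling a TIME-WAIT entry found along the way.
 */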
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct inet_bind_hashbucket *head;
	struct inet_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i, port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct inet_timewait_sock *tw = NULL;

		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			inet_bind_bucket_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		inet_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
	tb = inet_csk(sk)->icsk_bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct dst_entry *dst;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	fl.fl6_flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
	IP6_ECN_flow_init(fl.fl6_flowlabel);
	if (fl.fl6_flowlabel & IPV6_FLOWLABEL_MASK) {
		struct ip6_flowlabel *flowlabel;
		flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
		if (flowlabel == NULL)
			return -EINVAL;
		ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
		fl6_sock_release(flowlabel);
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */
	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;
	/*
	 *	TCP over IPv4
	 */
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
		goto failure;

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
			  th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put((struct inet_timewait_sock *)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
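
/*
 * Build and send a SYN|ACK for a pending connection request: route using
 * the addresses recorded in the request (honouring a source route inverted
 * from the SYN, if the listener asked for that), compute the checksum over
 * the IPv6 pseudo-header, and hand the segment to ip6_xmit().
 */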
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = treq->iif;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.osrcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}

static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
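
/*
 * Decide whether the incoming skb carries IPv6 options or ancillary data
 * that this socket asked to receive (IPV6_PKTOPTIONS et al.); if so, the
 * SYN is kept around so the options can be replayed to the child socket.
 */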
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
		    ((IPV6_FLOWINFO_MASK & *(u32 *)skb->nh.raw) && np->rxopt.bits.rxflow) ||
		    (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
		    ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
			return 1;
	}
	return 0;
}
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff << 2,
							 skb->csum));
	}
}
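
/*
 * Send a stateless RST in reply to a segment that has no listening or
 * established socket.  The reply is derived entirely from the offending
 * segment: addresses and ports are swapped and the sequence numbers are
 * chosen per RFC 793.
 */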
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if (th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
			return;

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32 *)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = inet6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
			return;
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent);

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}
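
/*
 * For a segment that arrived on a listening socket, check whether it
 * belongs to a pending request in the SYN queue or to an already
 * established (or TIME-WAIT) socket spawned from this listener.
 */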
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = skb->h.th;
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
					 th->source, &skb->nh.ipv6h->daddr,
					 ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put((struct inet_timewait_sock *)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
	const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
	inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
}


/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
	    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = inet6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		reqsk_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
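
/*
 * Create the child socket once the three-way handshake completes.  The
 * v4-mapped case is delegated to tcp_v4_syn_recv_sock() and the result is
 * then fitted with the ipv6_mapped operations; the native case routes via
 * the request's addresses and clones the relevant IPv6 options.
 */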
static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					 struct request_sock *req,
					 struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */
		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions = NULL;
		newnp->opt = NULL;
		newnp->mcast_oif = inet6_iif(skb);
		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* It is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}
	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.osrcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt = NULL;
	newnp->mcast_oif = inet6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping a reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	inet_inherit_port(&tcp_hashinfo, sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
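
/*
 * Prime the checksum state on receive: hardware-computed sums are verified
 * against the IPv6 pseudo-header, short packets are checksummed in full,
 * and for larger packets only the pseudo-header sum is precomputed, leaving
 * full verification for later (tcp_checksum_complete()).
 */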
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr, skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but that is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, this is the only place in our code where we
	   can make this not affect IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, the whole idea behind IPV6_PKTOPTIONS is
	   not very well thought out. For now we latch the
	   options received in the last packet enqueued by
	   tcp. Feel free to propose a better solution.
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket.
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;
ipv6_pktoptions:
	/* What is this?

	   1. skb was enqueued by tcp.
	   2. skb is added to the tail of the read queue, rather than out of order.
	   3. The socket is not in passive state.
	   4. Finally, it really contains options, which the user wants to receive.
	 */
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
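
/*
 * Main receive entry point (registered in tcpv6_protocol below): validate
 * the header and checksum, look the socket up in the established/listening
 * hashes, and then either process the segment directly, prequeue it, or
 * add it to the backlog when the socket is locked by user context.
 */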
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;

	if (skb->pkt_type != PACKET_HOST)

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))

	if (th->doff < sizeof(struct tcphdr)/4)

	if (!pskb_may_pull(skb, th->doff*4))

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
			    &skb->nh.ipv6h->daddr, ntohs(th->dest),

	if (sk->sk_state == TCP_TIME_WAIT)

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);

		sk_add_backlog(sk, skb);

	return ret ? -1 : 0;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {

		TCP_INC_STATS_BH(TCP_MIB_INERRS);

		tcp_v6_send_reset(skb);

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put((struct inet_timewait_sock *)sk);

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		inet_twsk_put((struct inet_timewait_sock *)sk);

	switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,

		sk2 = inet6_lookup_listener(&tcp_hashinfo,
					    &skb->nh.ipv6h->daddr,
					    ntohs(th->dest), inet6_iif(skb));

			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);

		/* Fall through to ACK */

		tcp_v6_timewait_ack(sk, skb);

	case TCP_TW_SUCCESS:;
static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore the final destination after routing is done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr *uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}

static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	inet_sk_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest = &np->daddr;
	src = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp = ntohs(inet->sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active = 1;
		timer_expires = icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active = 4;
		timer_expires = icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active = 2;
		timer_expires = sp->sk_timer.expires;
	} else {
		timer_active = 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una, tp->rcv_nxt - tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   icsk->icsk_rto,
		   icsk->icsk_ack.ato,
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh >= 0xFFFF ? -1 : tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tcp6tw->tw_v6_daddr;
	src = &tcp6tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_printf(seq,
			   "  sl  "
			   "local_address                         "
			   "remote_address                        "
			   "st tx_queue rx_queue tr tm->when retrnsmt"
			   "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.twsk_obj_size		= sizeof(struct tcp6_timewait_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}