/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
51 #include <net/transp_v6.h>
52 #include <net/addrconf.h>
53 #include <net/ip6_route.h>
54 #include <net/ip6_checksum.h>
55 #include <net/inet_ecn.h>
56 #include <net/protocol.h>
60 #include <net/dsfield.h>
62 #include <asm/uaccess.h>
64 #include <linux/proc_fs.h>
65 #include <linux/seq_file.h>
67 static void tcp_v6_send_reset(struct sk_buff *skb);
68 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb);
72 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
73 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
75 static struct tcp_func ipv6_mapped;
76 static struct tcp_func ipv6_specific;
78 /* I have no idea if this is a good hash for v6 or not. -DaveM */
79 static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
80 struct in6_addr *faddr, u16 fport)
82 int hashent = (lport ^ fport);
84 hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
85 hashent ^= hashent>>16;
86 hashent ^= hashent>>8;
87 return (hashent & (tcp_hashinfo.ehash_size - 1));
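/* Compute the ehash slot for a connected socket, using the addresses
 * and ports already recorded in its inet and ipv6 control blocks.
 */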
90 static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
92 struct inet_sock *inet = inet_sk(sk);
93 struct ipv6_pinfo *np = inet6_sk(sk);
94 struct in6_addr *laddr = &np->rcv_saddr;
95 struct in6_addr *faddr = &np->daddr;
96 __u16 lport = inet->num;
97 __u16 fport = inet->dport;
98 return tcp_v6_hashfn(laddr, lport, faddr, fport);
101 static inline int tcp_v6_bind_conflict(const struct sock *sk,
102 const struct inet_bind_bucket *tb)
104 const struct sock *sk2;
105 const struct hlist_node *node;
107 /* We must walk the whole port owner list in this case. -DaveM */
108 sk_for_each_bound(sk2, node, &tb->owners) {
110 (!sk->sk_bound_dev_if ||
111 !sk2->sk_bound_dev_if ||
112 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
113 (!sk->sk_reuse || !sk2->sk_reuse ||
114 sk2->sk_state == TCP_LISTEN) &&
115 ipv6_rcv_saddr_equal(sk, sk2))
122 /* Grrr, addr_type already calculated by caller, but I don't want
123 * to add some silly "cookie" argument to this method just for that.
124 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
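/* Bind the socket to a local port.  A zero snum means "pick an ephemeral
 * port" from sysctl_local_port_range; otherwise the requested port is
 * checked against its current owners with tcp_v6_bind_conflict().
 */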
127 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
129 struct inet_bind_hashbucket *head;
130 struct inet_bind_bucket *tb;
131 struct hlist_node *node;
136 int low = sysctl_local_port_range[0];
137 int high = sysctl_local_port_range[1];
138 int remaining = (high - low) + 1;
141 spin_lock(&tcp_hashinfo.portalloc_lock);
142 if (tcp_hashinfo.port_rover < low)
145 rover = tcp_hashinfo.port_rover;
149 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
150 spin_lock(&head->lock);
151 inet_bind_bucket_for_each(tb, node, &head->chain)
152 if (tb->port == rover)
156 spin_unlock(&head->lock);
157 } while (--remaining > 0);
158 tcp_hashinfo.port_rover = rover;
159 spin_unlock(&tcp_hashinfo.portalloc_lock);
161 /* Exhausted local port range during search? It is not
162 * possible for us to be holding one of the bind hash
163 * locks if this test triggers, because if 'remaining'
164 * drops to zero, we broke out of the do/while loop at
 * the top level, not from the 'break;' statement.
 */
168 if (unlikely(remaining <= 0))
171 /* OK, here is the one we will use. */
174 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
175 spin_lock(&head->lock);
176 inet_bind_bucket_for_each(tb, node, &head->chain)
177 if (tb->port == snum)
183 if (tb && !hlist_empty(&tb->owners)) {
184 if (tb->fastreuse > 0 && sk->sk_reuse &&
185 sk->sk_state != TCP_LISTEN) {
189 if (tcp_v6_bind_conflict(sk, tb))
196 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
200 if (hlist_empty(&tb->owners)) {
201 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
205 } else if (tb->fastreuse &&
206 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
210 if (!inet_csk(sk)->icsk_bind_hash)
211 inet_bind_hash(sk, tb, snum);
212 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
216 spin_unlock(&head->lock);
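/* Insert an unhashed socket into the listening hash (TCP_LISTEN) or the
 * established hash, and bump the protocol's use count.
 */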
222 static __inline__ void __tcp_v6_hash(struct sock *sk)
224 struct hlist_head *list;
227 BUG_TRAP(sk_unhashed(sk));
229 if (sk->sk_state == TCP_LISTEN) {
230 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
231 lock = &tcp_hashinfo.lhash_lock;
232 inet_listen_wlock(&tcp_hashinfo);
234 sk->sk_hashent = tcp_v6_sk_hashfn(sk);
235 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
236 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
240 __sk_add_node(sk, list);
241 sock_prot_inc_use(sk->sk_prot);
246 static void tcp_v6_hash(struct sock *sk)
248 if (sk->sk_state != TCP_CLOSE) {
249 struct tcp_sock *tp = tcp_sk(sk);
251 if (tp->af_specific == &ipv6_mapped) {
261 static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
264 struct hlist_node *node;
265 struct sock *result = NULL;
269 read_lock(&tcp_hashinfo.lhash_lock);
270 sk_for_each(sk, node, &tcp_hashinfo.listening_hash[inet_lhashfn(hnum)]) {
271 if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
272 struct ipv6_pinfo *np = inet6_sk(sk);
275 if (!ipv6_addr_any(&np->rcv_saddr)) {
276 if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
280 if (sk->sk_bound_dev_if) {
281 if (sk->sk_bound_dev_if != dif)
289 if (score > hiscore) {
297 read_unlock(&tcp_hashinfo.lhash_lock);
301 /* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
302 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 * The sockhash lock must be held as a reader here.
 */
307 static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
308 struct in6_addr *daddr, u16 hnum,
312 const struct hlist_node *node;
313 const __u32 ports = INET_COMBINED_PORTS(sport, hnum);
314 /* Optimize here for direct hit, only listening connections can
 * have wildcards anyways.
 */
317 const int hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
318 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
320 read_lock(&head->lock);
321 sk_for_each(sk, node, &head->chain) {
322 /* For IPV6 do the cheaper port and family tests first. */
323 if (INET6_MATCH(sk, saddr, daddr, ports, dif))
324 goto hit; /* You sunk my battleship! */
326 /* Must check for a TIME_WAIT'er before going to listener hash. */
327 sk_for_each(sk, node, &(head + tcp_hashinfo.ehash_size)->chain) {
328 const struct inet_timewait_sock *tw = inet_twsk(sk);
330 if(*((__u32 *)&(tw->tw_dport)) == ports &&
331 sk->sk_family == PF_INET6) {
332 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk);
334 if (ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
335 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
336 (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
340 read_unlock(&head->lock);
345 read_unlock(&head->lock);
350 static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
351 struct in6_addr *daddr, u16 hnum,
356 sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
361 return tcp_v6_lookup_listener(daddr, hnum, dif);
364 inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
365 struct in6_addr *daddr, u16 dport,
371 sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
377 EXPORT_SYMBOL_GPL(tcp_v6_lookup);
/*
 * Open request hash tables.
 */
384 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
388 a = raddr->s6_addr32[0];
389 b = raddr->s6_addr32[1];
390 c = raddr->s6_addr32[2];
392 a += JHASH_GOLDEN_RATIO;
393 b += JHASH_GOLDEN_RATIO;
395 __jhash_mix(a, b, c);
397 a += raddr->s6_addr32[3];
399 __jhash_mix(a, b, c);
401 return c & (TCP_SYNQ_HSIZE - 1);
404 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
405 struct request_sock ***prevp,
407 struct in6_addr *raddr,
408 struct in6_addr *laddr,
411 const struct inet_connection_sock *icsk = inet_csk(sk);
412 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
413 struct request_sock *req, **prev;
415 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
416 (req = *prev) != NULL;
417 prev = &req->dl_next) {
418 const struct tcp6_request_sock *treq = tcp6_rsk(req);
420 if (inet_rsk(req)->rmt_port == rport &&
421 req->rsk_ops->family == AF_INET6 &&
422 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
423 ipv6_addr_equal(&treq->loc_addr, laddr) &&
424 (!treq->iif || treq->iif == iif)) {
425 BUG_TRAP(req->sk == NULL);
434 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
435 struct in6_addr *saddr,
436 struct in6_addr *daddr,
439 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
442 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
444 if (skb->protocol == htons(ETH_P_IPV6)) {
return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
				    skb->nh.ipv6h->saddr.s6_addr32,
				    skb->h.th->dest, skb->h.th->source);
} else {
return secure_tcp_sequence_number(skb->nh.iph->daddr, skb->nh.iph->saddr,
				  skb->h.th->dest, skb->h.th->source);
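/* Called on connect(): scan the TIME-WAIT and established chains for a
 * socket already using our four-tuple.  If none is found the socket is
 * hashed; a recyclable TIME-WAIT entry may be handed back via *twp.
 */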
457 static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
458 struct inet_timewait_sock **twp)
460 struct inet_sock *inet = inet_sk(sk);
461 struct ipv6_pinfo *np = inet6_sk(sk);
462 struct in6_addr *daddr = &np->rcv_saddr;
463 struct in6_addr *saddr = &np->daddr;
464 int dif = sk->sk_bound_dev_if;
465 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
466 const int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
467 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
469 const struct hlist_node *node;
470 struct inet_timewait_sock *tw;
472 write_lock(&head->lock);
474 /* Check TIME-WAIT sockets first. */
475 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
476 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
480 if(*((__u32 *)&(tw->tw_dport)) == ports &&
481 sk2->sk_family == PF_INET6 &&
482 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
483 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
484 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
485 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
486 struct tcp_sock *tp = tcp_sk(sk);
488 if (tcptw->tw_ts_recent_stamp &&
490 (sysctl_tcp_tw_reuse &&
491 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
492 /* See comment in tcp_ipv4.c */
493 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
496 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
497 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
506 /* And established part... */
507 sk_for_each(sk2, node, &head->chain) {
508 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
513 BUG_TRAP(sk_unhashed(sk));
514 __sk_add_node(sk, &head->chain);
515 sk->sk_hashent = hash;
516 sock_prot_inc_use(sk->sk_prot);
517 write_unlock(&head->lock);
521 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
523 /* Silly. Should hash-dance instead... */
524 inet_twsk_deschedule(tw, &tcp_death_row);
525 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
532 write_unlock(&head->lock);
533 return -EADDRNOTAVAIL;
536 static inline u32 tcpv6_port_offset(const struct sock *sk)
538 const struct inet_sock *inet = inet_sk(sk);
539 const struct ipv6_pinfo *np = inet6_sk(sk);
return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
				   np->daddr.s6_addr32, inet->dport);
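/* Pick a source port for an active open, mirroring the IPv4 logic: walk
 * the ephemeral range from an offset derived from the connection's
 * addresses and take the first port free of four-tuple conflicts.
 */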
546 static int tcp_v6_hash_connect(struct sock *sk)
548 unsigned short snum = inet_sk(sk)->num;
549 struct inet_bind_hashbucket *head;
550 struct inet_bind_bucket *tb;
554 int low = sysctl_local_port_range[0];
555 int high = sysctl_local_port_range[1];
556 int range = high - low;
560 u32 offset = hint + tcpv6_port_offset(sk);
561 struct hlist_node *node;
562 struct inet_timewait_sock *tw = NULL;
565 for (i = 1; i <= range; i++) {
566 port = low + (i + offset) % range;
567 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
568 spin_lock(&head->lock);
570 /* Does not bother with rcv_saddr checks,
 * because the established check is already
 * unique enough.
 */
574 inet_bind_bucket_for_each(tb, node, &head->chain) {
575 if (tb->port == port) {
576 BUG_TRAP(!hlist_empty(&tb->owners));
577 if (tb->fastreuse >= 0)
579 if (!__tcp_v6_check_established(sk,
587 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
589 spin_unlock(&head->lock);
596 spin_unlock(&head->lock);
600 return -EADDRNOTAVAIL;
605 /* Head lock still held and bh's disabled */
606 inet_bind_hash(sk, tb, port);
607 if (sk_unhashed(sk)) {
608 inet_sk(sk)->sport = htons(port);
611 spin_unlock(&head->lock);
614 inet_twsk_deschedule(tw, &tcp_death_row);
622 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
623 tb = inet_csk(sk)->icsk_bind_hash;
624 spin_lock_bh(&head->lock);
626 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
628 spin_unlock_bh(&head->lock);
631 spin_unlock(&head->lock);
632 /* No definite answer... Walk to established hash table */
633 ret = __tcp_v6_check_established(sk, snum, NULL);
640 static __inline__ int tcp_v6_iif(struct sk_buff *skb)
642 return IP6CB(skb)->iif;
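/* Active open: validate the destination, resolve flow label and scope id,
 * fall back to tcp_v4_connect() for v4-mapped addresses, route the flow
 * and finally send the SYN via tcp_connect().
 */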
645 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
648 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
649 struct inet_sock *inet = inet_sk(sk);
650 struct ipv6_pinfo *np = inet6_sk(sk);
651 struct tcp_sock *tp = tcp_sk(sk);
652 struct in6_addr *saddr = NULL, *final_p = NULL, final;
654 struct dst_entry *dst;
658 if (addr_len < SIN6_LEN_RFC2133)
661 if (usin->sin6_family != AF_INET6)
662 return(-EAFNOSUPPORT);
664 memset(&fl, 0, sizeof(fl));
667 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
668 IP6_ECN_flow_init(fl.fl6_flowlabel);
669 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
670 struct ip6_flowlabel *flowlabel;
671 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
672 if (flowlabel == NULL)
674 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
675 fl6_sock_release(flowlabel);
/*
 * connect() to INADDR_ANY means loopback (BSD'ism).
 */
683 if(ipv6_addr_any(&usin->sin6_addr))
684 usin->sin6_addr.s6_addr[15] = 0x1;
686 addr_type = ipv6_addr_type(&usin->sin6_addr);
688 if(addr_type & IPV6_ADDR_MULTICAST)
691 if (addr_type&IPV6_ADDR_LINKLOCAL) {
692 if (addr_len >= sizeof(struct sockaddr_in6) &&
693 usin->sin6_scope_id) {
/* If interface is set while binding, indices
 * must coincide.
 */
697 if (sk->sk_bound_dev_if &&
698 sk->sk_bound_dev_if != usin->sin6_scope_id)
701 sk->sk_bound_dev_if = usin->sin6_scope_id;
704 /* Connect to link-local address requires an interface */
705 if (!sk->sk_bound_dev_if)
709 if (tp->rx_opt.ts_recent_stamp &&
710 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
711 tp->rx_opt.ts_recent = 0;
712 tp->rx_opt.ts_recent_stamp = 0;
716 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
717 np->flow_label = fl.fl6_flowlabel;
723 if (addr_type == IPV6_ADDR_MAPPED) {
724 u32 exthdrlen = tp->ext_header_len;
725 struct sockaddr_in sin;
727 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
729 if (__ipv6_only_sock(sk))
732 sin.sin_family = AF_INET;
733 sin.sin_port = usin->sin6_port;
734 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
736 tp->af_specific = &ipv6_mapped;
737 sk->sk_backlog_rcv = tcp_v4_do_rcv;
739 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
742 tp->ext_header_len = exthdrlen;
743 tp->af_specific = &ipv6_specific;
744 sk->sk_backlog_rcv = tcp_v6_do_rcv;
747 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
749 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
756 if (!ipv6_addr_any(&np->rcv_saddr))
757 saddr = &np->rcv_saddr;
759 fl.proto = IPPROTO_TCP;
760 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
761 ipv6_addr_copy(&fl.fl6_src,
762 (saddr ? saddr : &np->saddr));
763 fl.oif = sk->sk_bound_dev_if;
764 fl.fl_ip_dport = usin->sin6_port;
765 fl.fl_ip_sport = inet->sport;
767 if (np->opt && np->opt->srcrt) {
768 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
769 ipv6_addr_copy(&final, &fl.fl6_dst);
770 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
774 err = ip6_dst_lookup(sk, &dst, &fl);
778 ipv6_addr_copy(&fl.fl6_dst, final_p);
780 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
787 ipv6_addr_copy(&np->rcv_saddr, saddr);
790 /* set the source address */
791 ipv6_addr_copy(&np->saddr, saddr);
792 inet->rcv_saddr = LOOPBACK4_IPV6;
794 ip6_dst_store(sk, dst, NULL);
795 sk->sk_route_caps = dst->dev->features &
796 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
798 tp->ext_header_len = 0;
800 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
802 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
804 inet->dport = usin->sin6_port;
806 tcp_set_state(sk, TCP_SYN_SENT);
807 err = tcp_v6_hash_connect(sk);
812 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
817 err = tcp_connect(sk);
824 tcp_set_state(sk, TCP_CLOSE);
828 sk->sk_route_caps = 0;
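/* ICMPv6 error handler: find the socket the offending segment belongs to,
 * resync the MSS on ICMPV6_PKT_TOOBIG, and report other errors to the
 * socket or to the matching request_sock.
 */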
832 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
833 int type, int code, int offset, __u32 info)
835 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
836 struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
837 struct ipv6_pinfo *np;
843 sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);
846 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
850 if (sk->sk_state == TCP_TIME_WAIT) {
851 inet_twsk_put((struct inet_timewait_sock *)sk);
856 if (sock_owned_by_user(sk))
857 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
859 if (sk->sk_state == TCP_CLOSE)
863 seq = ntohl(th->seq);
864 if (sk->sk_state != TCP_LISTEN &&
865 !between(seq, tp->snd_una, tp->snd_nxt)) {
866 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
872 if (type == ICMPV6_PKT_TOOBIG) {
873 struct dst_entry *dst = NULL;
875 if (sock_owned_by_user(sk))
877 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
880 /* icmp should have updated the destination cache entry */
881 dst = __sk_dst_check(sk, np->dst_cookie);
884 struct inet_sock *inet = inet_sk(sk);
887 /* BUGGG_FUTURE: Again, it is not clear how
to handle rthdr case. Ignore this complexity
for now. */
891 memset(&fl, 0, sizeof(fl));
892 fl.proto = IPPROTO_TCP;
893 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
894 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
895 fl.oif = sk->sk_bound_dev_if;
896 fl.fl_ip_dport = inet->dport;
897 fl.fl_ip_sport = inet->sport;
899 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
900 sk->sk_err_soft = -err;
904 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
905 sk->sk_err_soft = -err;
912 if (tp->pmtu_cookie > dst_mtu(dst)) {
913 tcp_sync_mss(sk, dst_mtu(dst));
914 tcp_simple_retransmit(sk);
915 } /* else let the usual retransmit timer handle it */
920 icmpv6_err_convert(type, code, &err);
/* Might be for a request_sock */
923 switch (sk->sk_state) {
924 struct request_sock *req, **prev;
926 if (sock_owned_by_user(sk))
929 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
930 &hdr->saddr, tcp_v6_iif(skb));
934 /* ICMPs are not backlogged, hence we cannot get
 * an established socket here.
 */
937 BUG_TRAP(req->sk == NULL);
939 if (seq != tcp_rsk(req)->snt_isn) {
940 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
944 inet_csk_reqsk_queue_drop(sk, req, prev);
948 case TCP_SYN_RECV: /* Cannot happen.
It can, if SYNs are crossed. --ANK */
950 if (!sock_owned_by_user(sk)) {
951 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
953 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
957 sk->sk_err_soft = err;
961 if (!sock_owned_by_user(sk) && np->recverr) {
963 sk->sk_error_report(sk);
965 sk->sk_err_soft = err;
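/* Build and send a SYN|ACK for a queued connection request, honouring any
 * reversed source route recorded in the request's saved packet options.
 */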
973 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
974 struct dst_entry *dst)
976 struct tcp6_request_sock *treq = tcp6_rsk(req);
977 struct ipv6_pinfo *np = inet6_sk(sk);
978 struct sk_buff * skb;
979 struct ipv6_txoptions *opt = NULL;
980 struct in6_addr * final_p = NULL, final;
984 memset(&fl, 0, sizeof(fl));
985 fl.proto = IPPROTO_TCP;
986 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
987 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
988 fl.fl6_flowlabel = 0;
990 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
991 fl.fl_ip_sport = inet_sk(sk)->sport;
996 np->rxopt.bits.srcrt == 2 &&
998 struct sk_buff *pktopts = treq->pktopts;
999 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
1001 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
1004 if (opt && opt->srcrt) {
1005 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1006 ipv6_addr_copy(&final, &fl.fl6_dst);
1007 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1011 err = ip6_dst_lookup(sk, &dst, &fl);
1015 ipv6_addr_copy(&fl.fl6_dst, final_p);
1016 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1020 skb = tcp_make_synack(sk, dst, req);
1022 struct tcphdr *th = skb->h.th;
1024 th->check = tcp_v6_check(th, skb->len,
1025 &treq->loc_addr, &treq->rmt_addr,
1026 csum_partial((char *)th, skb->len, skb->csum));
1028 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1029 err = ip6_xmit(sk, skb, &fl, opt, 0);
1030 if (err == NET_XMIT_CN)
1036 if (opt && opt != np->opt)
1037 sock_kfree_s(sk, opt, opt->tot_len);
1041 static void tcp_v6_reqsk_destructor(struct request_sock *req)
1043 if (tcp6_rsk(req)->pktopts)
1044 kfree_skb(tcp6_rsk(req)->pktopts);
1047 static struct request_sock_ops tcp6_request_sock_ops = {
1049 .obj_size = sizeof(struct tcp6_request_sock),
1050 .rtx_syn_ack = tcp_v6_send_synack,
1051 .send_ack = tcp_v6_reqsk_send_ack,
1052 .destructor = tcp_v6_reqsk_destructor,
1053 .send_reset = tcp_v6_send_reset
1056 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
1058 struct ipv6_pinfo *np = inet6_sk(sk);
1059 struct inet6_skb_parm *opt = IP6CB(skb);
1061 if (np->rxopt.all) {
1062 if ((opt->hop && np->rxopt.bits.hopopts) ||
1063 ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
1064 np->rxopt.bits.rxflow) ||
1065 (opt->srcrt && np->rxopt.bits.srcrt) ||
1066 ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
1073 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
1074 struct sk_buff *skb)
1076 struct ipv6_pinfo *np = inet6_sk(sk);
1078 if (skb->ip_summed == CHECKSUM_HW) {
1079 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
1080 skb->csum = offsetof(struct tcphdr, check);
1082 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
csum_partial((char *)th, th->doff<<2,
	     skb->csum));
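/* Answer an unexpected segment with a RST.  There is no socket context
 * here, so the reply is built purely from the offending skb.
 */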
1089 static void tcp_v6_send_reset(struct sk_buff *skb)
1091 struct tcphdr *th = skb->h.th, *t1;
1092 struct sk_buff *buff;
1098 if (!ipv6_unicast_destination(skb))
1102 * We need to grab some memory, and put together an RST,
 * and then put it into the queue to be sent.
 */
1106 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
1111 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
1113 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
1115 /* Swap the send and the receive. */
1116 memset(t1, 0, sizeof(*t1));
1117 t1->dest = th->source;
1118 t1->source = th->dest;
1119 t1->doff = sizeof(*t1)/4;
1123 t1->seq = th->ack_seq;
1126 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
1127 + skb->len - (th->doff<<2));
1130 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
1132 memset(&fl, 0, sizeof(fl));
1133 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1134 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1136 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1137 sizeof(*t1), IPPROTO_TCP,
1140 fl.proto = IPPROTO_TCP;
1141 fl.oif = tcp_v6_iif(skb);
1142 fl.fl_ip_dport = t1->dest;
1143 fl.fl_ip_sport = t1->source;
1145 /* sk = NULL, but it is safe for now. RST socket required. */
1146 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1148 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1149 dst_release(buff->dst);
1153 ip6_xmit(NULL, buff, &fl, NULL, 0);
1154 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1155 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1162 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1164 struct tcphdr *th = skb->h.th, *t1;
1165 struct sk_buff *buff;
1167 int tot_len = sizeof(struct tcphdr);
1172 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1177 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1179 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1181 /* Swap the send and the receive. */
1182 memset(t1, 0, sizeof(*t1));
1183 t1->dest = th->source;
1184 t1->source = th->dest;
1185 t1->doff = tot_len/4;
1186 t1->seq = htonl(seq);
1187 t1->ack_seq = htonl(ack);
1189 t1->window = htons(win);
1192 u32 *ptr = (u32*)(t1 + 1);
1193 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1194 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1195 *ptr++ = htonl(tcp_time_stamp);
1199 buff->csum = csum_partial((char *)t1, tot_len, 0);
1201 memset(&fl, 0, sizeof(fl));
1202 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1203 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1205 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1206 tot_len, IPPROTO_TCP,
1209 fl.proto = IPPROTO_TCP;
1210 fl.oif = tcp_v6_iif(skb);
1211 fl.fl_ip_dport = t1->dest;
1212 fl.fl_ip_sport = t1->source;
1214 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1215 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
1216 dst_release(buff->dst);
1219 ip6_xmit(NULL, buff, &fl, NULL, 0);
1220 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1227 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1229 struct inet_timewait_sock *tw = inet_twsk(sk);
1230 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1232 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1233 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1234 tcptw->tw_ts_recent);
1239 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1241 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1245 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1247 struct request_sock *req, **prev;
1248 struct tcphdr *th = skb->h.th;
1251 /* Find possible connection requests. */
1252 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1253 &skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
1255 return tcp_check_req(sk, skb, req, prev);
1257 nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
1259 &skb->nh.ipv6h->daddr,
1264 if (nsk->sk_state != TCP_TIME_WAIT) {
1268 inet_twsk_put((struct inet_timewait_sock *)nsk);
1272 #if 0 /*def CONFIG_SYN_COOKIES*/
1273 if (!th->rst && !th->syn && th->ack)
1274 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1279 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1281 struct inet_connection_sock *icsk = inet_csk(sk);
1282 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1283 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1285 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1286 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1290 /* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
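/* Handle an incoming SYN on a listening socket: allocate a request_sock,
 * record the peer's addresses, interface and packet options, send the
 * SYN|ACK and park the request on the SYN queue.
 */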
1293 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1295 struct tcp6_request_sock *treq;
1296 struct ipv6_pinfo *np = inet6_sk(sk);
1297 struct tcp_options_received tmp_opt;
1298 struct tcp_sock *tp = tcp_sk(sk);
1299 struct request_sock *req = NULL;
1300 __u32 isn = TCP_SKB_CB(skb)->when;
1302 if (skb->protocol == htons(ETH_P_IP))
1303 return tcp_v4_conn_request(sk, skb);
1305 if (!ipv6_unicast_destination(skb))
/*
 * There are no SYN attacks on IPv6, yet...
 */
1311 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1312 if (net_ratelimit())
1313 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1317 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1320 req = reqsk_alloc(&tcp6_request_sock_ops);
1324 tcp_clear_options(&tmp_opt);
1325 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1326 tmp_opt.user_mss = tp->rx_opt.user_mss;
1328 tcp_parse_options(skb, &tmp_opt, 0);
1330 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1331 tcp_openreq_init(req, &tmp_opt, skb);
1333 treq = tcp6_rsk(req);
1334 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1335 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1336 TCP_ECN_create_request(req, skb->h.th);
1337 treq->pktopts = NULL;
1338 if (ipv6_opt_accepted(sk, skb) ||
1339 np->rxopt.bits.rxinfo ||
1340 np->rxopt.bits.rxhlim) {
1341 atomic_inc(&skb->users);
1342 treq->pktopts = skb;
1344 treq->iif = sk->sk_bound_dev_if;
1346 /* So that link locals have meaning */
1347 if (!sk->sk_bound_dev_if &&
1348 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1349 treq->iif = tcp_v6_iif(skb);
1352 isn = tcp_v6_init_sequence(sk,skb);
1354 tcp_rsk(req)->snt_isn = isn;
1356 if (tcp_v6_send_synack(sk, req, NULL))
1359 tcp_v6_synq_add(sk, req);
1367 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1368 return 0; /* don't send reset */
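/* Create the child socket once the handshake completes.  Requests that
 * arrived as IPv4 (v4-mapped) are handed to tcp_v4_syn_recv_sock() and
 * then switched over to the ipv6_mapped operations.
 */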
1371 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1372 struct request_sock *req,
1373 struct dst_entry *dst)
1375 struct tcp6_request_sock *treq = tcp6_rsk(req);
1376 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1377 struct tcp6_sock *newtcp6sk;
1378 struct inet_sock *newinet;
1379 struct tcp_sock *newtp;
1381 struct ipv6_txoptions *opt;
1383 if (skb->protocol == htons(ETH_P_IP)) {
1388 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1393 newtcp6sk = (struct tcp6_sock *)newsk;
1394 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1396 newinet = inet_sk(newsk);
1397 newnp = inet6_sk(newsk);
1398 newtp = tcp_sk(newsk);
1400 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1402 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1405 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1408 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1410 newtp->af_specific = &ipv6_mapped;
1411 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1412 newnp->pktoptions = NULL;
1414 newnp->mcast_oif = tcp_v6_iif(skb);
1415 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
/*
 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
 * here, tcp_create_openreq_child now does this for us, see the comment in
 * that function for the gory details. -acme
 */
/* It is a tricky place. Until this moment IPv4 tcp
   worked with IPv6 af_tcp.af_specific.
   Sync it now.
 */
1427 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1434 if (sk_acceptq_is_full(sk))
1437 if (np->rxopt.bits.srcrt == 2 &&
1438 opt == NULL && treq->pktopts) {
1439 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1441 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1445 struct in6_addr *final_p = NULL, final;
1448 memset(&fl, 0, sizeof(fl));
1449 fl.proto = IPPROTO_TCP;
1450 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1451 if (opt && opt->srcrt) {
1452 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1453 ipv6_addr_copy(&final, &fl.fl6_dst);
1454 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1457 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1458 fl.oif = sk->sk_bound_dev_if;
1459 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1460 fl.fl_ip_sport = inet_sk(sk)->sport;
1462 if (ip6_dst_lookup(sk, &dst, &fl))
1466 ipv6_addr_copy(&fl.fl6_dst, final_p);
1468 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1472 newsk = tcp_create_openreq_child(sk, req, skb);
/*
 * No need to charge this sock to the relevant IPv6 refcnt debug socks
 * count here, tcp_create_openreq_child now does this for us, see the
 * comment in that function for the gory details. -acme
 */
1482 ip6_dst_store(newsk, dst, NULL);
1483 newsk->sk_route_caps = dst->dev->features &
1484 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1486 newtcp6sk = (struct tcp6_sock *)newsk;
1487 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1489 newtp = tcp_sk(newsk);
1490 newinet = inet_sk(newsk);
1491 newnp = inet6_sk(newsk);
1493 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1495 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1496 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1497 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1498 newsk->sk_bound_dev_if = treq->iif;
1500 /* Now IPv6 options...
1502 First: no IPv4 options.
1504 newinet->opt = NULL;
1507 newnp->rxopt.all = np->rxopt.all;
1509 /* Clone pktoptions received with SYN */
1510 newnp->pktoptions = NULL;
1511 if (treq->pktopts != NULL) {
1512 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1513 kfree_skb(treq->pktopts);
1514 treq->pktopts = NULL;
1515 if (newnp->pktoptions)
1516 skb_set_owner_r(newnp->pktoptions, newsk);
1519 newnp->mcast_oif = tcp_v6_iif(skb);
1520 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1522 /* Clone native IPv6 options from listening socket (if any)
Yes, keeping a reference count would be much more clever,
but we do one more thing here: reattach optmem to newsk.
 */
1529 newnp->opt = ipv6_dup_options(newsk, opt);
1531 sock_kfree_s(sk, opt, opt->tot_len);
1534 newtp->ext_header_len = 0;
1536 newtp->ext_header_len = newnp->opt->opt_nflen +
1537 newnp->opt->opt_flen;
1539 tcp_sync_mss(newsk, dst_mtu(dst));
1540 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1541 tcp_initialize_rcv_mss(newsk);
1543 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1545 __tcp_v6_hash(newsk);
1546 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1551 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1553 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1554 if (opt && opt != np->opt)
1555 sock_kfree_s(sk, opt, opt->tot_len);
1560 static int tcp_v6_checksum_init(struct sk_buff *skb)
1562 if (skb->ip_summed == CHECKSUM_HW) {
1563 skb->ip_summed = CHECKSUM_UNNECESSARY;
1564 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1565 &skb->nh.ipv6h->daddr,skb->csum))
1567 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1569 if (skb->len <= 76) {
1570 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1571 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1573 skb->ip_summed = CHECKSUM_UNNECESSARY;
1575 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1576 &skb->nh.ipv6h->daddr,0);
/* The socket must have its spinlock held when we get here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock held.
 */
1589 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1591 struct ipv6_pinfo *np = inet6_sk(sk);
1592 struct tcp_sock *tp;
1593 struct sk_buff *opt_skb = NULL;
1595 /* Imagine: socket is IPv6. IPv4 packet arrives,
1596 goes to IPv4 receive handler and backlogged.
1597 From backlog it always goes here. Kerboom...
1598 Fortunately, tcp_rcv_established and rcv_established
handle them correctly, but it is not the case with
tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
 */
1603 if (skb->protocol == htons(ETH_P_IP))
1604 return tcp_v4_do_rcv(sk, skb);
1606 if (sk_filter(sk, skb, 0))
/*
 * socket locking is here for SMP purposes as backlog rcv
 * is currently called with bh processing disabled.
 */
1614 /* Do Stevens' IPV6_PKTOPTIONS.
1616 Yes, guys, it is the only place in our code, where we
1617 may make it not affecting IPv4.
1618 The rest of code is protocol independent,
1619 and I do not like idea to uglify IPv4.
1621 Actually, all the idea behind IPV6_PKTOPTIONS
1622 looks not very well thought. For now we latch
1623 options, received in the last packet, enqueued
by tcp. Feel free to propose a better solution.
 */
1628 opt_skb = skb_clone(skb, GFP_ATOMIC);
1630 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1631 TCP_CHECK_TIMER(sk);
1632 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1634 TCP_CHECK_TIMER(sk);
1636 goto ipv6_pktoptions;
1640 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1643 if (sk->sk_state == TCP_LISTEN) {
1644 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1649 * Queue it on the new socket if the new socket is active,
1650 * otherwise we just shortcircuit this and continue with
1654 if (tcp_child_process(sk, nsk, skb))
1657 __kfree_skb(opt_skb);
1662 TCP_CHECK_TIMER(sk);
1663 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1665 TCP_CHECK_TIMER(sk);
1667 goto ipv6_pktoptions;
1671 tcp_v6_send_reset(skb);
1674 __kfree_skb(opt_skb);
1678 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1683 /* Do you ask, what is it?
1685 1. skb was enqueued by tcp.
1686 2. skb is added to tail of read queue, rather than out of order.
1687 3. socket is not in passive state.
1688 4. Finally, it really contains options, which user wants to receive.
1691 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1692 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1693 if (np->rxopt.bits.rxinfo)
1694 np->mcast_oif = tcp_v6_iif(opt_skb);
1695 if (np->rxopt.bits.rxhlim)
1696 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1697 if (ipv6_opt_accepted(sk, opt_skb)) {
1698 skb_set_owner_r(opt_skb, sk);
1699 opt_skb = xchg(&np->pktoptions, opt_skb);
1701 __kfree_skb(opt_skb);
1702 opt_skb = xchg(&np->pktoptions, NULL);
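/* Main receive entry point, registered through tcpv6_protocol below:
 * validate header and checksum, find the owning socket and feed it the
 * segment, or fall back to RST / TIME-WAIT handling.
 */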
1711 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1713 struct sk_buff *skb = *pskb;
1718 if (skb->pkt_type != PACKET_HOST)
/*
 * Count it even if it's bad.
 */
1724 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1726 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1731 if (th->doff < sizeof(struct tcphdr)/4)
1733 if (!pskb_may_pull(skb, th->doff*4))
1736 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1737 tcp_v6_checksum_init(skb) < 0))
1741 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1742 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1743 skb->len - th->doff*4);
1744 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1745 TCP_SKB_CB(skb)->when = 0;
1746 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1747 TCP_SKB_CB(skb)->sacked = 0;
1749 sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
1750 &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1756 if (sk->sk_state == TCP_TIME_WAIT)
1759 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1760 goto discard_and_relse;
1762 if (sk_filter(sk, skb, 0))
1763 goto discard_and_relse;
1769 if (!sock_owned_by_user(sk)) {
1770 if (!tcp_prequeue(sk, skb))
1771 ret = tcp_v6_do_rcv(sk, skb);
1773 sk_add_backlog(sk, skb);
1777 return ret ? -1 : 0;
1780 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1783 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1785 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1787 tcp_v6_send_reset(skb);
1804 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1805 inet_twsk_put((struct inet_timewait_sock *)sk);
1809 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1810 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1811 inet_twsk_put((struct inet_timewait_sock *)sk);
1815 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1821 sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
1823 struct inet_timewait_sock *tw = inet_twsk(sk);
1824 inet_twsk_deschedule(tw, &tcp_death_row);
1829 /* Fall through to ACK */
1832 tcp_v6_timewait_ack(sk, skb);
1836 case TCP_TW_SUCCESS:;
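/* Re-route a socket whose cached dst entry has been invalidated,
 * preserving the final hop of any source route in np->opt.
 */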
1841 static int tcp_v6_rebuild_header(struct sock *sk)
1844 struct dst_entry *dst;
1845 struct ipv6_pinfo *np = inet6_sk(sk);
1847 dst = __sk_dst_check(sk, np->dst_cookie);
1850 struct inet_sock *inet = inet_sk(sk);
1851 struct in6_addr *final_p = NULL, final;
1854 memset(&fl, 0, sizeof(fl));
1855 fl.proto = IPPROTO_TCP;
1856 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1857 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1858 fl.fl6_flowlabel = np->flow_label;
1859 fl.oif = sk->sk_bound_dev_if;
1860 fl.fl_ip_dport = inet->dport;
1861 fl.fl_ip_sport = inet->sport;
1863 if (np->opt && np->opt->srcrt) {
1864 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1865 ipv6_addr_copy(&final, &fl.fl6_dst);
1866 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1870 err = ip6_dst_lookup(sk, &dst, &fl);
1872 sk->sk_route_caps = 0;
1876 ipv6_addr_copy(&fl.fl6_dst, final_p);
1878 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1879 sk->sk_err_soft = -err;
1884 ip6_dst_store(sk, dst, NULL);
1885 sk->sk_route_caps = dst->dev->features &
1886 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
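/* Transmit one queued segment: revalidate (or rebuild) the cached route,
 * then hand the skb to ip6_xmit() with the socket's IPv6 options.
 */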
1892 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1894 struct sock *sk = skb->sk;
1895 struct inet_sock *inet = inet_sk(sk);
1896 struct ipv6_pinfo *np = inet6_sk(sk);
1898 struct dst_entry *dst;
1899 struct in6_addr *final_p = NULL, final;
1901 memset(&fl, 0, sizeof(fl));
1902 fl.proto = IPPROTO_TCP;
1903 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1904 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1905 fl.fl6_flowlabel = np->flow_label;
1906 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1907 fl.oif = sk->sk_bound_dev_if;
1908 fl.fl_ip_sport = inet->sport;
1909 fl.fl_ip_dport = inet->dport;
1911 if (np->opt && np->opt->srcrt) {
1912 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1913 ipv6_addr_copy(&final, &fl.fl6_dst);
1914 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1918 dst = __sk_dst_check(sk, np->dst_cookie);
1921 int err = ip6_dst_lookup(sk, &dst, &fl);
1924 sk->sk_err_soft = -err;
1929 ipv6_addr_copy(&fl.fl6_dst, final_p);
1931 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1932 sk->sk_route_caps = 0;
1937 ip6_dst_store(sk, dst, NULL);
1938 sk->sk_route_caps = dst->dev->features &
1939 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1942 skb->dst = dst_clone(dst);
1944 /* Restore final destination back after routing done */
1945 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1947 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1950 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1952 struct ipv6_pinfo *np = inet6_sk(sk);
1953 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1955 sin6->sin6_family = AF_INET6;
1956 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1957 sin6->sin6_port = inet_sk(sk)->dport;
1958 /* We do not store received flowlabel for TCP */
1959 sin6->sin6_flowinfo = 0;
1960 sin6->sin6_scope_id = 0;
1961 if (sk->sk_bound_dev_if &&
1962 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1963 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1966 static int tcp_v6_remember_stamp(struct sock *sk)
1968 /* Alas, not yet... */
1972 static struct tcp_func ipv6_specific = {
1973 .queue_xmit = tcp_v6_xmit,
1974 .send_check = tcp_v6_send_check,
1975 .rebuild_header = tcp_v6_rebuild_header,
1976 .conn_request = tcp_v6_conn_request,
1977 .syn_recv_sock = tcp_v6_syn_recv_sock,
1978 .remember_stamp = tcp_v6_remember_stamp,
1979 .net_header_len = sizeof(struct ipv6hdr),
1981 .setsockopt = ipv6_setsockopt,
1982 .getsockopt = ipv6_getsockopt,
1983 .addr2sockaddr = v6_addr2sockaddr,
1984 .sockaddr_len = sizeof(struct sockaddr_in6)
1988 * TCP over IPv4 via INET6 API
1991 static struct tcp_func ipv6_mapped = {
1992 .queue_xmit = ip_queue_xmit,
1993 .send_check = tcp_v4_send_check,
1994 .rebuild_header = inet_sk_rebuild_header,
1995 .conn_request = tcp_v6_conn_request,
1996 .syn_recv_sock = tcp_v6_syn_recv_sock,
1997 .remember_stamp = tcp_v4_remember_stamp,
1998 .net_header_len = sizeof(struct iphdr),
2000 .setsockopt = ipv6_setsockopt,
2001 .getsockopt = ipv6_getsockopt,
2002 .addr2sockaddr = v6_addr2sockaddr,
2003 .sockaddr_len = sizeof(struct sockaddr_in6)
2008 /* NOTE: A lot of things set to zero explicitly by call to
 * sk_alloc() so need not be done here.
 */
2011 static int tcp_v6_init_sock(struct sock *sk)
2013 struct tcp_sock *tp = tcp_sk(sk);
2015 skb_queue_head_init(&tp->out_of_order_queue);
2016 tcp_init_xmit_timers(sk);
2017 tcp_prequeue_init(tp);
2019 inet_csk(sk)->icsk_rto = TCP_TIMEOUT_INIT;
2020 tp->mdev = TCP_TIMEOUT_INIT;
2022 /* So many TCP implementations out there (incorrectly) count the
2023 * initial SYN frame in their delayed-ACK and congestion control
2024 * algorithms that we must have the following bandaid to talk
2025 * efficiently to them. -DaveM
2029 /* See draft-stevens-tcpca-spec-01 for discussion of the
2030 * initialization of these values.
2032 tp->snd_ssthresh = 0x7fffffff;
2033 tp->snd_cwnd_clamp = ~0;
2034 tp->mss_cache = 536;
2036 tp->reordering = sysctl_tcp_reordering;
2038 sk->sk_state = TCP_CLOSE;
2040 tp->af_specific = &ipv6_specific;
2041 tp->ca_ops = &tcp_init_congestion_ops;
2042 sk->sk_write_space = sk_stream_write_space;
2043 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
2045 sk->sk_sndbuf = sysctl_tcp_wmem[1];
2046 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
2048 atomic_inc(&tcp_sockets_allocated);
2053 static int tcp_v6_destroy_sock(struct sock *sk)
2055 extern int tcp_v4_destroy_sock(struct sock *sk);
2057 tcp_v4_destroy_sock(sk);
2058 return inet6_destroy_sock(sk);
2061 /* Proc filesystem TCPv6 sock list dumping. */
2062 static void get_openreq6(struct seq_file *seq,
2063 struct sock *sk, struct request_sock *req, int i, int uid)
2065 struct in6_addr *dest, *src;
2066 int ttd = req->expires - jiffies;
2071 src = &tcp6_rsk(req)->loc_addr;
2072 dest = &tcp6_rsk(req)->rmt_addr;
2074 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2075 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2077 src->s6_addr32[0], src->s6_addr32[1],
2078 src->s6_addr32[2], src->s6_addr32[3],
2079 ntohs(inet_sk(sk)->sport),
2080 dest->s6_addr32[0], dest->s6_addr32[1],
2081 dest->s6_addr32[2], dest->s6_addr32[3],
2082 ntohs(inet_rsk(req)->rmt_port),
2084 0,0, /* could print option size, but that is af dependent. */
2085 1, /* timers active (only the expire timer) */
2086 jiffies_to_clock_t(ttd),
2089 0, /* non standard timer */
2090 0, /* open_requests have no inode */
2094 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
2096 struct in6_addr *dest, *src;
2099 unsigned long timer_expires;
2100 struct inet_sock *inet = inet_sk(sp);
2101 struct tcp_sock *tp = tcp_sk(sp);
2102 const struct inet_connection_sock *icsk = inet_csk(sp);
2103 struct ipv6_pinfo *np = inet6_sk(sp);
2106 src = &np->rcv_saddr;
2107 destp = ntohs(inet->dport);
2108 srcp = ntohs(inet->sport);
2110 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
2112 timer_expires = icsk->icsk_timeout;
2113 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
2115 timer_expires = icsk->icsk_timeout;
2116 } else if (timer_pending(&sp->sk_timer)) {
2118 timer_expires = sp->sk_timer.expires;
2121 timer_expires = jiffies;
2125 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2126 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
2128 src->s6_addr32[0], src->s6_addr32[1],
2129 src->s6_addr32[2], src->s6_addr32[3], srcp,
2130 dest->s6_addr32[0], dest->s6_addr32[1],
2131 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2133 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
2135 jiffies_to_clock_t(timer_expires - jiffies),
2136 icsk->icsk_retransmits,
2140 atomic_read(&sp->sk_refcnt), sp,
2143 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
2144 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
2148 static void get_timewait6_sock(struct seq_file *seq,
2149 struct inet_timewait_sock *tw, int i)
2151 struct in6_addr *dest, *src;
2153 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2154 int ttd = tw->tw_ttd - jiffies;
2159 dest = &tcp6tw->tw_v6_daddr;
2160 src = &tcp6tw->tw_v6_rcv_saddr;
2161 destp = ntohs(tw->tw_dport);
2162 srcp = ntohs(tw->tw_sport);
2165 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2166 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2168 src->s6_addr32[0], src->s6_addr32[1],
2169 src->s6_addr32[2], src->s6_addr32[3], srcp,
2170 dest->s6_addr32[0], dest->s6_addr32[1],
2171 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2172 tw->tw_substate, 0, 0,
2173 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2174 atomic_read(&tw->tw_refcnt), tw);
2177 #ifdef CONFIG_PROC_FS
2178 static int tcp6_seq_show(struct seq_file *seq, void *v)
2180 struct tcp_iter_state *st;
2182 if (v == SEQ_START_TOKEN) {
2187 "st tx_queue rx_queue tr tm->when retrnsmt"
2188 " uid timeout inode\n");
2193 switch (st->state) {
2194 case TCP_SEQ_STATE_LISTENING:
2195 case TCP_SEQ_STATE_ESTABLISHED:
2196 get_tcp6_sock(seq, v, st->num);
2198 case TCP_SEQ_STATE_OPENREQ:
2199 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2201 case TCP_SEQ_STATE_TIME_WAIT:
2202 get_timewait6_sock(seq, v, st->num);
2209 static struct file_operations tcp6_seq_fops;
2210 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2211 .owner = THIS_MODULE,
2214 .seq_show = tcp6_seq_show,
2215 .seq_fops = &tcp6_seq_fops,
2218 int __init tcp6_proc_init(void)
2220 return tcp_proc_register(&tcp6_seq_afinfo);
2223 void tcp6_proc_exit(void)
2225 tcp_proc_unregister(&tcp6_seq_afinfo);
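/* tcpv6_prot wires the functions above into the generic socket layer;
 * tcpv6_protocol and tcpv6_protosw below register TCP with the IPv6
 * protocol stack.
 */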
2229 struct proto tcpv6_prot = {
2231 .owner = THIS_MODULE,
2233 .connect = tcp_v6_connect,
2234 .disconnect = tcp_disconnect,
2235 .accept = inet_csk_accept,
2237 .init = tcp_v6_init_sock,
2238 .destroy = tcp_v6_destroy_sock,
2239 .shutdown = tcp_shutdown,
2240 .setsockopt = tcp_setsockopt,
2241 .getsockopt = tcp_getsockopt,
2242 .sendmsg = tcp_sendmsg,
2243 .recvmsg = tcp_recvmsg,
2244 .backlog_rcv = tcp_v6_do_rcv,
2245 .hash = tcp_v6_hash,
2246 .unhash = tcp_unhash,
2247 .get_port = tcp_v6_get_port,
2248 .enter_memory_pressure = tcp_enter_memory_pressure,
2249 .sockets_allocated = &tcp_sockets_allocated,
2250 .memory_allocated = &tcp_memory_allocated,
2251 .memory_pressure = &tcp_memory_pressure,
2252 .orphan_count = &tcp_orphan_count,
2253 .sysctl_mem = sysctl_tcp_mem,
2254 .sysctl_wmem = sysctl_tcp_wmem,
2255 .sysctl_rmem = sysctl_tcp_rmem,
2256 .max_header = MAX_TCP_HEADER,
2257 .obj_size = sizeof(struct tcp6_sock),
2258 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2259 .rsk_prot = &tcp6_request_sock_ops,
2262 static struct inet6_protocol tcpv6_protocol = {
2263 .handler = tcp_v6_rcv,
2264 .err_handler = tcp_v6_err,
2265 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2268 extern struct proto_ops inet6_stream_ops;
2270 static struct inet_protosw tcpv6_protosw = {
2271 .type = SOCK_STREAM,
2272 .protocol = IPPROTO_TCP,
2273 .prot = &tcpv6_prot,
2274 .ops = &inet6_stream_ops,
2277 .flags = INET_PROTOSW_PERMANENT,
2280 void __init tcpv6_init(void)
2282 /* register inet6 protocol */
2283 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2284 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2285 inet6_register_protosw(&tcpv6_protosw);