/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent>>16;
	hashent ^= hashent>>8;
	return (hashent & (tcp_ehash_size - 1));
}
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;
	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}
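/* Check whether giving sk the port owned by this bind bucket would conflict
 * with one of the bucket's existing owners, taking bound devices and
 * SO_REUSEADDR semantics into account.
 */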
static inline int tcp_v6_bind_conflict(struct sock *sk,
				       struct tcp_bind_bucket *tb)
{
	struct sock *sk2;
	struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		     ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	struct hlist_node *node;

		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;

		spin_lock(&tcp_portalloc_lock);
		if (tcp_port_rover < low)

			rover = tcp_port_rover;

			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			tb_for_each(tb, node, &head->chain)
				if (tb->port == rover)

			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);

		/* Exhausted local port range during search?  It is not
		 * possible for us to be holding one of the bind hash
		 * locks if this test triggers, because if 'remaining'
		 * drops to zero, we broke out of the do/while loop at
		 * the top level, not from the 'break;' statement.
		 */
		if (unlikely(remaining <= 0))

		/* OK, here is the one we will use. */

		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		tb_for_each(tb, node, &head->chain)
			if (tb->port == snum)

	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {

			if (tcp_v6_bind_conflict(sk, tb))

	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)

	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)

	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))

	if (!tcp_sk(sk)->bind_hash)
		tcp_bind_hash(sk, tb, snum);
	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);

	spin_unlock(&head->lock);
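/* Insert an unhashed socket into the listening hash or the established hash,
 * depending on sk->sk_state, and bump the protocol's use count.
 */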
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;

		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_ehash[sk->sk_hashent].chain;
		lock = &tcp_ehash[sk->sk_hashent].lock;

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct hlist_node *node;
	struct sock *result = NULL;

	read_lock(&tcp_lhash_lock);
	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))

			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)

			if (score > hiscore) {

	read_unlock(&tcp_lhash_lock);
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct tcp_ehash_bucket *head;
	struct hlist_node *node;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyways.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}

	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
		/* FIXME: acme: check this... */
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

		if(*((__u32 *)&(tw->tw_dport)) == ports &&
		   sk->sk_family == PF_INET6) {
			if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
			   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
			   (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))

	read_unlock(&head->lock);

	read_unlock(&head->lock);
static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
	if (sk)
		return sk;
	return tcp_v6_lookup_listener(daddr, hnum, dif);
inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport,
				  int dif)
{
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);

EXPORT_SYMBOL_GPL(tcp_v6_lookup);
/*
 * Open request hash tables.
 */
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
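/* Walk the listener's SYN queue for a pending request that matches the
 * remote port and addresses (and, when set, the inbound interface).
 */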
static struct request_sock *tcp_v6_search_req(struct tcp_sock *tp,
					      struct request_sock ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct listen_sock *lopt = tp->accept_queue.listen_opt;
	struct request_sock *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		const struct tcp6_request_sock *treq = tcp6_rsk(req);

		if (inet_rsk(req)->rmt_port == rport &&
		    req->rsk_ops->family == AF_INET6 &&
		    ipv6_addr_equal(&treq->rmt_addr, raddr) &&
		    ipv6_addr_equal(&treq->loc_addr, laddr) &&
		    (!treq->iif || treq->iif == iif)) {
			BUG_TRAP(req->sk == NULL);
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,

		return secure_tcp_sequence_number(skb->nh.iph->daddr,
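/* Used on the active-open path when an ephemeral port is chosen: make sure
 * no established or TIME-WAIT socket already uses the same four-tuple, and
 * hash the socket into the established table if the check succeeds.
 */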
static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
				      struct tcp_tw_bucket **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct hlist_node *node;
	struct tcp_tw_bucket *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
		tw = (struct tcp_tw_bucket*)sk2;

		if(*((__u32 *)&(tw->tw_dport)) == ports &&
		   sk2->sk_family == PF_INET6 &&
		   ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
		   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			struct tcp_sock *tp = tcp_sk(sk);

			if (tw->tw_ts_recent_stamp &&
			    (!twp || (sysctl_tcp_tw_reuse &&
				      tw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;

				tp->rx_opt.ts_recent = tw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))

	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

	/* Silly. Should hash-dance instead... */
	tcp_tw_deschedule(tw);
	NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
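/* Bind a local port for an outgoing connection and hash the socket into the
 * established table, reusing a suitable TIME-WAIT slot when permitted.
 */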
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;

		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;

		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct tcp_tw_bucket *tw = NULL;

		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_bhash[tcp_bhashfn(port)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 */
			tb_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)

					if (!__tcp_v6_check_established(sk,

			tb = tcp_bucket_create(head, port);

				spin_unlock(&head->lock);

			spin_unlock(&head->lock);

		return -EADDRNOTAVAIL;

		/* Head lock still held and bh's disabled */
		tcp_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);

		spin_unlock(&head->lock);

			tcp_tw_deschedule(tw);

	head = &tcp_bhash[tcp_bhashfn(snum)];
	tb = tcp_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {

		spin_unlock_bh(&head->lock);

		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}
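/* Active open: handle IPv4-mapped destinations via tcp_v4_connect(), route
 * the destination, pick a source address and start the TCP handshake.
 */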
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct dst_entry *dst;

	if (addr_len < SIN6_LEN_RFC2133)

	if (usin->sin6_family != AF_INET6)
		return(-EAFNOSUPPORT);

	memset(&fl, 0, sizeof(fl));

	fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
	IP6_ECN_flow_init(fl.fl6_flowlabel);
	if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
		struct ip6_flowlabel *flowlabel;
		flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
		if (flowlabel == NULL)

		ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
		fl6_sock_release(flowlabel);
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;

		ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),

		ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
	}

	err = ip6_dst_lookup(sk, &dst, &fl);

		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {

		ipv6_addr_copy(&np->rcv_saddr, saddr);

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
			    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

		tp->ext_header_len = 0;

		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);

	tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,

	err = tcp_connect(sk);

	tcp_set_state(sk, TCP_CLOSE);

	sk->sk_route_caps = 0;
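/* ICMPv6 error handler: find the socket the error refers to, then either
 * adjust the path MTU (for PKT_TOOBIG) or report the error to the socket.
 */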
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;

	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);

	if (sk->sk_state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket*)sk);

	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)

	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))

		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

			struct inet_sock *inet = inet_sk(sk);

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))

		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, tcp_v6_iif(skb));

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);

		tcp_synq_drop(sk, req, prev);

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);

			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			sk->sk_err_soft = err;

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_error_report(sk);

		sk->sk_err_soft = err;
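/* Build and send a SYN|ACK for the given connection request, honouring any
 * routing header saved from the original SYN's packet options.
 */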
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
	fl.fl6_flowlabel = 0;
	fl.fl_ip_dport = inet_rsk(req)->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    treq->pktopts) {
			struct sk_buff *pktopts = treq->pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);

			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		}

		err = ip6_dst_lookup(sk, &dst, &fl);

			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &treq->loc_addr, &treq->rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
	}

	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	if (tcp6_rsk(req)->pktopts)
		kfree_skb(tcp6_rsk(req)->pktopts);
}
static struct request_sock_ops tcp6_request_sock_ops = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset
};
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
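/* Send a RST in reply to skb.  The reply is built in a freshly allocated skb
 * and routed on its own; no socket state is required.
 */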
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;

	if (!ipv6_unicast_destination(skb))

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;

		t1->seq = th->ack_seq;

		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));

	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {

		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	int tot_len = sizeof(struct tcphdr);

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff,tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);

	t1->window = htons(win);

	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);
	tcp_tw_put(tw);
}
static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
}
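/* Resolve an incoming segment on a listening socket to a pending connection
 * request, an already established child socket, or the listener itself.
 */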
static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_sock *tp = tcp_sk(sk);

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr,

		if (nsk->sk_state != TCP_TIME_WAIT) {

		tcp_tw_put((struct tcp_tw_bucket*)nsk);

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct listen_sock *lopt = tp->accept_queue.listen_opt;
	u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);

	reqsk_queue_hash_req(&tp->accept_queue, h, req, TCP_TIMEOUT_INIT);
	tcp_synq_added(sk);
}
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct request_sock *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");

	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)

	req = reqsk_alloc(&tcp6_request_sock_ops);

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = tcp6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	treq->pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		treq->pktopts = skb;
	}
	treq->iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
		treq->iif = tcp_v6_iif(skb);

		isn = tcp_v6_init_sequence(sk,skb);

	tcp_rsk(req)->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))

	tcp_v6_synq_add(sk, req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct tcp6_request_sock *treq = tcp6_rsk(req);
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions = NULL;

		newnp->mcast_oif = tcp_v6_iif(skb);
		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

		/* Charge newly allocated IPv6 socket. Though it is mapped,
		 */
#ifdef INET_REFCNT_DEBUG
		atomic_inc(&inet6_sock_nr);
#endif

		/* It is tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);
	}

	if (sk_acceptq_is_full(sk))

	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && treq->pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);

		opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
	}

		struct in6_addr *final_p = NULL, final;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		}
		ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet_rsk(req)->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))

			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)

	newsk = tcp_create_openreq_child(sk, req, skb);

	/* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet6_sock_nr);
#endif

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
			       ~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	tcp_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return NULL;
}
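/* Validate the TCP checksum of an incoming segment, or set things up so it
 * is checked later, depending on whether hardware already verified it.
 */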
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr,skb->csum))
			return 0;
		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr,0);
	}
	return 0;
}
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   does not look very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))

		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket.
		 */
		if (tcp_child_process(sk, nsk, skb))

			if (opt_skb)
				__kfree_skb(opt_skb);
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))

	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;

	tcp_v6_send_reset(skb);

	if (opt_skb)
		__kfree_skb(opt_skb);

	TCP_INC_STATS_BH(TCP_MIB_INERRS);

ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}
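/* Main IPv6 TCP receive routine: validate header and checksum, look up the
 * owning socket and deliver the segment directly, via the prequeue or via
 * the socket backlog.
 */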
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;

	if (skb->pkt_type != PACKET_HOST)

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))

	if (th->doff < sizeof(struct tcphdr)/4)

	if (!pskb_may_pull(skb, th->doff*4))

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))

	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (sk->sk_state == TCP_TIME_WAIT)

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);

	return ret ? -1 : 0;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {

		TCP_INC_STATS_BH(TCP_MIB_INERRS);

		tcp_v6_send_reset(skb);

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		tcp_tw_put((struct tcp_tw_bucket *) sk);

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		tcp_tw_put((struct tcp_tw_bucket *) sk);

	switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					  skb, th, skb->len)) {
	case TCP_TW_SYN:
		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);

		/* Fall through to ACK */
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
	case TCP_TW_SUCCESS:;
	}
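/* Re-validate the socket's cached route, creating a new one if it has become
 * obsolete, before a segment is retransmitted.
 */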
static int tcp_v6_rebuild_header(struct sock *sk)
{
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;

			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
				    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
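/* Transmit a TCP segment over IPv6: make sure a valid route is cached, then
 * hand the packet to ip6_xmit() together with any IPv6 transmit options.
 */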
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;

			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
				    ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}

static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = 536;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;
	tp->ca_ops = &tcp_init_congestion_ops;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	src = &tcp6_rsk(req)->loc_addr;
	dest = &tcp6_rsk(req)->rmt_addr;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   0,0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   0,   /* non standard timer */
		   0,   /* open_requests have no inode */
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest = &np->daddr;
	src = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp = ntohs(inet->sport);
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_expires = tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_expires = tp->timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_expires = sp->sk_timer.expires;
	} else {
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   atomic_read(&sp->sk_refcnt), sp,
		   tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct tcp_tw_bucket *tw, int i)
{
	struct in6_addr *dest, *src;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw->tw_v6_daddr;
	src = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 " uid timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}
static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.rsk_prot		= &tcp6_request_sock_ops,
};
static struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
extern struct proto_ops inet6_stream_ops;

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.capability	=	-1,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT,
};
void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}