/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (tcp_ehash_size - 1));
}
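
/* Note: only the low 32 bits of each address feed the fold above; for
 * typical prefix allocation the final word is the most variable part,
 * which is usually enough to spread flows across the power-of-two
 * tcp_ehash_size buckets.
 */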
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;

	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}
static inline int tcp_v6_bind_conflict(struct sock *sk,
				       struct tcp_bind_bucket *tb)
{
	struct sock *sk2;
	struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
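
/* In other words: the bind succeeds unless some existing owner of the
 * port overlaps on both bound device and source address and cannot
 * share it, i.e. either side lacks SO_REUSEADDR or the other socket
 * is actively listening.
 */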
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		rover = tcp_port_rover;
		do {
			rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			tb_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);
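
		/* tcp_port_rover is a single cursor shared by every
		 * autobinding socket, so each search resumes where the
		 * previous one stopped instead of rescanning from the
		 * bottom of the range.
		 */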
		/* Exhausted local port range during search? */
		ret = 1;
		if (remaining <= 0)
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		tb_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!tcp_sk(sk)->bind_hash)
		tcp_bind_hash(sk, tb, snum);
	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
		tcp_listen_wlock();
	} else {
		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_ehash[sk->sk_hashent].chain;
		lock = &tcp_ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}
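
/* Two tables are in play here: listening sockets hash by local port
 * into tcp_listening_hash, while everything else hashes by the full
 * four-tuple into the established table.
 */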
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore = 0;

	read_lock(&tcp_lhash_lock);
	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_lhash_lock);
	return result;
}
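
/* The scoring above prefers the most specific listener: binding to a
 * particular address or interface each score a point, so an exactly
 * bound socket beats a wildcard listener on the same port.
 */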
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct tcp_ehash_bucket *head;
	struct sock *sk;
	struct hlist_node *node;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyway.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
		/* FIXME: acme: check this... */
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

		if(*((__u32 *)&(tw->tw_dport))	== ports &&
		   sk->sk_family		== PF_INET6) {
			if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
			   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
			   (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}
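
/* The established table is two parallel halves of tcp_ehash_size
 * buckets: live connections occupy the first half and their TIME-WAIT
 * remnants the matching bucket of the second, which is why one bucket
 * lock covers both loops above.
 */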
static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}
inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport,
				  int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);
/*
 * Open request hash tables.
 */
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
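
/* This is the jhash mixing step done by hand: golden-ratio constants
 * plus the per-listener random hash_rnd make the bucket of a pending
 * request hard for a remote sender to predict.
 */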
static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
					      struct open_request ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		if (req->rmt_port == rport &&
		    req->class->family == AF_INET6 &&
		    ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
		    ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
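
/* csum_ipv6_magic() folds in the pseudo-header of RFC 2460: source
 * and destination addresses, the upper-layer payload length and the
 * next-header value (IPPROTO_TCP), combined with the checksum over
 * the TCP header and data already accumulated in base.
 */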
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}
static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
				      struct tcp_tw_bucket **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2;
	struct hlist_node *node;
	struct tcp_tw_bucket *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
		tw = (struct tcp_tw_bucket*)sk2;

		if(*((__u32 *)&(tw->tw_dport)) == ports &&
		   sk2->sk_family == PF_INET6 &&
		   ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
		   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			struct tcp_sock *tp = tcp_sk(sk);

			if (tw->tw_ts_recent_stamp &&
			    (!twp || (sysctl_tcp_tw_reuse &&
				      xtime.tv_sec - tw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		tcp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		tcp_tw_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
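
/* Reuse of a 4-tuple still held by a TIME-WAIT bucket is allowed only
 * when recent timestamps prove the old incarnation is dead; the new
 * write_seq is pushed past anything the peer could still accept from
 * the previous connection.
 */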
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct tcp_tw_bucket *tw = NULL;
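
		/* The loop below probes every port in [low, high) starting
		 * at a connection-keyed secret offset; with the usual
		 * 32768..61000 default range that is up to 28232 candidates
		 * walked in an order outsiders cannot guess.
		 */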
		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_bhash[tcp_bhashfn(port)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			tb_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = tcp_bucket_create(head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		tcp_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			tcp_tw_deschedule(tw);
			tcp_tw_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_bhash[tcp_bhashfn(snum)];
	tb = tcp_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}
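
	/* An IPv4-mapped destination such as ::ffff:192.0.2.1 is punted
	 * to the IPv4 connect path above; the saddr/rcv_saddr recorded
	 * there are the mapped (::ffff:a.b.c.d) forms of whatever IPv4
	 * source the v4 code picked.
	 */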
	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
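
	/* Worst case: an IPv6 minimum-MTU link carries 1280 bytes, and
	 * the fixed IPv6 (40) and TCP (20) headers leave 1220 bytes of
	 * payload, so no peer MSS may be taken above that.
	 */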
	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket*)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an open_request */
	switch (sk->sk_state) {
		struct open_request *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, tcp_v6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != req->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
			      struct dst_entry *dst)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = req->af.v6_req.iif;
	fl.fl_ip_dport = req->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    req->af.v6_req.pktopts) {
			struct sk_buff *pktopts = req->af.v6_req.pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
static void tcp_v6_or_free(struct open_request *req)
{
	if (req->af.v6_req.pktopts)
		kfree_skb(req->af.v6_req.pktopts);
}

static struct or_calltable or_ipv6 = {
	.family		=	AF_INET6,
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_or_send_ack,
	.destructor	=	tcp_v6_or_free,
	.send_reset	=	tcp_v6_send_reset
};
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if(th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}
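
	/* If the offending segment carried no ACK we must manufacture
	 * one: per RFC 793 the RST acknowledges exactly the sequence
	 * space the segment consumed, its data length plus one each
	 * for SYN and FIN.
	 */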
	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);
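
	/* When a timestamp is echoed below it uses the standard layout:
	 * two NOPs for alignment, then kind 8 (TCPOPT_TIMESTAMP) and
	 * length 10, followed by our clock and the peer's echoed value.
	 */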
	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

	tcp_tw_put(tw);
}

static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
}
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct open_request *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
					  th->source,
					  &skb->nh.ipv6h->daddr,
					  ntohs(th->dest),
					  tcp_v6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		tcp_tw_put((struct tcp_tw_bucket*)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_listen_opt *lopt = tp->listen_opt;
	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);

	req->sk = NULL;
	req->expires = jiffies + TCP_TIMEOUT_INIT;
	req->retrans = 0;
	req->dl_next = lopt->syn_table[h];

	write_lock(&tp->syn_wait_lock);
	lopt->syn_table[h] = req;
	write_unlock(&tp->syn_wait_lock);

	tcp_synq_added(sk);
}
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct open_request *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = tcp_openreq_alloc();
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	req->class = &or_ipv6;
	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	req->af.v6_req.pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		req->af.v6_req.pktopts = skb;
	}
	req->af.v6_req.iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
		req->af.v6_req.iif = tcp_v6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	req->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		tcp_openreq_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct open_request *req,
					  struct dst_entry *dst)
{
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions = NULL;
		newnp->opt	  = NULL;
		newnp->mcast_oif  = tcp_v6_iif(skb);
		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

		/* Charge newly allocated IPv6 socket. Though it is mapped,
		 * it is IPv6 yet.
		 */
#ifdef INET_REFCNT_DEBUG
		atomic_inc(&inet6_sock_nr);
#endif

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}
	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && req->af.v6_req.pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = req->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}
	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet6_sock_nr);
#endif

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
	newsk->sk_bound_dev_if = req->af.v6_req.iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (req->af.v6_req.pktopts) {
		newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
					      GFP_ATOMIC);
		kfree_skb(req->af.v6_req.pktopts);
		req->af.v6_req.pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	tcp_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr,
				 skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len,
					  &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}
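
/* The strategy above: trust but verify hardware checksums; fully check
 * short packets (<= 76 bytes) right away since that is cheap; for
 * longer ones only seed skb->csum with the pseudo-header and let the
 * copy-to-user path finish the verification.
 */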
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and is backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}

	switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					  skb, th, skb->len)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache_std = tp->mss_cache = 536;
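
	/* 536 is the classic conservative default: the 576-byte minimum
	 * reassembly buffer every IPv4 host must accept (RFC 1122) minus
	 * 40 bytes of IP+TCP headers. It is re-synced once a route and
	 * its MTU are known.
	 */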
	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct open_request *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &req->af.v6_req.loc_addr;
	dest = &req->af.v6_req.rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(req->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tp->timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   tp->retransmits,
		   sock_i_uid(sp),
		   tp->probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct tcp_tw_bucket *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
};

static struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
extern struct proto_ops inet6_stream_ops;

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}