3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
11 * linux/net/ipv4/tcp.c
12 * linux/net/ipv4/tcp_input.c
13 * linux/net/ipv4/tcp_output.c
16 * Hideaki YOSHIFUJI : sin6_scope_id support
17 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
18 * Alexey Kuznetsov allows both IPv4 and IPv6 sockets to bind
19 * a single port at the same time.
20 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
22 * This program is free software; you can redistribute it and/or
23 * modify it under the terms of the GNU General Public License
24 * as published by the Free Software Foundation; either version
25 * 2 of the License, or (at your option) any later version.
28 #include <linux/module.h>
29 #include <linux/config.h>
30 #include <linux/errno.h>
31 #include <linux/types.h>
32 #include <linux/socket.h>
33 #include <linux/sockios.h>
34 #include <linux/net.h>
35 #include <linux/jiffies.h>
37 #include <linux/in6.h>
38 #include <linux/netdevice.h>
39 #include <linux/init.h>
40 #include <linux/jhash.h>
41 #include <linux/ipsec.h>
42 #include <linux/times.h>
44 #include <linux/ipv6.h>
45 #include <linux/icmpv6.h>
46 #include <linux/random.h>
49 #include <net/ndisc.h>
50 #include <net/inet6_hashtables.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
61 #include <net/dsfield.h>
63 #include <asm/uaccess.h>
65 #include <linux/proc_fs.h>
66 #include <linux/seq_file.h>
68 static void tcp_v6_send_reset(struct sk_buff *skb);
69 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
70 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
73 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
74 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok);
76 static struct tcp_func ipv6_mapped;
77 static struct tcp_func ipv6_specific;
79 static inline int tcp_v6_bind_conflict(const struct sock *sk,
80 const struct inet_bind_bucket *tb)
82 const struct sock *sk2;
83 const struct hlist_node *node;
85 /* We must walk the whole port owner list in this case. -DaveM */
86 sk_for_each_bound(sk2, node, &tb->owners) {
88 (!sk->sk_bound_dev_if ||
89 !sk2->sk_bound_dev_if ||
90 sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
91 (!sk->sk_reuse || !sk2->sk_reuse ||
92 sk2->sk_state == TCP_LISTEN) &&
93 ipv6_rcv_saddr_equal(sk, sk2))
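/*
 * A conflict exists when another owner of this bind bucket is bound to
 * the same device (or either socket has no device binding), SO_REUSEADDR
 * does not cover both of them (or the other socket is already listening),
 * and ipv6_rcv_saddr_equal() reports that the local addresses overlap.
 */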
100 /* Grrr, addr_type already calculated by caller, but I don't want
101 * to add some silly "cookie" argument to this method just for that.
102 * But it doesn't matter, the recalculation is in the rarest path
103 * this function ever takes.
105 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
107 struct inet_bind_hashbucket *head;
108 struct inet_bind_bucket *tb;
109 struct hlist_node *node;
114 int low = sysctl_local_port_range[0];
115 int high = sysctl_local_port_range[1];
116 int remaining = (high - low) + 1;
119 spin_lock(&tcp_hashinfo.portalloc_lock);
120 if (tcp_hashinfo.port_rover < low)
123 rover = tcp_hashinfo.port_rover;
127 head = &tcp_hashinfo.bhash[inet_bhashfn(rover, tcp_hashinfo.bhash_size)];
128 spin_lock(&head->lock);
129 inet_bind_bucket_for_each(tb, node, &head->chain)
130 if (tb->port == rover)
134 spin_unlock(&head->lock);
135 } while (--remaining > 0);
136 tcp_hashinfo.port_rover = rover;
137 spin_unlock(&tcp_hashinfo.portalloc_lock);
139 /* Exhausted local port range during search? It is not
140 * possible for us to be holding one of the bind hash
141 * locks if this test triggers, because if 'remaining'
142 * drops to zero, we broke out of the do/while loop at
143 * the top level, not from the 'break;' statement.
146 if (unlikely(remaining <= 0))
149 /* OK, here is the one we will use. */
152 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
153 spin_lock(&head->lock);
154 inet_bind_bucket_for_each(tb, node, &head->chain)
155 if (tb->port == snum)
161 if (tb && !hlist_empty(&tb->owners)) {
162 if (tb->fastreuse > 0 && sk->sk_reuse &&
163 sk->sk_state != TCP_LISTEN) {
167 if (tcp_v6_bind_conflict(sk, tb))
174 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, snum);
178 if (hlist_empty(&tb->owners)) {
179 if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
183 } else if (tb->fastreuse &&
184 (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
188 if (!inet_csk(sk)->icsk_bind_hash)
189 inet_bind_hash(sk, tb, snum);
190 BUG_TRAP(inet_csk(sk)->icsk_bind_hash == tb);
194 spin_unlock(&head->lock);
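/*
 * tcp_v6_get_port() in short: with no port requested, walk the bind hash
 * from tcp_hashinfo.port_rover until a free bucket inside the local port
 * range turns up; with an explicit port, look up (or create) its
 * inet_bind_bucket and admit the socket only if the bucket's fastreuse
 * rule applies or tcp_v6_bind_conflict() finds no clash with the
 * existing owners.
 */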
200 static __inline__ void __tcp_v6_hash(struct sock *sk)
202 struct hlist_head *list;
205 BUG_TRAP(sk_unhashed(sk));
207 if (sk->sk_state == TCP_LISTEN) {
208 list = &tcp_hashinfo.listening_hash[inet_sk_listen_hashfn(sk)];
209 lock = &tcp_hashinfo.lhash_lock;
210 inet_listen_wlock(&tcp_hashinfo);
212 sk->sk_hashent = inet6_sk_ehashfn(sk, tcp_hashinfo.ehash_size);
213 list = &tcp_hashinfo.ehash[sk->sk_hashent].chain;
214 lock = &tcp_hashinfo.ehash[sk->sk_hashent].lock;
218 __sk_add_node(sk, list);
219 sock_prot_inc_use(sk->sk_prot);
224 static void tcp_v6_hash(struct sock *sk)
226 if (sk->sk_state != TCP_CLOSE) {
227 struct tcp_sock *tp = tcp_sk(sk);
229 if (tp->af_specific == &ipv6_mapped) {
240 * Open request hash tables.
243 static u32 tcp_v6_synq_hash(const struct in6_addr *raddr, const u16 rport, const u32 rnd)
247 a = raddr->s6_addr32[0];
248 b = raddr->s6_addr32[1];
249 c = raddr->s6_addr32[2];
251 a += JHASH_GOLDEN_RATIO;
252 b += JHASH_GOLDEN_RATIO;
254 __jhash_mix(a, b, c);
256 a += raddr->s6_addr32[3];
258 __jhash_mix(a, b, c);
260 return c & (TCP_SYNQ_HSIZE - 1);
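/*
 * tcp_v6_synq_hash() is a trimmed Jenkins mix over the peer address,
 * peer port and the per-listener hash_rnd salt; masking with
 * TCP_SYNQ_HSIZE - 1 picks the bucket, which assumes the table size is
 * a power of two.
 */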
263 static struct request_sock *tcp_v6_search_req(const struct sock *sk,
264 struct request_sock ***prevp,
266 struct in6_addr *raddr,
267 struct in6_addr *laddr,
270 const struct inet_connection_sock *icsk = inet_csk(sk);
271 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
272 struct request_sock *req, **prev;
274 for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
275 (req = *prev) != NULL;
276 prev = &req->dl_next) {
277 const struct tcp6_request_sock *treq = tcp6_rsk(req);
279 if (inet_rsk(req)->rmt_port == rport &&
280 req->rsk_ops->family == AF_INET6 &&
281 ipv6_addr_equal(&treq->rmt_addr, raddr) &&
282 ipv6_addr_equal(&treq->loc_addr, laddr) &&
283 (!treq->iif || treq->iif == iif)) {
284 BUG_TRAP(req->sk == NULL);
293 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
294 struct in6_addr *saddr,
295 struct in6_addr *daddr,
298 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
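/*
 * tcp_v6_check() is just the IPv6 pseudo-header checksum: both addresses,
 * the TCP length and IPPROTO_TCP folded on top of "base", which callers
 * pass as the partial checksum of the TCP header and payload.
 */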
301 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
303 if (skb->protocol == htons(ETH_P_IPV6)) {
304 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
305 skb->nh.ipv6h->saddr.s6_addr32,
309 return secure_tcp_sequence_number(skb->nh.iph->daddr,
316 static int __tcp_v6_check_established(struct sock *sk, const __u16 lport,
317 struct inet_timewait_sock **twp)
319 struct inet_sock *inet = inet_sk(sk);
320 const struct ipv6_pinfo *np = inet6_sk(sk);
321 const struct in6_addr *daddr = &np->rcv_saddr;
322 const struct in6_addr *saddr = &np->daddr;
323 const int dif = sk->sk_bound_dev_if;
324 const u32 ports = INET_COMBINED_PORTS(inet->dport, lport);
325 const int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport,
326 tcp_hashinfo.ehash_size);
327 struct inet_ehash_bucket *head = &tcp_hashinfo.ehash[hash];
329 const struct hlist_node *node;
330 struct inet_timewait_sock *tw;
332 write_lock(&head->lock);
334 /* Check TIME-WAIT sockets first. */
335 sk_for_each(sk2, node, &(head + tcp_hashinfo.ehash_size)->chain) {
336 const struct tcp6_timewait_sock *tcp6tw = tcp6_twsk(sk2);
340 if(*((__u32 *)&(tw->tw_dport)) == ports &&
341 sk2->sk_family == PF_INET6 &&
342 ipv6_addr_equal(&tcp6tw->tw_v6_daddr, saddr) &&
343 ipv6_addr_equal(&tcp6tw->tw_v6_rcv_saddr, daddr) &&
344 sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
345 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk2);
346 struct tcp_sock *tp = tcp_sk(sk);
348 if (tcptw->tw_ts_recent_stamp &&
350 (sysctl_tcp_tw_reuse &&
351 xtime.tv_sec - tcptw->tw_ts_recent_stamp > 1))) {
352 /* See comment in tcp_ipv4.c */
353 tp->write_seq = tcptw->tw_snd_nxt + 65535 + 2;
356 tp->rx_opt.ts_recent = tcptw->tw_ts_recent;
357 tp->rx_opt.ts_recent_stamp = tcptw->tw_ts_recent_stamp;
366 /* And established part... */
367 sk_for_each(sk2, node, &head->chain) {
368 if (INET6_MATCH(sk2, saddr, daddr, ports, dif))
373 BUG_TRAP(sk_unhashed(sk));
374 __sk_add_node(sk, &head->chain);
375 sk->sk_hashent = hash;
376 sock_prot_inc_use(sk->sk_prot);
377 write_unlock(&head->lock);
381 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
383 /* Silly. Should hash-dance instead... */
384 inet_twsk_deschedule(tw, &tcp_death_row);
385 NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
392 write_unlock(&head->lock);
393 return -EADDRNOTAVAIL;
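/*
 * __tcp_v6_check_established(): with the ehash bucket write-locked, scan
 * the time-wait half of the chain and then the established half for a
 * socket already using this exact 4-tuple.  A time-wait entry whose
 * timestamps permit reuse is recycled (its ts_recent state is inherited
 * and the old entry descheduled); any other match means the chosen
 * ephemeral port is unusable and -EADDRNOTAVAIL is returned.
 */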
396 static inline u32 tcpv6_port_offset(const struct sock *sk)
398 const struct inet_sock *inet = inet_sk(sk);
399 const struct ipv6_pinfo *np = inet6_sk(sk);
401 return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
406 static int tcp_v6_hash_connect(struct sock *sk)
408 unsigned short snum = inet_sk(sk)->num;
409 struct inet_bind_hashbucket *head;
410 struct inet_bind_bucket *tb;
414 int low = sysctl_local_port_range[0];
415 int high = sysctl_local_port_range[1];
416 int range = high - low;
420 u32 offset = hint + tcpv6_port_offset(sk);
421 struct hlist_node *node;
422 struct inet_timewait_sock *tw = NULL;
425 for (i = 1; i <= range; i++) {
426 port = low + (i + offset) % range;
427 head = &tcp_hashinfo.bhash[inet_bhashfn(port, tcp_hashinfo.bhash_size)];
428 spin_lock(&head->lock);
430 /* Does not bother with rcv_saddr checks,
431 * because the established check is already
434 inet_bind_bucket_for_each(tb, node, &head->chain) {
435 if (tb->port == port) {
436 BUG_TRAP(!hlist_empty(&tb->owners));
437 if (tb->fastreuse >= 0)
439 if (!__tcp_v6_check_established(sk,
447 tb = inet_bind_bucket_create(tcp_hashinfo.bind_bucket_cachep, head, port);
449 spin_unlock(&head->lock);
456 spin_unlock(&head->lock);
460 return -EADDRNOTAVAIL;
465 /* Head lock still held and bh's disabled */
466 inet_bind_hash(sk, tb, port);
467 if (sk_unhashed(sk)) {
468 inet_sk(sk)->sport = htons(port);
471 spin_unlock(&head->lock);
474 inet_twsk_deschedule(tw, &tcp_death_row);
482 head = &tcp_hashinfo.bhash[inet_bhashfn(snum, tcp_hashinfo.bhash_size)];
483 tb = inet_csk(sk)->icsk_bind_hash;
484 spin_lock_bh(&head->lock);
486 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
488 spin_unlock_bh(&head->lock);
491 spin_unlock(&head->lock);
492 /* No definite answer... Walk to established hash table */
493 ret = __tcp_v6_check_established(sk, snum, NULL);
500 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
503 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
504 struct inet_sock *inet = inet_sk(sk);
505 struct ipv6_pinfo *np = inet6_sk(sk);
506 struct tcp_sock *tp = tcp_sk(sk);
507 struct in6_addr *saddr = NULL, *final_p = NULL, final;
509 struct dst_entry *dst;
513 if (addr_len < SIN6_LEN_RFC2133)
516 if (usin->sin6_family != AF_INET6)
517 return(-EAFNOSUPPORT);
519 memset(&fl, 0, sizeof(fl));
522 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
523 IP6_ECN_flow_init(fl.fl6_flowlabel);
524 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
525 struct ip6_flowlabel *flowlabel;
526 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
527 if (flowlabel == NULL)
529 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
530 fl6_sock_release(flowlabel);
535 * connect() to INADDR_ANY means loopback (BSD'ism).
538 if(ipv6_addr_any(&usin->sin6_addr))
539 usin->sin6_addr.s6_addr[15] = 0x1;
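/* i.e. the unspecified address :: is rewritten to the loopback address ::1 */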
541 addr_type = ipv6_addr_type(&usin->sin6_addr);
543 if(addr_type & IPV6_ADDR_MULTICAST)
546 if (addr_type&IPV6_ADDR_LINKLOCAL) {
547 if (addr_len >= sizeof(struct sockaddr_in6) &&
548 usin->sin6_scope_id) {
549 /* If interface is set while binding, indices
552 if (sk->sk_bound_dev_if &&
553 sk->sk_bound_dev_if != usin->sin6_scope_id)
556 sk->sk_bound_dev_if = usin->sin6_scope_id;
559 /* Connect to link-local address requires an interface */
560 if (!sk->sk_bound_dev_if)
564 if (tp->rx_opt.ts_recent_stamp &&
565 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
566 tp->rx_opt.ts_recent = 0;
567 tp->rx_opt.ts_recent_stamp = 0;
571 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
572 np->flow_label = fl.fl6_flowlabel;
578 if (addr_type == IPV6_ADDR_MAPPED) {
579 u32 exthdrlen = tp->ext_header_len;
580 struct sockaddr_in sin;
582 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
584 if (__ipv6_only_sock(sk))
587 sin.sin_family = AF_INET;
588 sin.sin_port = usin->sin6_port;
589 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
591 tp->af_specific = &ipv6_mapped;
592 sk->sk_backlog_rcv = tcp_v4_do_rcv;
594 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
597 tp->ext_header_len = exthdrlen;
598 tp->af_specific = &ipv6_specific;
599 sk->sk_backlog_rcv = tcp_v6_do_rcv;
602 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
604 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
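/*
 * i.e. np->saddr and np->rcv_saddr become IPv4-mapped addresses of the
 * form ::ffff:a.b.c.d, so the IPv6 view of the socket mirrors the IPv4
 * connection that tcp_v4_connect() just set up.
 */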
611 if (!ipv6_addr_any(&np->rcv_saddr))
612 saddr = &np->rcv_saddr;
614 fl.proto = IPPROTO_TCP;
615 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
616 ipv6_addr_copy(&fl.fl6_src,
617 (saddr ? saddr : &np->saddr));
618 fl.oif = sk->sk_bound_dev_if;
619 fl.fl_ip_dport = usin->sin6_port;
620 fl.fl_ip_sport = inet->sport;
622 if (np->opt && np->opt->srcrt) {
623 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
624 ipv6_addr_copy(&final, &fl.fl6_dst);
625 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
629 err = ip6_dst_lookup(sk, &dst, &fl);
633 ipv6_addr_copy(&fl.fl6_dst, final_p);
635 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
640 ipv6_addr_copy(&np->rcv_saddr, saddr);
643 /* set the source address */
644 ipv6_addr_copy(&np->saddr, saddr);
645 inet->rcv_saddr = LOOPBACK4_IPV6;
647 ip6_dst_store(sk, dst, NULL);
648 sk->sk_route_caps = dst->dev->features &
649 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
651 tp->ext_header_len = 0;
653 tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;
655 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
657 inet->dport = usin->sin6_port;
659 tcp_set_state(sk, TCP_SYN_SENT);
660 err = tcp_v6_hash_connect(sk);
665 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
670 err = tcp_connect(sk);
677 tcp_set_state(sk, TCP_CLOSE);
681 sk->sk_route_caps = 0;
685 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
686 int type, int code, int offset, __u32 info)
688 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
689 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
690 struct ipv6_pinfo *np;
696 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
697 th->source, skb->dev->ifindex);
700 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
704 if (sk->sk_state == TCP_TIME_WAIT) {
705 inet_twsk_put((struct inet_timewait_sock *)sk);
710 if (sock_owned_by_user(sk))
711 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
713 if (sk->sk_state == TCP_CLOSE)
717 seq = ntohl(th->seq);
718 if (sk->sk_state != TCP_LISTEN &&
719 !between(seq, tp->snd_una, tp->snd_nxt)) {
720 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
726 if (type == ICMPV6_PKT_TOOBIG) {
727 struct dst_entry *dst = NULL;
729 if (sock_owned_by_user(sk))
731 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
734 /* icmp should have updated the destination cache entry */
735 dst = __sk_dst_check(sk, np->dst_cookie);
738 struct inet_sock *inet = inet_sk(sk);
741 /* BUGGG_FUTURE: Again, it is not clear how
742 to handle rthdr case. Ignore this complexity
745 memset(&fl, 0, sizeof(fl));
746 fl.proto = IPPROTO_TCP;
747 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
748 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
749 fl.oif = sk->sk_bound_dev_if;
750 fl.fl_ip_dport = inet->dport;
751 fl.fl_ip_sport = inet->sport;
753 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
754 sk->sk_err_soft = -err;
758 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
759 sk->sk_err_soft = -err;
766 if (tp->pmtu_cookie > dst_mtu(dst)) {
767 tcp_sync_mss(sk, dst_mtu(dst));
768 tcp_simple_retransmit(sk);
769 } /* else let the usual retransmit timer handle it */
774 icmpv6_err_convert(type, code, &err);
776 /* Might be for a request_sock */
777 switch (sk->sk_state) {
778 struct request_sock *req, **prev;
780 if (sock_owned_by_user(sk))
783 req = tcp_v6_search_req(sk, &prev, th->dest, &hdr->daddr,
784 &hdr->saddr, inet6_iif(skb));
788 /* ICMPs are not backlogged, hence we cannot get
789 * an established socket here.
791 BUG_TRAP(req->sk == NULL);
793 if (seq != tcp_rsk(req)->snt_isn) {
794 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
798 inet_csk_reqsk_queue_drop(sk, req, prev);
802 case TCP_SYN_RECV: /* Cannot happen.
803 It can, if SYNs are crossed. --ANK */
804 if (!sock_owned_by_user(sk)) {
805 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
807 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
811 sk->sk_err_soft = err;
815 if (!sock_owned_by_user(sk) && np->recverr) {
817 sk->sk_error_report(sk);
819 sk->sk_err_soft = err;
827 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
828 struct dst_entry *dst)
830 struct tcp6_request_sock *treq = tcp6_rsk(req);
831 struct ipv6_pinfo *np = inet6_sk(sk);
832 struct sk_buff * skb;
833 struct ipv6_txoptions *opt = NULL;
834 struct in6_addr * final_p = NULL, final;
838 memset(&fl, 0, sizeof(fl));
839 fl.proto = IPPROTO_TCP;
840 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
841 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
842 fl.fl6_flowlabel = 0;
844 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
845 fl.fl_ip_sport = inet_sk(sk)->sport;
850 np->rxopt.bits.osrcrt == 2 &&
852 struct sk_buff *pktopts = treq->pktopts;
853 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
855 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
858 if (opt && opt->srcrt) {
859 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
860 ipv6_addr_copy(&final, &fl.fl6_dst);
861 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
865 err = ip6_dst_lookup(sk, &dst, &fl);
869 ipv6_addr_copy(&fl.fl6_dst, final_p);
870 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
874 skb = tcp_make_synack(sk, dst, req);
876 struct tcphdr *th = skb->h.th;
878 th->check = tcp_v6_check(th, skb->len,
879 &treq->loc_addr, &treq->rmt_addr,
880 csum_partial((char *)th, skb->len, skb->csum));
882 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
883 err = ip6_xmit(sk, skb, &fl, opt, 0);
884 if (err == NET_XMIT_CN)
889 if (opt && opt != np->opt)
890 sock_kfree_s(sk, opt, opt->tot_len);
894 static void tcp_v6_reqsk_destructor(struct request_sock *req)
896 if (tcp6_rsk(req)->pktopts)
897 kfree_skb(tcp6_rsk(req)->pktopts);
900 static struct request_sock_ops tcp6_request_sock_ops = {
902 .obj_size = sizeof(struct tcp6_request_sock),
903 .rtx_syn_ack = tcp_v6_send_synack,
904 .send_ack = tcp_v6_reqsk_send_ack,
905 .destructor = tcp_v6_reqsk_destructor,
906 .send_reset = tcp_v6_send_reset
909 static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
911 struct ipv6_pinfo *np = inet6_sk(sk);
912 struct inet6_skb_parm *opt = IP6CB(skb);
915 if ((opt->hop && (np->rxopt.bits.hopopts || np->rxopt.bits.ohopopts)) ||
916 ((IPV6_FLOWINFO_MASK & *(u32*)skb->nh.raw) && np->rxopt.bits.rxflow) ||
917 (opt->srcrt && (np->rxopt.bits.srcrt || np->rxopt.bits.osrcrt)) ||
918 ((opt->dst1 || opt->dst0) && (np->rxopt.bits.dstopts || np->rxopt.bits.odstopts)))
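/*
 * ipv6_opt_accepted() is true when the arriving skb carries any IPv6
 * extension header or flow label that this socket subscribed to through
 * its rxopt bits (hop-by-hop, routing header, destination options or
 * flow info), i.e. when the packet is worth keeping for IPV6_PKTOPTIONS.
 */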
925 static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
928 struct ipv6_pinfo *np = inet6_sk(sk);
930 if (skb->ip_summed == CHECKSUM_HW) {
931 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
932 skb->csum = offsetof(struct tcphdr, check);
934 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
935 csum_partial((char *)th, th->doff<<2,
941 static void tcp_v6_send_reset(struct sk_buff *skb)
943 struct tcphdr *th = skb->h.th, *t1;
944 struct sk_buff *buff;
950 if (!ipv6_unicast_destination(skb))
954 * We need to grab some memory, and put together an RST,
955 * and then put it into the queue to be sent.
958 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
963 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
965 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
967 /* Swap the send and the receive. */
968 memset(t1, 0, sizeof(*t1));
969 t1->dest = th->source;
970 t1->source = th->dest;
971 t1->doff = sizeof(*t1)/4;
975 t1->seq = th->ack_seq;
978 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
979 + skb->len - (th->doff<<2));
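/*
 * RFC 793 reset generation: if the offending segment carried an ACK, the
 * RST reuses that ACK value as its own sequence number; otherwise the RST
 * acknowledges everything the segment consumed (its payload plus one
 * sequence number for each of SYN and FIN).
 */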
982 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
984 memset(&fl, 0, sizeof(fl));
985 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
986 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
988 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
989 sizeof(*t1), IPPROTO_TCP,
992 fl.proto = IPPROTO_TCP;
993 fl.oif = inet6_iif(skb);
994 fl.fl_ip_dport = t1->dest;
995 fl.fl_ip_sport = t1->source;
997 /* sk = NULL, but it is safe for now. RST socket required. */
998 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1000 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1003 ip6_xmit(NULL, buff, &fl, NULL, 0);
1004 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1005 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
1012 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
1014 struct tcphdr *th = skb->h.th, *t1;
1015 struct sk_buff *buff;
1017 int tot_len = sizeof(struct tcphdr);
1022 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
1027 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
1029 t1 = (struct tcphdr *) skb_push(buff,tot_len);
1031 /* Swap the send and the receive. */
1032 memset(t1, 0, sizeof(*t1));
1033 t1->dest = th->source;
1034 t1->source = th->dest;
1035 t1->doff = tot_len/4;
1036 t1->seq = htonl(seq);
1037 t1->ack_seq = htonl(ack);
1039 t1->window = htons(win);
1042 u32 *ptr = (u32*)(t1 + 1);
1043 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
1044 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
1045 *ptr++ = htonl(tcp_time_stamp);
1049 buff->csum = csum_partial((char *)t1, tot_len, 0);
1051 memset(&fl, 0, sizeof(fl));
1052 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
1053 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
1055 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
1056 tot_len, IPPROTO_TCP,
1059 fl.proto = IPPROTO_TCP;
1060 fl.oif = inet6_iif(skb);
1061 fl.fl_ip_dport = t1->dest;
1062 fl.fl_ip_sport = t1->source;
1064 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
1065 if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0)
1067 ip6_xmit(NULL, buff, &fl, NULL, 0);
1068 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
1075 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
1077 struct inet_timewait_sock *tw = inet_twsk(sk);
1078 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1080 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1081 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1082 tcptw->tw_ts_recent);
1087 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
1089 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
1093 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
1095 struct request_sock *req, **prev;
1096 const struct tcphdr *th = skb->h.th;
1099 /* Find possible connection requests. */
1100 req = tcp_v6_search_req(sk, &prev, th->source, &skb->nh.ipv6h->saddr,
1101 &skb->nh.ipv6h->daddr, inet6_iif(skb));
1103 return tcp_check_req(sk, skb, req, prev);
1105 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
1106 th->source, &skb->nh.ipv6h->daddr,
1107 ntohs(th->dest), inet6_iif(skb));
1110 if (nsk->sk_state != TCP_TIME_WAIT) {
1114 inet_twsk_put((struct inet_timewait_sock *)nsk);
1118 #if 0 /*def CONFIG_SYN_COOKIES*/
1119 if (!th->rst && !th->syn && th->ack)
1120 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
1125 static void tcp_v6_synq_add(struct sock *sk, struct request_sock *req)
1127 struct inet_connection_sock *icsk = inet_csk(sk);
1128 struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt;
1129 const u32 h = tcp_v6_synq_hash(&tcp6_rsk(req)->rmt_addr, inet_rsk(req)->rmt_port, lopt->hash_rnd);
1131 reqsk_queue_hash_req(&icsk->icsk_accept_queue, h, req, TCP_TIMEOUT_INIT);
1132 inet_csk_reqsk_queue_added(sk, TCP_TIMEOUT_INIT);
1136 /* FIXME: this is substantially similar to the ipv4 code.
1137 * Can some kind of merge be done? -- erics
1139 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1141 struct tcp6_request_sock *treq;
1142 struct ipv6_pinfo *np = inet6_sk(sk);
1143 struct tcp_options_received tmp_opt;
1144 struct tcp_sock *tp = tcp_sk(sk);
1145 struct request_sock *req = NULL;
1146 __u32 isn = TCP_SKB_CB(skb)->when;
1148 if (skb->protocol == htons(ETH_P_IP))
1149 return tcp_v4_conn_request(sk, skb);
1151 if (!ipv6_unicast_destination(skb))
1155 * There are no SYN attacks on IPv6, yet...
1157 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
1158 if (net_ratelimit())
1159 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
1163 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
1166 req = reqsk_alloc(&tcp6_request_sock_ops);
1170 tcp_clear_options(&tmp_opt);
1171 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
1172 tmp_opt.user_mss = tp->rx_opt.user_mss;
1174 tcp_parse_options(skb, &tmp_opt, 0);
1176 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
1177 tcp_openreq_init(req, &tmp_opt, skb);
1179 treq = tcp6_rsk(req);
1180 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
1181 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
1182 TCP_ECN_create_request(req, skb->h.th);
1183 treq->pktopts = NULL;
1184 if (ipv6_opt_accepted(sk, skb) ||
1185 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
1186 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
1187 atomic_inc(&skb->users);
1188 treq->pktopts = skb;
1190 treq->iif = sk->sk_bound_dev_if;
1192 /* So that link locals have meaning */
1193 if (!sk->sk_bound_dev_if &&
1194 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
1195 treq->iif = inet6_iif(skb);
1198 isn = tcp_v6_init_sequence(sk,skb);
1200 tcp_rsk(req)->snt_isn = isn;
1202 if (tcp_v6_send_synack(sk, req, NULL))
1205 tcp_v6_synq_add(sk, req);
1213 TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
1214 return 0; /* don't send reset */
1217 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
1218 struct request_sock *req,
1219 struct dst_entry *dst)
1221 struct tcp6_request_sock *treq = tcp6_rsk(req);
1222 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
1223 struct tcp6_sock *newtcp6sk;
1224 struct inet_sock *newinet;
1225 struct tcp_sock *newtp;
1227 struct ipv6_txoptions *opt;
1229 if (skb->protocol == htons(ETH_P_IP)) {
1234 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
1239 newtcp6sk = (struct tcp6_sock *)newsk;
1240 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1242 newinet = inet_sk(newsk);
1243 newnp = inet6_sk(newsk);
1244 newtp = tcp_sk(newsk);
1246 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1248 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
1251 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
1254 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
1256 newtp->af_specific = &ipv6_mapped;
1257 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1258 newnp->pktoptions = NULL;
1260 newnp->mcast_oif = inet6_iif(skb);
1261 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1264 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1265 * here, tcp_create_openreq_child now does this for us, see the comment in
1266 * that function for the gory details. -acme
1269 /* This is a tricky place. Until this moment IPv4 tcp
1270 worked with IPv6 af_tcp.af_specific.
1273 tcp_sync_mss(newsk, newtp->pmtu_cookie);
1280 if (sk_acceptq_is_full(sk))
1283 if (np->rxopt.bits.osrcrt == 2 &&
1284 opt == NULL && treq->pktopts) {
1285 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
1287 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
1291 struct in6_addr *final_p = NULL, final;
1294 memset(&fl, 0, sizeof(fl));
1295 fl.proto = IPPROTO_TCP;
1296 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
1297 if (opt && opt->srcrt) {
1298 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
1299 ipv6_addr_copy(&final, &fl.fl6_dst);
1300 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1303 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
1304 fl.oif = sk->sk_bound_dev_if;
1305 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
1306 fl.fl_ip_sport = inet_sk(sk)->sport;
1308 if (ip6_dst_lookup(sk, &dst, &fl))
1312 ipv6_addr_copy(&fl.fl6_dst, final_p);
1314 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
1318 newsk = tcp_create_openreq_child(sk, req, skb);
1323 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1324 * count here, tcp_create_openreq_child now does this for us, see the
1325 * comment in that function for the gory details. -acme
1328 ip6_dst_store(newsk, dst, NULL);
1329 newsk->sk_route_caps = dst->dev->features &
1330 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1332 newtcp6sk = (struct tcp6_sock *)newsk;
1333 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
1335 newtp = tcp_sk(newsk);
1336 newinet = inet_sk(newsk);
1337 newnp = inet6_sk(newsk);
1339 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1341 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
1342 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
1343 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
1344 newsk->sk_bound_dev_if = treq->iif;
1346 /* Now IPv6 options...
1348 First: no IPv4 options.
1350 newinet->opt = NULL;
1353 newnp->rxopt.all = np->rxopt.all;
1355 /* Clone pktoptions received with SYN */
1356 newnp->pktoptions = NULL;
1357 if (treq->pktopts != NULL) {
1358 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
1359 kfree_skb(treq->pktopts);
1360 treq->pktopts = NULL;
1361 if (newnp->pktoptions)
1362 skb_set_owner_r(newnp->pktoptions, newsk);
1365 newnp->mcast_oif = inet6_iif(skb);
1366 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
1368 /* Clone native IPv6 options from listening socket (if any)
1370 Yes, keeping reference count would be much more clever,
1371 but we do one more thing here: reattach optmem
1375 newnp->opt = ipv6_dup_options(newsk, opt);
1377 sock_kfree_s(sk, opt, opt->tot_len);
1380 newtp->ext_header_len = 0;
1382 newtp->ext_header_len = newnp->opt->opt_nflen +
1383 newnp->opt->opt_flen;
1385 tcp_sync_mss(newsk, dst_mtu(dst));
1386 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1387 tcp_initialize_rcv_mss(newsk);
1389 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1391 __tcp_v6_hash(newsk);
1392 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1397 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1399 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1400 if (opt && opt != np->opt)
1401 sock_kfree_s(sk, opt, opt->tot_len);
1406 static int tcp_v6_checksum_init(struct sk_buff *skb)
1408 if (skb->ip_summed == CHECKSUM_HW) {
1409 skb->ip_summed = CHECKSUM_UNNECESSARY;
1410 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1411 &skb->nh.ipv6h->daddr,skb->csum))
1413 LIMIT_NETDEBUG(KERN_DEBUG "hw tcp v6 csum failed\n");
1415 if (skb->len <= 76) {
1416 if (tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1417 &skb->nh.ipv6h->daddr,skb_checksum(skb, 0, skb->len, 0)))
1419 skb->ip_summed = CHECKSUM_UNNECESSARY;
1421 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1422 &skb->nh.ipv6h->daddr,0);
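/*
 * tcp_v6_checksum_init(): a sum already computed by the NIC
 * (CHECKSUM_HW) is validated against the pseudo-header and, if good,
 * marked CHECKSUM_UNNECESSARY.  Short segments (<= 76 bytes) are
 * verified in full immediately; for longer ones skb->csum is seeded
 * with the pseudo-header checksum so verification can be completed
 * incrementally later on.
 */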
1427 /* The socket must have its spinlock held when we get
1430 * We have a potential double-lock case here, so even when
1431 * doing backlog processing we use the BH locking scheme.
1432 * This is because we cannot sleep with the original spinlock
1435 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1437 struct ipv6_pinfo *np = inet6_sk(sk);
1438 struct tcp_sock *tp;
1439 struct sk_buff *opt_skb = NULL;
1441 /* Imagine: socket is IPv6. IPv4 packet arrives,
1442 goes to the IPv4 receive handler and is backlogged.
1443 From backlog it always goes here. Kerboom...
1444 Fortunately, tcp_rcv_established and rcv_established
1445 handle them correctly, but that is not the case with
1446 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1449 if (skb->protocol == htons(ETH_P_IP))
1450 return tcp_v4_do_rcv(sk, skb);
1452 if (sk_filter(sk, skb, 0))
1456 * socket locking is here for SMP purposes as backlog rcv
1457 * is currently called with bh processing disabled.
1460 /* Do Stevens' IPV6_PKTOPTIONS.
1462 Yes, guys, this is the only place in our code where we
1463 can do it without affecting IPv4.
1464 The rest of the code is protocol independent,
1465 and I do not like the idea of uglifying IPv4.
1467 Actually, the whole idea behind IPV6_PKTOPTIONS
1468 does not look very well thought out. For now we latch the
1469 options received in the last packet enqueued
1470 by tcp. Feel free to propose a better solution.
1474 opt_skb = skb_clone(skb, GFP_ATOMIC);
1476 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1477 TCP_CHECK_TIMER(sk);
1478 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1480 TCP_CHECK_TIMER(sk);
1482 goto ipv6_pktoptions;
1486 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1489 if (sk->sk_state == TCP_LISTEN) {
1490 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1495 * Queue it on the new socket if the new socket is active,
1496 * otherwise we just shortcircuit this and continue with
1500 if (tcp_child_process(sk, nsk, skb))
1503 __kfree_skb(opt_skb);
1508 TCP_CHECK_TIMER(sk);
1509 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1511 TCP_CHECK_TIMER(sk);
1513 goto ipv6_pktoptions;
1517 tcp_v6_send_reset(skb);
1520 __kfree_skb(opt_skb);
1524 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1529 /* You may ask: what is this?
1531 1. skb was enqueued by tcp.
1532 2. skb is added to the tail of the read queue, rather than out of order.
1533 3. socket is not in a passive state.
1534 4. Finally, it really contains options which the user wants to receive.
1537 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1538 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1539 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1540 np->mcast_oif = inet6_iif(opt_skb);
1541 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1542 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1543 if (ipv6_opt_accepted(sk, opt_skb)) {
1544 skb_set_owner_r(opt_skb, sk);
1545 opt_skb = xchg(&np->pktoptions, opt_skb);
1547 __kfree_skb(opt_skb);
1548 opt_skb = xchg(&np->pktoptions, NULL);
1557 static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
1559 struct sk_buff *skb = *pskb;
1564 if (skb->pkt_type != PACKET_HOST)
1568 * Count it even if it's bad.
1570 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1572 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1577 if (th->doff < sizeof(struct tcphdr)/4)
1579 if (!pskb_may_pull(skb, th->doff*4))
1582 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1583 tcp_v6_checksum_init(skb) < 0))
1587 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1588 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1589 skb->len - th->doff*4);
1590 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1591 TCP_SKB_CB(skb)->when = 0;
1592 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1593 TCP_SKB_CB(skb)->sacked = 0;
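/*
 * Per-segment control block: end_seq counts the payload plus one
 * sequence number for a SYN and one for a FIN, "when" starts out
 * cleared, and "flags" caches the IPv6 DS field (traffic class) so ECN
 * marks can be examined later.
 */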
1595 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1596 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1603 if (sk->sk_state == TCP_TIME_WAIT)
1606 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1607 goto discard_and_relse;
1609 if (sk_filter(sk, skb, 0))
1610 goto discard_and_relse;
1616 if (!sock_owned_by_user(sk)) {
1617 if (!tcp_prequeue(sk, skb))
1618 ret = tcp_v6_do_rcv(sk, skb);
1620 sk_add_backlog(sk, skb);
1624 return ret ? -1 : 0;
1627 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1630 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1632 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1634 tcp_v6_send_reset(skb);
1651 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1652 inet_twsk_put((struct inet_timewait_sock *)sk);
1656 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1657 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1658 inet_twsk_put((struct inet_timewait_sock *)sk);
1662 switch (tcp_timewait_state_process((struct inet_timewait_sock *)sk,
1668 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1669 &skb->nh.ipv6h->daddr,
1670 ntohs(th->dest), inet6_iif(skb));
1672 struct inet_timewait_sock *tw = inet_twsk(sk);
1673 inet_twsk_deschedule(tw, &tcp_death_row);
1678 /* Fall through to ACK */
1681 tcp_v6_timewait_ack(sk, skb);
1685 case TCP_TW_SUCCESS:;
1690 static int tcp_v6_rebuild_header(struct sock *sk)
1693 struct dst_entry *dst;
1694 struct ipv6_pinfo *np = inet6_sk(sk);
1696 dst = __sk_dst_check(sk, np->dst_cookie);
1699 struct inet_sock *inet = inet_sk(sk);
1700 struct in6_addr *final_p = NULL, final;
1703 memset(&fl, 0, sizeof(fl));
1704 fl.proto = IPPROTO_TCP;
1705 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1706 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1707 fl.fl6_flowlabel = np->flow_label;
1708 fl.oif = sk->sk_bound_dev_if;
1709 fl.fl_ip_dport = inet->dport;
1710 fl.fl_ip_sport = inet->sport;
1712 if (np->opt && np->opt->srcrt) {
1713 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1714 ipv6_addr_copy(&final, &fl.fl6_dst);
1715 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1719 err = ip6_dst_lookup(sk, &dst, &fl);
1721 sk->sk_route_caps = 0;
1725 ipv6_addr_copy(&fl.fl6_dst, final_p);
1727 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1728 sk->sk_err_soft = -err;
1732 ip6_dst_store(sk, dst, NULL);
1733 sk->sk_route_caps = dst->dev->features &
1734 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1740 static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
1742 struct sock *sk = skb->sk;
1743 struct inet_sock *inet = inet_sk(sk);
1744 struct ipv6_pinfo *np = inet6_sk(sk);
1746 struct dst_entry *dst;
1747 struct in6_addr *final_p = NULL, final;
1749 memset(&fl, 0, sizeof(fl));
1750 fl.proto = IPPROTO_TCP;
1751 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1752 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
1753 fl.fl6_flowlabel = np->flow_label;
1754 IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
1755 fl.oif = sk->sk_bound_dev_if;
1756 fl.fl_ip_sport = inet->sport;
1757 fl.fl_ip_dport = inet->dport;
1759 if (np->opt && np->opt->srcrt) {
1760 struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
1761 ipv6_addr_copy(&final, &fl.fl6_dst);
1762 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
1766 dst = __sk_dst_check(sk, np->dst_cookie);
1769 int err = ip6_dst_lookup(sk, &dst, &fl);
1772 sk->sk_err_soft = -err;
1777 ipv6_addr_copy(&fl.fl6_dst, final_p);
1779 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
1780 sk->sk_route_caps = 0;
1784 ip6_dst_store(sk, dst, NULL);
1785 sk->sk_route_caps = dst->dev->features &
1786 ~(NETIF_F_IP_CSUM | NETIF_F_TSO);
1789 skb->dst = dst_clone(dst);
1791 /* Restore final destination back after routing done */
1792 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
1794 return ip6_xmit(sk, skb, &fl, np->opt, 0);
1797 static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
1799 struct ipv6_pinfo *np = inet6_sk(sk);
1800 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;
1802 sin6->sin6_family = AF_INET6;
1803 ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
1804 sin6->sin6_port = inet_sk(sk)->dport;
1805 /* We do not store received flowlabel for TCP */
1806 sin6->sin6_flowinfo = 0;
1807 sin6->sin6_scope_id = 0;
1808 if (sk->sk_bound_dev_if &&
1809 ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
1810 sin6->sin6_scope_id = sk->sk_bound_dev_if;
1813 static int tcp_v6_remember_stamp(struct sock *sk)
1815 /* Alas, not yet... */
1819 static struct tcp_func ipv6_specific = {
1820 .queue_xmit = tcp_v6_xmit,
1821 .send_check = tcp_v6_send_check,
1822 .rebuild_header = tcp_v6_rebuild_header,
1823 .conn_request = tcp_v6_conn_request,
1824 .syn_recv_sock = tcp_v6_syn_recv_sock,
1825 .remember_stamp = tcp_v6_remember_stamp,
1826 .net_header_len = sizeof(struct ipv6hdr),
1828 .setsockopt = ipv6_setsockopt,
1829 .getsockopt = ipv6_getsockopt,
1830 .addr2sockaddr = v6_addr2sockaddr,
1831 .sockaddr_len = sizeof(struct sockaddr_in6)
1835 * TCP over IPv4 via INET6 API
1838 static struct tcp_func ipv6_mapped = {
1839 .queue_xmit = ip_queue_xmit,
1840 .send_check = tcp_v4_send_check,
1841 .rebuild_header = inet_sk_rebuild_header,
1842 .conn_request = tcp_v6_conn_request,
1843 .syn_recv_sock = tcp_v6_syn_recv_sock,
1844 .remember_stamp = tcp_v4_remember_stamp,
1845 .net_header_len = sizeof(struct iphdr),
1847 .setsockopt = ipv6_setsockopt,
1848 .getsockopt = ipv6_getsockopt,
1849 .addr2sockaddr = v6_addr2sockaddr,
1850 .sockaddr_len = sizeof(struct sockaddr_in6)
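/*
 * ipv6_specific vs. ipv6_mapped: a socket talking to an IPv4-mapped peer
 * keeps its IPv6 sockaddr presentation and {set,get}sockopt handlers, but
 * its transmit, checksum and header-rebuild hooks are swapped for the
 * IPv4 ones (ip_queue_xmit, tcp_v4_send_check, inet_sk_rebuild_header).
 * tcp_v6_connect() and tcp_v6_syn_recv_sock() switch af_specific between
 * the two tables as needed.
 */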
1855 /* NOTE: A lot of things are set to zero explicitly by the call to
1856 * sk_alloc(), so they need not be done here.
1858 static int tcp_v6_init_sock(struct sock *sk)
1860 struct inet_connection_sock *icsk = inet_csk(sk);
1861 struct tcp_sock *tp = tcp_sk(sk);
1863 skb_queue_head_init(&tp->out_of_order_queue);
1864 tcp_init_xmit_timers(sk);
1865 tcp_prequeue_init(tp);
1867 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1868 tp->mdev = TCP_TIMEOUT_INIT;
1870 /* So many TCP implementations out there (incorrectly) count the
1871 * initial SYN frame in their delayed-ACK and congestion control
1872 * algorithms that we must have the following bandaid to talk
1873 * efficiently to them. -DaveM
1877 /* See draft-stevens-tcpca-spec-01 for discussion of the
1878 * initialization of these values.
1880 tp->snd_ssthresh = 0x7fffffff;
1881 tp->snd_cwnd_clamp = ~0;
1882 tp->mss_cache = 536;
1884 tp->reordering = sysctl_tcp_reordering;
1886 sk->sk_state = TCP_CLOSE;
1888 tp->af_specific = &ipv6_specific;
1889 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1890 sk->sk_write_space = sk_stream_write_space;
1891 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1893 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1894 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1896 atomic_inc(&tcp_sockets_allocated);
1901 static int tcp_v6_destroy_sock(struct sock *sk)
1903 tcp_v4_destroy_sock(sk);
1904 return inet6_destroy_sock(sk);
1907 /* Proc filesystem TCPv6 sock list dumping. */
1908 static void get_openreq6(struct seq_file *seq,
1909 struct sock *sk, struct request_sock *req, int i, int uid)
1911 struct in6_addr *dest, *src;
1912 int ttd = req->expires - jiffies;
1917 src = &tcp6_rsk(req)->loc_addr;
1918 dest = &tcp6_rsk(req)->rmt_addr;
1920 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1921 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1923 src->s6_addr32[0], src->s6_addr32[1],
1924 src->s6_addr32[2], src->s6_addr32[3],
1925 ntohs(inet_sk(sk)->sport),
1926 dest->s6_addr32[0], dest->s6_addr32[1],
1927 dest->s6_addr32[2], dest->s6_addr32[3],
1928 ntohs(inet_rsk(req)->rmt_port),
1930 0,0, /* could print option size, but that is af dependent. */
1931 1, /* timers active (only the expire timer) */
1932 jiffies_to_clock_t(ttd),
1935 0, /* non standard timer */
1936 0, /* open_requests have no inode */
1940 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1942 struct in6_addr *dest, *src;
1945 unsigned long timer_expires;
1946 struct inet_sock *inet = inet_sk(sp);
1947 struct tcp_sock *tp = tcp_sk(sp);
1948 const struct inet_connection_sock *icsk = inet_csk(sp);
1949 struct ipv6_pinfo *np = inet6_sk(sp);
1952 src = &np->rcv_saddr;
1953 destp = ntohs(inet->dport);
1954 srcp = ntohs(inet->sport);
1956 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1958 timer_expires = icsk->icsk_timeout;
1959 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1961 timer_expires = icsk->icsk_timeout;
1962 } else if (timer_pending(&sp->sk_timer)) {
1964 timer_expires = sp->sk_timer.expires;
1967 timer_expires = jiffies;
1971 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1972 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1974 src->s6_addr32[0], src->s6_addr32[1],
1975 src->s6_addr32[2], src->s6_addr32[3], srcp,
1976 dest->s6_addr32[0], dest->s6_addr32[1],
1977 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1979 tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
1981 jiffies_to_clock_t(timer_expires - jiffies),
1982 icsk->icsk_retransmits,
1984 icsk->icsk_probes_out,
1986 atomic_read(&sp->sk_refcnt), sp,
1989 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1990 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1994 static void get_timewait6_sock(struct seq_file *seq,
1995 struct inet_timewait_sock *tw, int i)
1997 struct in6_addr *dest, *src;
1999 struct tcp6_timewait_sock *tcp6tw = tcp6_twsk((struct sock *)tw);
2000 int ttd = tw->tw_ttd - jiffies;
2005 dest = &tcp6tw->tw_v6_daddr;
2006 src = &tcp6tw->tw_v6_rcv_saddr;
2007 destp = ntohs(tw->tw_dport);
2008 srcp = ntohs(tw->tw_sport);
2011 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
2012 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
2014 src->s6_addr32[0], src->s6_addr32[1],
2015 src->s6_addr32[2], src->s6_addr32[3], srcp,
2016 dest->s6_addr32[0], dest->s6_addr32[1],
2017 dest->s6_addr32[2], dest->s6_addr32[3], destp,
2018 tw->tw_substate, 0, 0,
2019 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
2020 atomic_read(&tw->tw_refcnt), tw);
2023 #ifdef CONFIG_PROC_FS
2024 static int tcp6_seq_show(struct seq_file *seq, void *v)
2026 struct tcp_iter_state *st;
2028 if (v == SEQ_START_TOKEN) {
2033 "st tx_queue rx_queue tr tm->when retrnsmt"
2034 " uid timeout inode\n");
2039 switch (st->state) {
2040 case TCP_SEQ_STATE_LISTENING:
2041 case TCP_SEQ_STATE_ESTABLISHED:
2042 get_tcp6_sock(seq, v, st->num);
2044 case TCP_SEQ_STATE_OPENREQ:
2045 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
2047 case TCP_SEQ_STATE_TIME_WAIT:
2048 get_timewait6_sock(seq, v, st->num);
2055 static struct file_operations tcp6_seq_fops;
2056 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
2057 .owner = THIS_MODULE,
2060 .seq_show = tcp6_seq_show,
2061 .seq_fops = &tcp6_seq_fops,
2064 int __init tcp6_proc_init(void)
2066 return tcp_proc_register(&tcp6_seq_afinfo);
2069 void tcp6_proc_exit(void)
2071 tcp_proc_unregister(&tcp6_seq_afinfo);
2075 struct proto tcpv6_prot = {
2077 .owner = THIS_MODULE,
2079 .connect = tcp_v6_connect,
2080 .disconnect = tcp_disconnect,
2081 .accept = inet_csk_accept,
2083 .init = tcp_v6_init_sock,
2084 .destroy = tcp_v6_destroy_sock,
2085 .shutdown = tcp_shutdown,
2086 .setsockopt = tcp_setsockopt,
2087 .getsockopt = tcp_getsockopt,
2088 .sendmsg = tcp_sendmsg,
2089 .recvmsg = tcp_recvmsg,
2090 .backlog_rcv = tcp_v6_do_rcv,
2091 .hash = tcp_v6_hash,
2092 .unhash = tcp_unhash,
2093 .get_port = tcp_v6_get_port,
2094 .enter_memory_pressure = tcp_enter_memory_pressure,
2095 .sockets_allocated = &tcp_sockets_allocated,
2096 .memory_allocated = &tcp_memory_allocated,
2097 .memory_pressure = &tcp_memory_pressure,
2098 .orphan_count = &tcp_orphan_count,
2099 .sysctl_mem = sysctl_tcp_mem,
2100 .sysctl_wmem = sysctl_tcp_wmem,
2101 .sysctl_rmem = sysctl_tcp_rmem,
2102 .max_header = MAX_TCP_HEADER,
2103 .obj_size = sizeof(struct tcp6_sock),
2104 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
2105 .rsk_prot = &tcp6_request_sock_ops,
2108 static struct inet6_protocol tcpv6_protocol = {
2109 .handler = tcp_v6_rcv,
2110 .err_handler = tcp_v6_err,
2111 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2114 static struct inet_protosw tcpv6_protosw = {
2115 .type = SOCK_STREAM,
2116 .protocol = IPPROTO_TCP,
2117 .prot = &tcpv6_prot,
2118 .ops = &inet6_stream_ops,
2121 .flags = INET_PROTOSW_PERMANENT,
2124 void __init tcpv6_init(void)
2126 /* register inet6 protocol */
2127 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
2128 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
2129 inet6_register_protosw(&tcpv6_protosw);