/*
 *	Linux INET6 implementation
 *
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */
#include <linux/module.h>
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/ndisc.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/dsfield.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>
static void	tcp_v6_send_reset(struct sk_buff *skb);
static void	tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req);
static void	tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
				  struct sk_buff *skb);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static int	tcp_v6_xmit(struct sk_buff *skb, int ipfragok);

static struct tcp_func ipv6_mapped;
static struct tcp_func ipv6_specific;
/* I have no idea if this is a good hash for v6 or not. -DaveM */
static __inline__ int tcp_v6_hashfn(struct in6_addr *laddr, u16 lport,
				    struct in6_addr *faddr, u16 fport)
{
	int hashent = (lport ^ fport);

	hashent ^= (laddr->s6_addr32[3] ^ faddr->s6_addr32[3]);
	hashent ^= hashent >> 16;
	hashent ^= hashent >> 8;
	return (hashent & (tcp_ehash_size - 1));
}
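
/* Note: only the low 32 bits of each address feed the fold above; for
 * typical prefix allocation the final word is the most variable part,
 * which is usually enough to spread flows across the power-of-two
 * tcp_ehash_size buckets.
 */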
static __inline__ int tcp_v6_sk_hashfn(struct sock *sk)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *laddr = &np->rcv_saddr;
	struct in6_addr *faddr = &np->daddr;
	__u16 lport = inet->num;
	__u16 fport = inet->dport;

	return tcp_v6_hashfn(laddr, lport, faddr, fport);
}
static inline int tcp_v6_bind_conflict(struct sock *sk,
				       struct tcp_bind_bucket *tb)
{
	struct sock *sk2;
	struct hlist_node *node;

	/* We must walk the whole port owner list in this case. -DaveM */
	sk_for_each_bound(sk2, node, &tb->owners) {
		if (sk != sk2 &&
		    (!sk->sk_bound_dev_if ||
		     !sk2->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == sk2->sk_bound_dev_if) &&
		    (!sk->sk_reuse || !sk2->sk_reuse ||
		     sk2->sk_state == TCP_LISTEN) &&
		    ipv6_rcv_saddr_equal(sk, sk2))
			break;
	}

	return node != NULL;
}
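
/* In other words: the bind succeeds unless some existing owner of the
 * port overlaps on both bound device and source address and cannot
 * share it, i.e. either side lacks SO_REUSEADDR or the other socket
 * is actively listening.
 */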
/* Grrr, addr_type already calculated by caller, but I don't want
 * to add some silly "cookie" argument to this method just for that.
 * But it doesn't matter, the recalculation is in the rarest path
 * this function ever takes.
 */
static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
{
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	struct hlist_node *node;
	int ret;

	local_bh_disable();
	if (snum == 0) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int remaining = (high - low) + 1;
		int rover;

		spin_lock(&tcp_portalloc_lock);
		rover = tcp_port_rover;
		do {
			rover++;
			if ((rover < low) || (rover > high))
				rover = low;
			head = &tcp_bhash[tcp_bhashfn(rover)];
			spin_lock(&head->lock);
			tb_for_each(tb, node, &head->chain)
				if (tb->port == rover)
					goto next;
			break;
		next:
			spin_unlock(&head->lock);
		} while (--remaining > 0);
		tcp_port_rover = rover;
		spin_unlock(&tcp_portalloc_lock);
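
		/* tcp_port_rover is a single cursor shared by every
		 * autobinding socket, so each search resumes where the
		 * previous one stopped instead of rescanning from the
		 * bottom of the range.
		 */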
		/* Exhausted local port range during search? */
		ret = 1;
		if (remaining <= 0)
			goto fail;

		/* OK, here is the one we will use. */
		snum = rover;
	} else {
		head = &tcp_bhash[tcp_bhashfn(snum)];
		spin_lock(&head->lock);
		tb_for_each(tb, node, &head->chain)
			if (tb->port == snum)
				goto tb_found;
	}
	tb = NULL;
	goto tb_not_found;
tb_found:
	if (tb && !hlist_empty(&tb->owners)) {
		if (tb->fastreuse > 0 && sk->sk_reuse &&
		    sk->sk_state != TCP_LISTEN) {
			goto success;
		} else {
			ret = 1;
			if (tcp_v6_bind_conflict(sk, tb))
				goto fail_unlock;
		}
	}
tb_not_found:
	ret = 1;
	if (!tb && (tb = tcp_bucket_create(head, snum)) == NULL)
		goto fail_unlock;
	if (hlist_empty(&tb->owners)) {
		if (sk->sk_reuse && sk->sk_state != TCP_LISTEN)
			tb->fastreuse = 1;
		else
			tb->fastreuse = 0;
	} else if (tb->fastreuse &&
		   (!sk->sk_reuse || sk->sk_state == TCP_LISTEN))
		tb->fastreuse = 0;

success:
	if (!tcp_sk(sk)->bind_hash)
		tcp_bind_hash(sk, tb, snum);
	BUG_TRAP(tcp_sk(sk)->bind_hash == tb);
	ret = 0;

fail_unlock:
	spin_unlock(&head->lock);
fail:
	local_bh_enable();
	return ret;
}
static __inline__ void __tcp_v6_hash(struct sock *sk)
{
	struct hlist_head *list;
	rwlock_t *lock;

	BUG_TRAP(sk_unhashed(sk));

	if (sk->sk_state == TCP_LISTEN) {
		list = &tcp_listening_hash[tcp_sk_listen_hashfn(sk)];
		lock = &tcp_lhash_lock;
		tcp_listen_wlock();
	} else {
		sk->sk_hashent = tcp_v6_sk_hashfn(sk);
		list = &tcp_ehash[sk->sk_hashent].chain;
		lock = &tcp_ehash[sk->sk_hashent].lock;
		write_lock(lock);
	}

	__sk_add_node(sk, list);
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(lock);
}
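
/* Two tables are in play here: listening sockets hash by local port
 * into tcp_listening_hash, while everything else hashes by the full
 * four-tuple into the established table.
 */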
static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (tp->af_specific == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__tcp_v6_hash(sk);
		local_bh_enable();
	}
}
static struct sock *tcp_v6_lookup_listener(struct in6_addr *daddr, unsigned short hnum, int dif)
{
	struct sock *sk;
	struct hlist_node *node;
	struct sock *result = NULL;
	int score, hiscore = 0;

	read_lock(&tcp_lhash_lock);
	sk_for_each(sk, node, &tcp_listening_hash[tcp_lhashfn(hnum)]) {
		if (inet_sk(sk)->num == hnum && sk->sk_family == PF_INET6) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			score = 1;
			if (!ipv6_addr_any(&np->rcv_saddr)) {
				if (!ipv6_addr_equal(&np->rcv_saddr, daddr))
					continue;
				score++;
			}
			if (sk->sk_bound_dev_if) {
				if (sk->sk_bound_dev_if != dif)
					continue;
				score++;
			}
			if (score > hiscore) {
				hiscore = score;
				result = sk;
			}
		}
	}
	if (result)
		sock_hold(result);
	read_unlock(&tcp_lhash_lock);
	return result;
}
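
/* The scoring above prefers the most specific listener: binding to a
 * particular address or interface each score a point, so an exactly
 * bound socket beats a wildcard listener on the same port.
 */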
/* Sockets in TCP_CLOSE state are _always_ taken out of the hash, so
 * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM
 *
 * The sockhash lock must be held as a reader here.
 */
static inline struct sock *__tcp_v6_lookup_established(struct in6_addr *saddr, u16 sport,
						       struct in6_addr *daddr, u16 hnum,
						       int dif)
{
	struct tcp_ehash_bucket *head;
	struct sock *sk;
	struct hlist_node *node;
	__u32 ports = TCP_COMBINED_PORTS(sport, hnum);
	int hash;

	/* Optimize here for direct hit, only listening connections can
	 * have wildcards anyway.
	 */
	hash = tcp_v6_hashfn(daddr, hnum, saddr, sport);
	head = &tcp_ehash[hash];
	read_lock(&head->lock);
	sk_for_each(sk, node, &head->chain) {
		/* For IPV6 do the cheaper port and family tests first. */
		if(TCP_IPV6_MATCH(sk, saddr, daddr, ports, dif))
			goto hit; /* You sunk my battleship! */
	}
	/* Must check for a TIME_WAIT'er before going to listener hash. */
	sk_for_each(sk, node, &(head + tcp_ehash_size)->chain) {
		/* FIXME: acme: check this... */
		struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

		if(*((__u32 *)&(tw->tw_dport))	== ports &&
		   sk->sk_family		== PF_INET6) {
			if(ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
			   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
			   (!sk->sk_bound_dev_if || sk->sk_bound_dev_if == dif))
				goto hit;
		}
	}
	read_unlock(&head->lock);
	return NULL;

hit:
	sock_hold(sk);
	read_unlock(&head->lock);
	return sk;
}
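
/* The established table is two parallel halves of tcp_ehash_size
 * buckets: live connections occupy the first half and their TIME-WAIT
 * remnants the matching bucket of the second, which is why one bucket
 * lock covers both loops above.
 */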
static inline struct sock *__tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
					   struct in6_addr *daddr, u16 hnum,
					   int dif)
{
	struct sock *sk;

	sk = __tcp_v6_lookup_established(saddr, sport, daddr, hnum, dif);
	if (sk)
		return sk;

	return tcp_v6_lookup_listener(daddr, hnum, dif);
}
inline struct sock *tcp_v6_lookup(struct in6_addr *saddr, u16 sport,
				  struct in6_addr *daddr, u16 dport,
				  int dif)
{
	struct sock *sk;

	local_bh_disable();
	sk = __tcp_v6_lookup(saddr, sport, daddr, ntohs(dport), dif);
	local_bh_enable();

	return sk;
}

EXPORT_SYMBOL_GPL(tcp_v6_lookup);
/*
 * Open request hash tables.
 */
static u32 tcp_v6_synq_hash(struct in6_addr *raddr, u16 rport, u32 rnd)
{
	u32 a, b, c;

	a = raddr->s6_addr32[0];
	b = raddr->s6_addr32[1];
	c = raddr->s6_addr32[2];

	a += JHASH_GOLDEN_RATIO;
	b += JHASH_GOLDEN_RATIO;
	c += rnd;
	__jhash_mix(a, b, c);

	a += raddr->s6_addr32[3];
	b += (u32) rport;
	__jhash_mix(a, b, c);

	return c & (TCP_SYNQ_HSIZE - 1);
}
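
/* This is the jhash mixing step done by hand: golden-ratio constants
 * plus the per-listener random hash_rnd make the bucket of a pending
 * request hard for a remote sender to predict.
 */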
static struct open_request *tcp_v6_search_req(struct tcp_sock *tp,
					      struct open_request ***prevp,
					      __u16 rport,
					      struct in6_addr *raddr,
					      struct in6_addr *laddr,
					      int iif)
{
	struct tcp_listen_opt *lopt = tp->listen_opt;
	struct open_request *req, **prev;

	for (prev = &lopt->syn_table[tcp_v6_synq_hash(raddr, rport, lopt->hash_rnd)];
	     (req = *prev) != NULL;
	     prev = &req->dl_next) {
		if (req->rmt_port == rport &&
		    req->class->family == AF_INET6 &&
		    ipv6_addr_equal(&req->af.v6_req.rmt_addr, raddr) &&
		    ipv6_addr_equal(&req->af.v6_req.loc_addr, laddr) &&
		    (!req->af.v6_req.iif || req->af.v6_req.iif == iif)) {
			BUG_TRAP(req->sk == NULL);
			*prevp = prev;
			return req;
		}
	}

	return NULL;
}
static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
				   struct in6_addr *saddr,
				   struct in6_addr *daddr,
				   unsigned long base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}
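
/* csum_ipv6_magic() folds in the pseudo-header of RFC 2460: source
 * and destination addresses, the upper-layer payload length and the
 * next-header value (IPPROTO_TCP), combined with the checksum over
 * the TCP header and data already accumulated in base.
 */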
static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
{
	if (skb->protocol == htons(ETH_P_IPV6)) {
		return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
						    skb->nh.ipv6h->saddr.s6_addr32,
						    skb->h.th->dest,
						    skb->h.th->source);
	} else {
		return secure_tcp_sequence_number(skb->nh.iph->daddr,
						  skb->nh.iph->saddr,
						  skb->h.th->dest,
						  skb->h.th->source);
	}
}
static int __tcp_v6_check_established(struct sock *sk, __u16 lport,
				      struct tcp_tw_bucket **twp)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *daddr = &np->rcv_saddr;
	struct in6_addr *saddr = &np->daddr;
	int dif = sk->sk_bound_dev_if;
	u32 ports = TCP_COMBINED_PORTS(inet->dport, lport);
	int hash = tcp_v6_hashfn(daddr, inet->num, saddr, inet->dport);
	struct tcp_ehash_bucket *head = &tcp_ehash[hash];
	struct sock *sk2;
	struct hlist_node *node;
	struct tcp_tw_bucket *tw;

	write_lock(&head->lock);

	/* Check TIME-WAIT sockets first. */
	sk_for_each(sk2, node, &(head + tcp_ehash_size)->chain) {
		tw = (struct tcp_tw_bucket*)sk2;

		if(*((__u32 *)&(tw->tw_dport)) == ports &&
		   sk2->sk_family == PF_INET6 &&
		   ipv6_addr_equal(&tw->tw_v6_daddr, saddr) &&
		   ipv6_addr_equal(&tw->tw_v6_rcv_saddr, daddr) &&
		   sk2->sk_bound_dev_if == sk->sk_bound_dev_if) {
			struct tcp_sock *tp = tcp_sk(sk);

			if (tw->tw_ts_recent_stamp &&
			    (!twp || (sysctl_tcp_tw_reuse &&
				      xtime.tv_sec - tw->tw_ts_recent_stamp > 1))) {
				/* See comment in tcp_ipv4.c */
				tp->write_seq = tw->tw_snd_nxt + 65535 + 2;
				if (!tp->write_seq)
					tp->write_seq = 1;
				tp->rx_opt.ts_recent = tw->tw_ts_recent;
				tp->rx_opt.ts_recent_stamp = tw->tw_ts_recent_stamp;
				sock_hold(sk2);
				goto unique;
			} else
				goto not_unique;
		}
	}
	tw = NULL;

	/* And established part... */
	sk_for_each(sk2, node, &head->chain) {
		if(TCP_IPV6_MATCH(sk2, saddr, daddr, ports, dif))
			goto not_unique;
	}

unique:
	BUG_TRAP(sk_unhashed(sk));
	__sk_add_node(sk, &head->chain);
	sk->sk_hashent = hash;
	sock_prot_inc_use(sk->sk_prot);
	write_unlock(&head->lock);

	if (twp) {
		*twp = tw;
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);
	} else if (tw) {
		/* Silly. Should hash-dance instead... */
		tcp_tw_deschedule(tw);
		NET_INC_STATS_BH(LINUX_MIB_TIMEWAITRECYCLED);

		tcp_tw_put(tw);
	}
	return 0;

not_unique:
	write_unlock(&head->lock);
	return -EADDRNOTAVAIL;
}
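
/* Reuse of a 4-tuple still held by a TIME-WAIT bucket is allowed only
 * when recent timestamps prove the old incarnation is dead; the new
 * write_seq is pushed past anything the peer could still accept from
 * the previous connection.
 */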
static inline u32 tcpv6_port_offset(const struct sock *sk)
{
	const struct inet_sock *inet = inet_sk(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);

	return secure_tcpv6_port_ephemeral(np->rcv_saddr.s6_addr32,
					   np->daddr.s6_addr32,
					   inet->dport);
}
static int tcp_v6_hash_connect(struct sock *sk)
{
	unsigned short snum = inet_sk(sk)->num;
	struct tcp_bind_hashbucket *head;
	struct tcp_bind_bucket *tb;
	int ret;

	if (!snum) {
		int low = sysctl_local_port_range[0];
		int high = sysctl_local_port_range[1];
		int range = high - low;
		int i;
		int port;
		static u32 hint;
		u32 offset = hint + tcpv6_port_offset(sk);
		struct hlist_node *node;
		struct tcp_tw_bucket *tw = NULL;
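
		/* The loop below probes every port in [low, high) starting
		 * at a connection-keyed secret offset; with the usual
		 * 32768..61000 default range that is up to 28232 candidates
		 * walked in an order outsiders cannot guess.
		 */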
		local_bh_disable();
		for (i = 1; i <= range; i++) {
			port = low + (i + offset) % range;
			head = &tcp_bhash[tcp_bhashfn(port)];
			spin_lock(&head->lock);

			/* Does not bother with rcv_saddr checks,
			 * because the established check is already
			 * unique enough.
			 */
			tb_for_each(tb, node, &head->chain) {
				if (tb->port == port) {
					BUG_TRAP(!hlist_empty(&tb->owners));
					if (tb->fastreuse >= 0)
						goto next_port;
					if (!__tcp_v6_check_established(sk,
									port,
									&tw))
						goto ok;
					goto next_port;
				}
			}

			tb = tcp_bucket_create(head, port);
			if (!tb) {
				spin_unlock(&head->lock);
				break;
			}
			tb->fastreuse = -1;
			goto ok;

		next_port:
			spin_unlock(&head->lock);
		}
		local_bh_enable();

		return -EADDRNOTAVAIL;

ok:
		hint += i;

		/* Head lock still held and bh's disabled */
		tcp_bind_hash(sk, tb, port);
		if (sk_unhashed(sk)) {
			inet_sk(sk)->sport = htons(port);
			__tcp_v6_hash(sk);
		}
		spin_unlock(&head->lock);

		if (tw) {
			tcp_tw_deschedule(tw);
			tcp_tw_put(tw);
		}

		ret = 0;
		goto out;
	}

	head = &tcp_bhash[tcp_bhashfn(snum)];
	tb = tcp_sk(sk)->bind_hash;
	spin_lock_bh(&head->lock);

	if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
		__tcp_v6_hash(sk);
		spin_unlock_bh(&head->lock);
		return 0;
	} else {
		spin_unlock(&head->lock);
		/* No definite answer... Walk to established hash table */
		ret = __tcp_v6_check_established(sk, snum, NULL);
out:
		local_bh_enable();
		return ret;
	}
}
static __inline__ int tcp_v6_iif(struct sk_buff *skb)
{
	return IP6CB(skb)->iif;
}
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p = NULL, final;
	struct flowi fl;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl, 0, sizeof(fl));

	if (np->sndflow) {
		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl.fl6_flowlabel);
		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if(ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if(addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type&IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl.fl6_flowlabel;
	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = tp->ext_header_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		tp->af_specific = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			tp->ext_header_len = exthdrlen;
			tp->af_specific = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
			goto failure;
		} else {
			ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
				      inet->saddr);
			ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
				      inet->rcv_saddr);
		}

		return err;
	}
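
	/* An IPv4-mapped destination such as ::ffff:192.0.2.1 is punted
	 * to the IPv4 connect path above; the saddr/rcv_saddr recorded
	 * there are the mapped (::ffff:a.b.c.d) forms of whatever IPv4
	 * source the v4 code picked.
	 */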
	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src,
		       (saddr ? saddr : &np->saddr));
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_dport = usin->sin6_port;
	fl.fl_ip_sport = inet->sport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	err = ip6_dst_lookup(sk, &dst, &fl);
	if (err)
		goto failure;
	if (final_p)
		ipv6_addr_copy(&fl.fl6_dst, final_p);

	if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
		dst_release(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl.fl6_src;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->rcv_saddr = LOOPBACK4_IPV6;

	ip6_dst_store(sk, dst, NULL);
	sk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	tp->ext_header_len = 0;
	if (np->opt)
		tp->ext_header_len = np->opt->opt_flen + np->opt->opt_nflen;

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
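
	/* Worst case: an IPv6 minimum-MTU link carries 1280 bytes, and
	 * the fixed IPv6 (40) and TCP (20) headers leave 1220 bytes of
	 * payload, so no peer MSS may be taken above that.
	 */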
	inet->dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = tcp_v6_hash_connect(sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->sport,
							     inet->dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
failure:
	inet->dport = 0;
	sk->sk_route_caps = 0;
	return err;
}
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		       int type, int code, int offset, __u32 info)
{
	struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
	struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;

	sk = tcp_v6_lookup(&hdr->daddr, th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		tcp_tw_put((struct tcp_tw_bucket*)sk);
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst = NULL;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi fl;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl, 0, sizeof(fl));
			fl.proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
			fl.oif = sk->sk_bound_dev_if;
			fl.fl_ip_dport = inet->dport;
			fl.fl_ip_sport = inet->sport;

			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
				sk->sk_err_soft = -err;
				goto out;
			}

			if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
				sk->sk_err_soft = -err;
				goto out;
			}

		} else
			dst_hold(dst);

		if (tp->pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for an open_request */
	switch (sk->sk_state) {
		struct open_request *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = tcp_v6_search_req(tp, &prev, th->dest, &hdr->daddr,
					&hdr->saddr, tcp_v6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		BUG_TRAP(req->sk == NULL);

		if (seq != req->snt_isn) {
			NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		tcp_synq_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}
static int tcp_v6_send_synack(struct sock *sk, struct open_request *req,
			      struct dst_entry *dst)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff * skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr * final_p = NULL, final;
	struct flowi fl;
	int err = -1;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
	fl.fl6_flowlabel = 0;
	fl.oif = req->af.v6_req.iif;
	fl.fl_ip_dport = req->rmt_port;
	fl.fl_ip_sport = inet_sk(sk)->sport;

	if (dst == NULL) {
		opt = np->opt;
		if (opt == NULL &&
		    np->rxopt.bits.srcrt == 2 &&
		    req->af.v6_req.pktopts) {
			struct sk_buff *pktopts = req->af.v6_req.pktopts;
			struct inet6_skb_parm *rxopt = IP6CB(pktopts);
			if (rxopt->srcrt)
				opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
		}

		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err)
			goto done;
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);
		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto done;
	}

	skb = tcp_make_synack(sk, dst, req);
	if (skb) {
		struct tcphdr *th = skb->h.th;

		th->check = tcp_v6_check(th, skb->len,
					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
					 csum_partial((char *)th, skb->len, skb->csum));

		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
		err = ip6_xmit(sk, skb, &fl, opt, 0);
		if (err == NET_XMIT_CN)
			err = 0;
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	return err;
}
static void tcp_v6_or_free(struct open_request *req)
{
	if (req->af.v6_req.pktopts)
		kfree_skb(req->af.v6_req.pktopts);
}

static struct or_calltable or_ipv6 = {
	.family		=	AF_INET6,
	.rtx_syn_ack	=	tcp_v6_send_synack,
	.send_ack	=	tcp_v6_or_send_ack,
	.destructor	=	tcp_v6_or_free,
	.send_reset	=	tcp_v6_send_reset
};
static int ipv6_opt_accepted(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet6_skb_parm *opt = IP6CB(skb);

	if (np->rxopt.all) {
		if ((opt->hop && np->rxopt.bits.hopopts) ||
		    ((IPV6_FLOWINFO_MASK&*(u32*)skb->nh.raw) &&
		     np->rxopt.bits.rxflow) ||
		    (opt->srcrt && np->rxopt.bits.srcrt) ||
		    ((opt->dst1 || opt->dst0) && np->rxopt.bits.dstopts))
			return 1;
	}
	return 0;
}
static void tcp_v6_send_check(struct sock *sk, struct tcphdr *th, int len,
			      struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	if (skb->ip_summed == CHECKSUM_HW) {
		th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
		skb->csum = offsetof(struct tcphdr, check);
	} else {
		th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
					    csum_partial((char *)th, th->doff<<2,
							 skb->csum));
	}
}
static void tcp_v6_send_reset(struct sk_buff *skb)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;

	if (!ipv6_unicast_destination(skb))
		return;

	/*
	 * We need to grab some memory, and put together an RST,
	 * and then put it into the queue to be sent.
	 */

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));

	t1 = (struct tcphdr *) skb_push(buff, sizeof(struct tcphdr));

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = sizeof(*t1)/4;
	t1->rst = 1;

	if(th->ack) {
		t1->seq = th->ack_seq;
	} else {
		t1->ack = 1;
		t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
				    + skb->len - (th->doff<<2));
	}
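
	/* If the offending segment carried no ACK we must manufacture
	 * one: per RFC 793 the RST acknowledges exactly the sequence
	 * space the segment consumed, its data length plus one each
	 * for SYN and FIN.
	 */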
	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    sizeof(*t1), IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	/* sk = NULL, but it is safe for now. RST socket required. */
	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}

		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
{
	struct tcphdr *th = skb->h.th, *t1;
	struct sk_buff *buff;
	struct flowi fl;
	int tot_len = sizeof(struct tcphdr);

	if (ts)
		tot_len += 3*4;

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len/4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = 1;
	t1->window = htons(win);
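
	/* When a timestamp is echoed below it uses the standard layout:
	 * two NOPs for alignment, then kind 8 (TCPOPT_TIMESTAMP) and
	 * length 10, followed by our clock and the peer's echoed value.
	 */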
	if (ts) {
		u32 *ptr = (u32*)(t1 + 1);
		*ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
			       (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*ptr++ = htonl(tcp_time_stamp);
		*ptr = htonl(ts);
	}

	buff->csum = csum_partial((char *)t1, tot_len, 0);

	memset(&fl, 0, sizeof(fl));
	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);

	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
				    tot_len, IPPROTO_TCP,
				    buff->csum);

	fl.proto = IPPROTO_TCP;
	fl.oif = tcp_v6_iif(skb);
	fl.fl_ip_dport = t1->dest;
	fl.fl_ip_sport = t1->source;

	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
		if ((xfrm_lookup(&buff->dst, &fl, NULL, 0)) < 0) {
			dst_release(buff->dst);
			return;
		}
		ip6_xmit(NULL, buff, &fl, NULL, 0);
		TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
		return;
	}

	kfree_skb(buff);
}
static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_tw_bucket *tw = (struct tcp_tw_bucket *)sk;

	tcp_v6_send_ack(skb, tw->tw_snd_nxt, tw->tw_rcv_nxt,
			tw->tw_rcv_wnd >> tw->tw_rcv_wscale, tw->tw_ts_recent);

	tcp_tw_put(tw);
}

static void tcp_v6_or_send_ack(struct sk_buff *skb, struct open_request *req)
{
	tcp_v6_send_ack(skb, req->snt_isn+1, req->rcv_isn+1, req->rcv_wnd, req->ts_recent);
}
static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct open_request *req, **prev;
	struct tcphdr *th = skb->h.th;
	struct tcp_sock *tp = tcp_sk(sk);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = tcp_v6_search_req(tp, &prev, th->source, &skb->nh.ipv6h->saddr,
				&skb->nh.ipv6h->daddr, tcp_v6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __tcp_v6_lookup_established(&skb->nh.ipv6h->saddr,
					  th->source,
					  &skb->nh.ipv6h->daddr,
					  ntohs(th->dest),
					  tcp_v6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		tcp_tw_put((struct tcp_tw_bucket*)nsk);
		return NULL;
	}

#if 0 /*def CONFIG_SYN_COOKIES*/
	if (!th->rst && !th->syn && th->ack)
		sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
#endif
	return sk;
}
static void tcp_v6_synq_add(struct sock *sk, struct open_request *req)
{
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp_listen_opt *lopt = tp->listen_opt;
	u32 h = tcp_v6_synq_hash(&req->af.v6_req.rmt_addr, req->rmt_port, lopt->hash_rnd);

	req->sk = NULL;
	req->expires = jiffies + TCP_TIMEOUT_INIT;
	req->retrans = 0;
	req->dl_next = lopt->syn_table[h];

	write_lock(&tp->syn_wait_lock);
	lopt->syn_table[h] = req;
	write_unlock(&tp->syn_wait_lock);

	tcp_synq_added(sk);
}
/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_options_received tmp_opt;
	struct tcp_sock *tp = tcp_sk(sk);
	struct open_request *req = NULL;
	__u32 isn = TCP_SKB_CB(skb)->when;

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	/*
	 *	There are no SYN attacks on IPv6, yet...
	 */
	if (tcp_synq_is_full(sk) && !isn) {
		if (net_ratelimit())
			printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && tcp_synq_young(sk) > 1)
		goto drop;

	req = tcp_openreq_alloc();
	if (req == NULL)
		goto drop;

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;

	tcp_parse_options(skb, &tmp_opt, 0);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	req->class = &or_ipv6;
	ipv6_addr_copy(&req->af.v6_req.rmt_addr, &skb->nh.ipv6h->saddr);
	ipv6_addr_copy(&req->af.v6_req.loc_addr, &skb->nh.ipv6h->daddr);
	TCP_ECN_create_request(req, skb->h.th);
	req->af.v6_req.pktopts = NULL;
	if (ipv6_opt_accepted(sk, skb) ||
	    np->rxopt.bits.rxinfo ||
	    np->rxopt.bits.rxhlim) {
		atomic_inc(&skb->users);
		req->af.v6_req.pktopts = skb;
	}
	req->af.v6_req.iif = sk->sk_bound_dev_if;

	/* So that link locals have meaning */
	if (!sk->sk_bound_dev_if &&
	    ipv6_addr_type(&req->af.v6_req.rmt_addr) & IPV6_ADDR_LINKLOCAL)
		req->af.v6_req.iif = tcp_v6_iif(skb);

	if (isn == 0)
		isn = tcp_v6_init_sequence(sk, skb);

	req->snt_isn = isn;

	if (tcp_v6_send_synack(sk, req, NULL))
		goto drop;

	tcp_v6_synq_add(sk, req);

	return 0;

drop:
	if (req)
		tcp_openreq_free(req);

	TCP_INC_STATS_BH(TCP_MIB_ATTEMPTFAILS);
	return 0; /* don't send reset */
}
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct open_request *req,
					  struct dst_entry *dst)
{
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
			      newinet->daddr);

		ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
			      newinet->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		newtp->af_specific = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
		newnp->pktoptions = NULL;
		newnp->opt	  = NULL;
		newnp->mcast_oif  = tcp_v6_iif(skb);
		newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

		/* Charge newly allocated IPv6 socket. Though it is mapped,
		 * it is IPv6 yet.
		 */
#ifdef INET_REFCNT_DEBUG
		atomic_inc(&inet6_sock_nr);
#endif

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 af_tcp.af_specific.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, newtp->pmtu_cookie);

		return newsk;
	}
	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (np->rxopt.bits.srcrt == 2 &&
	    opt == NULL && req->af.v6_req.pktopts) {
		struct inet6_skb_parm *rxopt = IP6CB(req->af.v6_req.pktopts);
		if (rxopt->srcrt)
			opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(req->af.v6_req.pktopts->nh.raw + rxopt->srcrt));
	}

	if (dst == NULL) {
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
		if (opt && opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}
		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = req->rmt_port;
		fl.fl_ip_sport = inet_sk(sk)->sport;

		if (ip6_dst_lookup(sk, &dst, &fl))
			goto out;

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
			goto out;
	}
	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out;

	/* Charge newly allocated IPv6 socket */
#ifdef INET_REFCNT_DEBUG
	atomic_inc(&inet6_sock_nr);
#endif

	ip6_dst_store(newsk, dst, NULL);
	newsk->sk_route_caps = dst->dev->features &
		~(NETIF_F_IP_CSUM | NETIF_F_TSO);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &req->af.v6_req.rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &req->af.v6_req.loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &req->af.v6_req.loc_addr);
	newsk->sk_bound_dev_if = req->af.v6_req.iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->opt = NULL;

	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (req->af.v6_req.pktopts) {
		newnp->pktoptions = skb_clone(req->af.v6_req.pktopts,
					      GFP_ATOMIC);
		kfree_skb(req->af.v6_req.pktopts);
		req->af.v6_req.pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt = NULL;
	newnp->mcast_oif = tcp_v6_iif(skb);
	newnp->mcast_hops = skb->nh.ipv6h->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	newtp->ext_header_len = 0;
	if (newnp->opt)
		newtp->ext_header_len = newnp->opt->opt_nflen +
					newnp->opt->opt_flen;

	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
	tcp_initialize_rcv_mss(newsk);

	newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;

	__tcp_v6_hash(newsk);
	tcp_inherit_port(sk, newsk);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
out:
	NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return NULL;
}
static int tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_HW) {
		skb->ip_summed = CHECKSUM_UNNECESSARY;
		if (!tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				  &skb->nh.ipv6h->daddr, skb->csum))
			return 0;
		LIMIT_NETDEBUG(printk(KERN_DEBUG "hw tcp v6 csum failed\n"));
	}
	if (skb->len <= 76) {
		if (tcp_v6_check(skb->h.th, skb->len, &skb->nh.ipv6h->saddr,
				 &skb->nh.ipv6h->daddr,
				 skb_checksum(skb, 0, skb->len, 0)))
			return -1;
		skb->ip_summed = CHECKSUM_UNNECESSARY;
	} else {
		skb->csum = ~tcp_v6_check(skb->h.th, skb->len,
					  &skb->nh.ipv6h->saddr,
					  &skb->nh.ipv6h->daddr, 0);
	}
	return 0;
}
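
/* The strategy above: trust but verify hardware checksums; fully check
 * short packets (<= 76 bytes) right away since that is cheap; for
 * longer ones only seed skb->csum with the pseudo-header and let the
 * copy-to-user path finish the verification.
 */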
/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and is backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

	if (sk_filter(sk, skb, 0))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it without affecting IPv4.
	   The rest of the code is protocol independent,
	   and I do not like the idea of uglifying IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought out. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		TCP_CHECK_TIMER(sk);
		if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
			goto reset;
		TCP_CHECK_TIMER(sk);
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	}

	TCP_CHECK_TIMER(sk);
	if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
		goto reset;
	TCP_CHECK_TIMER(sk);
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo)
			np->mcast_oif = tcp_v6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim)
			np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	if (opt_skb)
		kfree_skb(opt_skb);
	return 0;
}
static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
{
	struct sk_buff *skb = *pskb;
	struct tcphdr *th;
	struct sock *sk;
	int ret;

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = skb->h.th;

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
	     tcp_v6_checksum_init(skb) < 0))
		goto bad_packet;

	th = skb->h.th;
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __tcp_v6_lookup(&skb->nh.ipv6h->saddr, th->source,
			     &skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));

	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb, 0))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
		if (!tcp_prequeue(sk, skb))
			ret = tcp_v6_do_rcv(sk, skb);
	} else
		sk_add_backlog(sk, skb);
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(skb);
	}

discard_it:
	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(TCP_MIB_INERRS);
		tcp_tw_put((struct tcp_tw_bucket *) sk);
		goto discard_it;
	}

	switch(tcp_timewait_state_process((struct tcp_tw_bucket *)sk,
					  skb, th, skb->len)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = tcp_v6_lookup_listener(&skb->nh.ipv6h->daddr, ntohs(th->dest), tcp_v6_iif(skb));
		if (sk2 != NULL) {
			tcp_tw_deschedule((struct tcp_tw_bucket *)sk);
			tcp_tw_put((struct tcp_tw_bucket *)sk);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}
static int tcp_v6_rebuild_header(struct sock *sk)
{
	int err;
	struct dst_entry *dst;
	struct ipv6_pinfo *np = inet6_sk(sk);

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		struct inet_sock *inet = inet_sk(sk);
		struct in6_addr *final_p = NULL, final;
		struct flowi fl;

		memset(&fl, 0, sizeof(fl));
		fl.proto = IPPROTO_TCP;
		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
		fl.fl6_flowlabel = np->flow_label;
		fl.oif = sk->sk_bound_dev_if;
		fl.fl_ip_dport = inet->dport;
		fl.fl_ip_sport = inet->sport;

		if (np->opt && np->opt->srcrt) {
			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
			ipv6_addr_copy(&final, &fl.fl6_dst);
			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
			final_p = &final;
		}

		err = ip6_dst_lookup(sk, &dst, &fl);
		if (err) {
			sk->sk_route_caps = 0;
			return err;
		}
		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_err_soft = -err;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	return 0;
}
static int tcp_v6_xmit(struct sk_buff *skb, int ipfragok)
{
	struct sock *sk = skb->sk;
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct flowi fl;
	struct dst_entry *dst;
	struct in6_addr *final_p = NULL, final;

	memset(&fl, 0, sizeof(fl));
	fl.proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
	fl.fl6_flowlabel = np->flow_label;
	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
	fl.oif = sk->sk_bound_dev_if;
	fl.fl_ip_sport = inet->sport;
	fl.fl_ip_dport = inet->dport;

	if (np->opt && np->opt->srcrt) {
		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
		ipv6_addr_copy(&final, &fl.fl6_dst);
		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
		final_p = &final;
	}

	dst = __sk_dst_check(sk, np->dst_cookie);

	if (dst == NULL) {
		int err = ip6_dst_lookup(sk, &dst, &fl);

		if (err) {
			sk->sk_err_soft = -err;
			return err;
		}

		if (final_p)
			ipv6_addr_copy(&fl.fl6_dst, final_p);

		if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
			sk->sk_route_caps = 0;
			dst_release(dst);
			return err;
		}

		ip6_dst_store(sk, dst, NULL);
		sk->sk_route_caps = dst->dev->features &
			~(NETIF_F_IP_CSUM | NETIF_F_TSO);
	}

	skb->dst = dst_clone(dst);

	/* Restore final destination back after routing done */
	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);

	return ip6_xmit(sk, skb, &fl, np->opt, 0);
}
static void v6_addr2sockaddr(struct sock *sk, struct sockaddr * uaddr)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) uaddr;

	sin6->sin6_family = AF_INET6;
	ipv6_addr_copy(&sin6->sin6_addr, &np->daddr);
	sin6->sin6_port = inet_sk(sk)->dport;
	/* We do not store received flowlabel for TCP */
	sin6->sin6_flowinfo = 0;
	sin6->sin6_scope_id = 0;
	if (sk->sk_bound_dev_if &&
	    ipv6_addr_type(&sin6->sin6_addr) & IPV6_ADDR_LINKLOCAL)
		sin6->sin6_scope_id = sk->sk_bound_dev_if;
}
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
static struct tcp_func ipv6_specific = {
	.queue_xmit	=	tcp_v6_xmit,
	.send_check	=	tcp_v6_send_check,
	.rebuild_header	=	tcp_v6_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v6_remember_stamp,
	.net_header_len	=	sizeof(struct ipv6hdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};

/*
 *	TCP over IPv4 via INET6 API
 */

static struct tcp_func ipv6_mapped = {
	.queue_xmit	=	ip_queue_xmit,
	.send_check	=	tcp_v4_send_check,
	.rebuild_header	=	tcp_v4_rebuild_header,
	.conn_request	=	tcp_v6_conn_request,
	.syn_recv_sock	=	tcp_v6_syn_recv_sock,
	.remember_stamp	=	tcp_v4_remember_stamp,
	.net_header_len	=	sizeof(struct iphdr),

	.setsockopt	=	ipv6_setsockopt,
	.getsockopt	=	ipv6_getsockopt,
	.addr2sockaddr	=	v6_addr2sockaddr,
	.sockaddr_len	=	sizeof(struct sockaddr_in6)
};
/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	tp->rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = 0x7fffffff;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache_std = tp->mss_cache = 536;
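
	/* 536 is the classic conservative default: the 576-byte minimum
	 * reassembly buffer every IPv4 host must accept (RFC 1122) minus
	 * 40 bytes of IP+TCP headers. It is re-synced once a route and
	 * its MTU are known.
	 */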
	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	tp->af_specific = &ipv6_specific;

	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	atomic_inc(&tcp_sockets_allocated);

	return 0;
}
static int tcp_v6_destroy_sock(struct sock *sk)
{
	extern int tcp_v4_destroy_sock(struct sock *sk);

	tcp_v4_destroy_sock(sk);
	return inet6_destroy_sock(sk);
}
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct open_request *req, int i, int uid)
{
	struct in6_addr *dest, *src;
	int ttd = req->expires - jiffies;

	if (ttd < 0)
		ttd = 0;

	src = &req->af.v6_req.loc_addr;
	dest = &req->af.v6_req.rmt_addr;
	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_sk(sk)->sport),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(req->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}
static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->dport);
	srcp  = ntohs(inet->sport);
	if (tp->pending == TCP_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= tp->timeout;
	} else if (tp->pending == TCP_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= tp->timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires	= jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq-tp->snd_una, tp->rcv_nxt-tp->copied_seq,
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   tp->retransmits,
		   sock_i_uid(sp),
		   tp->probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   tp->rto, tp->ack.ato, (tp->ack.quick<<1)|tp->ack.pingpong,
		   tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
		   );
}
static void get_timewait6_sock(struct seq_file *seq,
			       struct tcp_tw_bucket *tw, int i)
{
	struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw->tw_v6_daddr;
	src  = &tw->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}
#ifdef CONFIG_PROC_FS
static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct file_operations tcp6_seq_fops;
static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.owner		= THIS_MODULE,
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_show	= tcp6_seq_show,
	.seq_fops	= &tcp6_seq_fops,
};

int __init tcp6_proc_init(void)
{
	return tcp_proc_register(&tcp6_seq_afinfo);
}

void tcp6_proc_exit(void)
{
	tcp_proc_unregister(&tcp6_seq_afinfo);
}
#endif
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= tcp_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.sendmsg		= tcp_sendmsg,
	.recvmsg		= tcp_recvmsg,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= tcp_unhash,
	.get_port		= tcp_v6_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
};

static struct inet6_protocol tcpv6_protocol = {
	.handler	= tcp_v6_rcv,
	.err_handler	= tcp_v6_err,
	.flags		= INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};
extern struct proto_ops inet6_stream_ops;

static struct inet_protosw tcpv6_protosw = {
	.type		= SOCK_STREAM,
	.protocol	= IPPROTO_TCP,
	.prot		= &tcpv6_prot,
	.ops		= &inet6_stream_ops,
	.flags		= INET_PROTOSW_PERMANENT,
};

void __init tcpv6_init(void)
{
	/* register inet6 protocol */
	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
	inet6_register_protosw(&tcpv6_protosw);
}