git.oblomov.eu Git - linux-2.6/blob - net/ipv6/ip6_output.c

   1 /*
   2  *      IPv6 output functions
   3  *      Linux INET6 implementation
   4  *
   5  *      Authors:
   6  *      Pedro Roque             <roque@di.fc.ul.pt>
   7  *
   8  *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
   9  *
  10  *      Based on linux/net/ipv4/ip_output.c
  11  *
  12  *      This program is free software; you can redistribute it and/or
  13  *      modify it under the terms of the GNU General Public License
  14  *      as published by the Free Software Foundation; either version
  15  *      2 of the License, or (at your option) any later version.
  16  *
  17  *      Changes:
   18  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
  19  *                              extension headers are implemented.
  20  *                              route changes now work.
  21  *                              ip6_forward does not confuse sniffers.
  22  *                              etc.
  23  *
  24  *      H. von Brand    :       Added missing #include <linux/string.h>
  25  *      Imran Patel     :       frag id should be in NBO
  26  *      Kazunori MIYAZAWA @USAGI
  27  *                      :       add ip6_append_data and related functions
  28  *                              for datagram xmit
  29  */
  30
  31 #include <linux/errno.h>
  32 #include <linux/types.h>
  33 #include <linux/string.h>
  34 #include <linux/socket.h>
  35 #include <linux/net.h>
  36 #include <linux/netdevice.h>
  37 #include <linux/if_arp.h>
  38 #include <linux/in6.h>
  39 #include <linux/tcp.h>
  40 #include <linux/route.h>
  41 #include <linux/module.h>
  42
  43 #include <linux/netfilter.h>
  44 #include <linux/netfilter_ipv6.h>
  45
  46 #include <net/sock.h>
  47 #include <net/snmp.h>
  48
  49 #include <net/ipv6.h>
  50 #include <net/ndisc.h>
  51 #include <net/protocol.h>
  52 #include <net/ip6_route.h>
  53 #include <net/addrconf.h>
  54 #include <net/rawv6.h>
  55 #include <net/icmp.h>
  56 #include <net/xfrm.h>
  57 #include <net/checksum.h>
  58
  59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  60
  61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
  62 {
  63         static u32 ipv6_fragmentation_id = 1;
  64         static DEFINE_SPINLOCK(ip6_id_lock);
  65
  66         spin_lock_bh(&ip6_id_lock);
  67         fhdr->identification = htonl(ipv6_fragmentation_id);
  68         if (++ipv6_fragmentation_id == 0)
  69                 ipv6_fragmentation_id = 1;
  70         spin_unlock_bh(&ip6_id_lock);
  71 }
  72
  73 static int ip6_output_finish(struct sk_buff *skb)
  74 {
  75         struct dst_entry *dst = skb->dst;
  76
  77         if (dst->hh)
  78                 return neigh_hh_output(dst->hh, skb);
  79         else if (dst->neighbour)
  80                 return dst->neighbour->output(skb);
  81
  82         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
  83         kfree_skb(skb);
  84         return -EINVAL;
  85
  86 }
  87
  88 /* dev_loopback_xmit for use with netfilter. */
  89 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
  90 {
  91         skb_reset_mac_header(newskb);
  92         __skb_pull(newskb, skb_network_offset(newskb));
  93         newskb->pkt_type = PACKET_LOOPBACK;
  94         newskb->ip_summed = CHECKSUM_UNNECESSARY;
  95         BUG_TRAP(newskb->dst);
  96
  97         netif_rx(newskb);
  98         return 0;
  99 }
 100
 101
 102 static int ip6_output2(struct sk_buff *skb)
 103 {
 104         struct dst_entry *dst = skb->dst;
 105         struct net_device *dev = dst->dev;
 106
 107         skb->protocol = htons(ETH_P_IPV6);
 108         skb->dev = dev;
 109
 110         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 111                 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 112                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 113
 114                 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 115                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 116                                         &ipv6_hdr(skb)->saddr)) {
 117                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 118
 119                         /* Do not check for IFF_ALLMULTI; multicast routing
 120                            is not supported in any case.
 121                          */
 122                         if (newskb)
 123                                 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
 124                                         newskb->dev,
 125                                         ip6_dev_loopback_xmit);
 126
 127                         if (ipv6_hdr(skb)->hop_limit == 0) {
 128                                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 129                                 kfree_skb(skb);
 130                                 return 0;
 131                         }
 132                 }
 133
 134                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
 135         }
 136
 137         return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
 138 }
 139
 140 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 141 {
 142         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 143
 144         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
 145                skb->dst->dev->mtu : dst_mtu(skb->dst);
 146 }
 147
 148 int ip6_output(struct sk_buff *skb)
 149 {
 150         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 151                                 dst_allfrag(skb->dst))
 152                 return ip6_fragment(skb, ip6_output2);
 153         else
 154                 return ip6_output2(skb);
 155 }
 156
 157 /*
 158  *      xmit an sk_buff (used by TCP)
 159  */
 160
 161 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 162              struct ipv6_txoptions *opt, int ipfragok)
 163 {
 164         struct ipv6_pinfo *np = inet6_sk(sk);
 165         struct in6_addr *first_hop = &fl->fl6_dst;
 166         struct dst_entry *dst = skb->dst;
 167         struct ipv6hdr *hdr;
 168         u8  proto = fl->proto;
 169         int seg_len = skb->len;
 170         int hlimit, tclass;
 171         u32 mtu;
 172
 173         if (opt) {
 174                 unsigned int head_room;
 175
 176                 /* First: exthdrs may take lots of space (~8K for now)
 177                    MAX_HEADER is not enough.
 178                  */
 179                 head_room = opt->opt_nflen + opt->opt_flen;
 180                 seg_len += head_room;
 181                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 182
 183                 if (skb_headroom(skb) < head_room) {
 184                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 185                         if (skb2 == NULL) {
 186                                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
 187                                               IPSTATS_MIB_OUTDISCARDS);
 188                                 kfree_skb(skb);
 189                                 return -ENOBUFS;
 190                         }
 191                         kfree_skb(skb);
 192                         skb = skb2;
 193                         if (sk)
 194                                 skb_set_owner_w(skb, sk);
 195                 }
 196                 if (opt->opt_flen)
 197                         ipv6_push_frag_opts(skb, opt, &proto);
 198                 if (opt->opt_nflen)
 199                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 200         }
 201
 202         skb_push(skb, sizeof(struct ipv6hdr));
 203         skb_reset_network_header(skb);
 204         hdr = ipv6_hdr(skb);
 205
 206         /*
 207          *      Fill in the IPv6 header
 208          */
 209
 210         hlimit = -1;
 211         if (np)
 212                 hlimit = np->hop_limit;
 213         if (hlimit < 0)
 214                 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
 215         if (hlimit < 0)
 216                 hlimit = ipv6_get_hoplimit(dst->dev);
 217
 218         tclass = -1;
 219         if (np)
 220                 tclass = np->tclass;
 221         if (tclass < 0)
 222                 tclass = 0;
 223
 224         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
 225
 226         hdr->payload_len = htons(seg_len);
 227         hdr->nexthdr = proto;
 228         hdr->hop_limit = hlimit;
 229
 230         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 231         ipv6_addr_copy(&hdr->daddr, first_hop);
 232
 233         skb->priority = sk->sk_priority;
 234
 235         mtu = dst_mtu(dst);
 236         if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
 237                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
 238                               IPSTATS_MIB_OUTREQUESTS);
 239                 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
 240                                 dst_output);
 241         }
 242
 243         if (net_ratelimit())
 244                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 245         skb->dev = dst->dev;
 246         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 247         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 248         kfree_skb(skb);
 249         return -EMSGSIZE;
 250 }
 251
 252 EXPORT_SYMBOL(ip6_xmit);
 253
 254 /*
 255  *      To avoid extra problems ND packets are send through this
 256  *      routine. It's code duplication but I really want to avoid
 257  *      extra checks since ipv6_build_header is used by TCP (which
 258  *      is for us performance critical)
 259  */
 260
 261 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 262                struct in6_addr *saddr, struct in6_addr *daddr,
 263                int proto, int len)
 264 {
 265         struct ipv6_pinfo *np = inet6_sk(sk);
 266         struct ipv6hdr *hdr;
 267         int totlen;
 268
 269         skb->protocol = htons(ETH_P_IPV6);
 270         skb->dev = dev;
 271
 272         totlen = len + sizeof(struct ipv6hdr);
 273
 274         skb_reset_network_header(skb);
 275         skb_put(skb, sizeof(struct ipv6hdr));
 276         hdr = ipv6_hdr(skb);
 277
 278         *(__be32*)hdr = htonl(0x60000000);
 279
 280         hdr->payload_len = htons(len);
 281         hdr->nexthdr = proto;
 282         hdr->hop_limit = np->hop_limit;
 283
 284         ipv6_addr_copy(&hdr->saddr, saddr);
 285         ipv6_addr_copy(&hdr->daddr, daddr);
 286
 287         return 0;
 288 }
 289
 290 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 291 {
 292         struct ip6_ra_chain *ra;
 293         struct sock *last = NULL;
 294
 295         read_lock(&ip6_ra_lock);
 296         for (ra = ip6_ra_chain; ra; ra = ra->next) {
 297                 struct sock *sk = ra->sk;
 298                 if (sk && ra->sel == sel &&
 299                     (!sk->sk_bound_dev_if ||
 300                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
 301                         if (last) {
 302                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 303                                 if (skb2)
 304                                         rawv6_rcv(last, skb2);
 305                         }
 306                         last = sk;
 307                 }
 308         }
 309
 310         if (last) {
 311                 rawv6_rcv(last, skb);
 312                 read_unlock(&ip6_ra_lock);
 313                 return 1;
 314         }
 315         read_unlock(&ip6_ra_lock);
 316         return 0;
 317 }
 318
 319 static int ip6_forward_proxy_check(struct sk_buff *skb)
 320 {
 321         struct ipv6hdr *hdr = ipv6_hdr(skb);
 322         u8 nexthdr = hdr->nexthdr;
 323         int offset;
 324
 325         if (ipv6_ext_hdr(nexthdr)) {
 326                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
 327                 if (offset < 0)
 328                         return 0;
 329         } else
 330                 offset = sizeof(struct ipv6hdr);
 331
 332         if (nexthdr == IPPROTO_ICMPV6) {
 333                 struct icmp6hdr *icmp6;
 334
 335                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
 336                                          offset + 1 - skb->data)))
 337                         return 0;
 338
 339                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 340
 341                 switch (icmp6->icmp6_type) {
 342                 case NDISC_ROUTER_SOLICITATION:
 343                 case NDISC_ROUTER_ADVERTISEMENT:
 344                 case NDISC_NEIGHBOUR_SOLICITATION:
 345                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
 346                 case NDISC_REDIRECT:
 347                         /* For reaction involving unicast neighbor discovery
 348                          * message destined to the proxied address, pass it to
 349                          * input function.
 350                          */
 351                         return 1;
 352                 default:
 353                         break;
 354                 }
 355         }
 356
 357         /*
 358          * The proxying router can't forward traffic sent to a link-local
 359          * address, so signal the sender and discard the packet. This
 360          * behavior is clarified by the MIPv6 specification.
 361          */
 362         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 363                 dst_link_failure(skb);
 364                 return -1;
 365         }
 366
 367         return 0;
 368 }
 369
 370 static inline int ip6_forward_finish(struct sk_buff *skb)
 371 {
 372         return dst_output(skb);
 373 }
 374
 375 int ip6_forward(struct sk_buff *skb)
 376 {
 377         struct dst_entry *dst = skb->dst;
 378         struct ipv6hdr *hdr = ipv6_hdr(skb);
 379         struct inet6_skb_parm *opt = IP6CB(skb);
 380
 381         if (ipv6_devconf.forwarding == 0)
 382                 goto error;
 383
 384         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 385                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 386                 goto drop;
 387         }
 388
 389         skb_forward_csum(skb);
 390
 391         /*
 392          *      We DO NOT make any processing on
 393          *      RA packets, pushing them to user level AS IS
 394          *      without ane WARRANTY that application will be able
 395          *      to interpret them. The reason is that we
 396          *      cannot make anything clever here.
 397          *
 398          *      We are not end-node, so that if packet contains
 399          *      AH/ESP, we cannot make anything.
 400          *      Defragmentation also would be mistake, RA packets
 401          *      cannot be fragmented, because there is no warranty
 402          *      that different fragments will go along one path. --ANK
 403          */
 404         if (opt->ra) {
 405                 u8 *ptr = skb_network_header(skb) + opt->ra;
 406                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 407                         return 0;
 408         }
 409
 410         /*
 411          *      check and decrement ttl
 412          */
 413         if (hdr->hop_limit <= 1) {
 414                 /* Force OUTPUT device used as source address */
 415                 skb->dev = dst->dev;
 416                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 417                             0, skb->dev);
 418                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 419
 420                 kfree_skb(skb);
 421                 return -ETIMEDOUT;
 422         }
 423
 424         /* XXX: idev->cnf.proxy_ndp? */
 425         if (ipv6_devconf.proxy_ndp &&
 426             pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
 427                 int proxied = ip6_forward_proxy_check(skb);
 428                 if (proxied > 0)
 429                         return ip6_input(skb);
 430                 else if (proxied < 0) {
 431                         IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 432                         goto drop;
 433                 }
 434         }
 435
 436         if (!xfrm6_route_forward(skb)) {
 437                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 438                 goto drop;
 439         }
 440         dst = skb->dst;
 441
 442         /* IPv6 specs say nothing about it, but it is clear that we cannot
 443            send redirects to source routed frames.
 444            We don't send redirects to frames decapsulated from IPsec.
 445          */
 446         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
 447             !skb->sp) {
 448                 struct in6_addr *target = NULL;
 449                 struct rt6_info *rt;
 450                 struct neighbour *n = dst->neighbour;
 451
 452                 /*
 453                  *      incoming and outgoing devices are the same
 454                  *      send a redirect.
 455                  */
 456
 457                 rt = (struct rt6_info *) dst;
 458                 if ((rt->rt6i_flags & RTF_GATEWAY))
 459                         target = (struct in6_addr*)&n->primary_key;
 460                 else
 461                         target = &hdr->daddr;
 462
 463                 /* Limit redirects both by destination (here)
 464                    and by source (inside ndisc_send_redirect)
 465                  */
 466                 if (xrlim_allow(dst, 1*HZ))
 467                         ndisc_send_redirect(skb, n, target);
 468         } else {
 469                 int addrtype = ipv6_addr_type(&hdr->saddr);
 470
 471                 /* This check is security critical. */
 472                 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
 473                         goto error;
 474                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
 475                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 476                                 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
 477                         goto error;
 478                 }
 479         }
 480
 481         if (skb->len > dst_mtu(dst)) {
 482                 /* Again, force OUTPUT device used as source address */
 483                 skb->dev = dst->dev;
 484                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
 485                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 486                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 487                 kfree_skb(skb);
 488                 return -EMSGSIZE;
 489         }
 490
 491         if (skb_cow(skb, dst->dev->hard_header_len)) {
 492                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 493                 goto drop;
 494         }
 495
 496         hdr = ipv6_hdr(skb);
 497
 498         /* Mangling hops number delayed to point after skb COW */
 499
 500         hdr->hop_limit--;
 501
 502         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 503         return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
 504
 505 error:
 506         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 507 drop:
 508         kfree_skb(skb);
 509         return -EINVAL;
 510 }
 511
 512 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 513 {
 514         to->pkt_type = from->pkt_type;
 515         to->priority = from->priority;
 516         to->protocol = from->protocol;
 517         dst_release(to->dst);
 518         to->dst = dst_clone(from->dst);
 519         to->dev = from->dev;
 520         to->mark = from->mark;
 521
 522 #ifdef CONFIG_NET_SCHED
 523         to->tc_index = from->tc_index;
 524 #endif
 525         nf_copy(to, from);
 526 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 527     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 528         to->nf_trace = from->nf_trace;
 529 #endif
 530         skb_copy_secmark(to, from);
 531 }
 532
 533 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 534 {
 535         u16 offset = sizeof(struct ipv6hdr);
 536         struct ipv6_opt_hdr *exthdr =
 537                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 538         unsigned int packet_len = skb->tail - skb->network_header;
 539         int found_rhdr = 0;
 540         *nexthdr = &ipv6_hdr(skb)->nexthdr;
 541
 542         while (offset + 1 <= packet_len) {
 543
 544                 switch (**nexthdr) {
 545
 546                 case NEXTHDR_HOP:
 547                         break;
 548                 case NEXTHDR_ROUTING:
 549                         found_rhdr = 1;
 550                         break;
 551                 case NEXTHDR_DEST:
 552 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 553                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 554                                 break;
 555 #endif
 556                         if (found_rhdr)
 557                                 return offset;
 558                         break;
 559                 default :
 560                         return offset;
 561                 }
 562
 563                 offset += ipv6_optlen(exthdr);
 564                 *nexthdr = &exthdr->nexthdr;
 565                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 566                                                  offset);
 567         }
 568
 569         return offset;
 570 }
 571 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
 572
 573 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 574 {
 575         struct net_device *dev;
 576         struct sk_buff *frag;
 577         struct rt6_info *rt = (struct rt6_info*)skb->dst;
 578         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 579         struct ipv6hdr *tmp_hdr;
 580         struct frag_hdr *fh;
 581         unsigned int mtu, hlen, left, len;
 582         __be32 frag_id = 0;
 583         int ptr, offset = 0, err=0;
 584         u8 *prevhdr, nexthdr = 0;
 585
 586         dev = rt->u.dst.dev;
 587         hlen = ip6_find_1stfragopt(skb, &prevhdr);
 588         nexthdr = *prevhdr;
 589
 590         mtu = ip6_skb_dst_mtu(skb);
 591
 592         /* We must not fragment if the socket is set to force MTU discovery
 593          * or if the skb it not generated by a local socket.  (This last
 594          * check should be redundant, but it's free.)
 595          */
 596         if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
 597                 skb->dev = skb->dst->dev;
 598                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 599                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 600                 kfree_skb(skb);
 601                 return -EMSGSIZE;
 602         }
 603
 604         if (np && np->frag_size < mtu) {
 605                 if (np->frag_size)
 606                         mtu = np->frag_size;
 607         }
 608         mtu -= hlen + sizeof(struct frag_hdr);
 609
 610         if (skb_shinfo(skb)->frag_list) {
 611                 int first_len = skb_pagelen(skb);
 612
 613                 if (first_len - hlen > mtu ||
 614                     ((first_len - hlen) & 7) ||
 615                     skb_cloned(skb))
 616                         goto slow_path;
 617
 618                 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
 619                         /* Correct geometry. */
 620                         if (frag->len > mtu ||
 621                             ((frag->len & 7) && frag->next) ||
 622                             skb_headroom(frag) < hlen)
 623                             goto slow_path;
 624
 625                         /* Partially cloned skb? */
 626                         if (skb_shared(frag))
 627                                 goto slow_path;
 628
 629                         BUG_ON(frag->sk);
 630                         if (skb->sk) {
 631                                 sock_hold(skb->sk);
 632                                 frag->sk = skb->sk;
 633                                 frag->destructor = sock_wfree;
 634                                 skb->truesize -= frag->truesize;
 635                         }
 636                 }
 637
 638                 err = 0;
 639                 offset = 0;
 640                 frag = skb_shinfo(skb)->frag_list;
 641                 skb_shinfo(skb)->frag_list = NULL;
 642                 /* BUILD HEADER */
 643
 644                 *prevhdr = NEXTHDR_FRAGMENT;
 645                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 646                 if (!tmp_hdr) {
 647                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 648                         return -ENOMEM;
 649                 }
 650
 651                 __skb_pull(skb, hlen);
 652                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 653                 __skb_push(skb, hlen);
 654                 skb_reset_network_header(skb);
 655                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
 656
 657                 ipv6_select_ident(skb, fh);
 658                 fh->nexthdr = nexthdr;
 659                 fh->reserved = 0;
 660                 fh->frag_off = htons(IP6_MF);
 661                 frag_id = fh->identification;
 662
 663                 first_len = skb_pagelen(skb);
 664                 skb->data_len = first_len - skb_headlen(skb);
 665                 skb->len = first_len;
 666                 ipv6_hdr(skb)->payload_len = htons(first_len -
 667                                                    sizeof(struct ipv6hdr));
 668
 669                 dst_hold(&rt->u.dst);
 670
 671                 for (;;) {
 672                         /* Prepare header of the next frame,
 673                          * before previous one went down. */
 674                         if (frag) {
 675                                 frag->ip_summed = CHECKSUM_NONE;
 676                                 skb_reset_transport_header(frag);
 677                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 678                                 __skb_push(frag, hlen);
 679                                 skb_reset_network_header(frag);
 680                                 memcpy(skb_network_header(frag), tmp_hdr,
 681                                        hlen);
 682                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
 683                                 fh->nexthdr = nexthdr;
 684                                 fh->reserved = 0;
 685                                 fh->frag_off = htons(offset);
 686                                 if (frag->next != NULL)
 687                                         fh->frag_off |= htons(IP6_MF);
 688                                 fh->identification = frag_id;
 689                                 ipv6_hdr(frag)->payload_len =
 690                                                 htons(frag->len -
 691                                                       sizeof(struct ipv6hdr));
 692                                 ip6_copy_metadata(frag, skb);
 693                         }
 694
 695                         err = output(skb);
 696                         if(!err)
 697                                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
 698
 699                         if (err || !frag)
 700                                 break;
 701
 702                         skb = frag;
 703                         frag = skb->next;
 704                         skb->next = NULL;
 705                 }
 706
 707                 kfree(tmp_hdr);
 708
 709                 if (err == 0) {
 710                         IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
 711                         dst_release(&rt->u.dst);
 712                         return 0;
 713                 }
 714
 715                 while (frag) {
 716                         skb = frag->next;
 717                         kfree_skb(frag);
 718                         frag = skb;
 719                 }
 720
 721                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
 722                 dst_release(&rt->u.dst);
 723                 return err;
 724         }
 725
 726 slow_path:
 727         left = skb->len - hlen;         /* Space per frame */
 728         ptr = hlen;                     /* Where to start from */
 729
 730         /*
 731          *      Fragment the datagram.
 732          */
 733
 734         *prevhdr = NEXTHDR_FRAGMENT;
 735
 736         /*
 737          *      Keep copying data until we run out.
 738          */
 739         while(left > 0) {
 740                 len = left;
 741                 /* IF: it doesn't fit, use 'mtu' - the data space left */
 742                 if (len > mtu)
 743                         len = mtu;
 744                 /* IF: we are not sending upto and including the packet end
 745                    then align the next start on an eight byte boundary */
 746                 if (len < left) {
 747                         len &= ~7;
 748                 }
 749                 /*
 750                  *      Allocate buffer.
 751                  */
 752
 753                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 754                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 755                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
 756                                       IPSTATS_MIB_FRAGFAILS);
 757                         err = -ENOMEM;
 758                         goto fail;
 759                 }
 760
 761                 /*
 762                  *      Set up data on packet
 763                  */
 764
 765                 ip6_copy_metadata(frag, skb);
 766                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 767                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 768                 skb_reset_network_header(frag);
 769                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 770                 frag->transport_header = (frag->network_header + hlen +
 771                                           sizeof(struct frag_hdr));
 772
 773                 /*
 774                  *      Charge the memory for the fragment to any owner
 775                  *      it might possess
 776                  */
 777                 if (skb->sk)
 778                         skb_set_owner_w(frag, skb->sk);
 779
 780                 /*
 781                  *      Copy the packet header into the new buffer.
 782                  */
 783                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 784
 785                 /*
 786                  *      Build fragment header.
 787                  */
 788                 fh->nexthdr = nexthdr;
 789                 fh->reserved = 0;
 790                 if (!frag_id) {
 791                         ipv6_select_ident(skb, fh);
 792                         frag_id = fh->identification;
 793                 } else
 794                         fh->identification = frag_id;
 795
 796                 /*
 797                  *      Copy a block of the IP datagram.
 798                  */
 799                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 800                         BUG();
 801                 left -= len;
 802
 803                 fh->frag_off = htons(offset);
 804                 if (left > 0)
 805                         fh->frag_off |= htons(IP6_MF);
 806                 ipv6_hdr(frag)->payload_len = htons(frag->len -
 807                                                     sizeof(struct ipv6hdr));
 808
 809                 ptr += len;
 810                 offset += len;
 811
 812                 /*
 813                  *      Put this fragment into the sending queue.
 814                  */
 815                 err = output(frag);
 816                 if (err)
 817                         goto fail;
 818
 819                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
 820         }
 821         IP6_INC_STATS(ip6_dst_idev(skb->dst),
 822                       IPSTATS_MIB_FRAGOKS);
 823         kfree_skb(skb);
 824         return err;
 825
 826 fail:
 827         IP6_INC_STATS(ip6_dst_idev(skb->dst),
 828                       IPSTATS_MIB_FRAGFAILS);
 829         kfree_skb(skb);
 830         return err;
 831 }
 832
 833 static inline int ip6_rt_check(struct rt6key *rt_key,
 834                                struct in6_addr *fl_addr,
 835                                struct in6_addr *addr_cache)
 836 {
 837         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 838                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
 839 }
 840
 841 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 842                                           struct dst_entry *dst,
 843                                           struct flowi *fl)
 844 {
 845         struct ipv6_pinfo *np = inet6_sk(sk);
 846         struct rt6_info *rt = (struct rt6_info *)dst;
 847
 848         if (!dst)
 849                 goto out;
 850
 851         /* Yes, checking route validity in not connected
 852          * case is not very simple. Take into account,
 853          * that we do not support routing by source, TOS,
 854          * and MSG_DONTROUTE            --ANK (980726)
 855          *
 856          * 1. ip6_rt_check(): If route was host route,
 857          *    check that cached destination is current.
 858          *    If it is network route, we still may
 859          *    check its validity using saved pointer
 860          *    to the last used address: daddr_cache.
 861          *    We do not want to save whole address now,
 862          *    (because main consumer of this service
 863          *    is tcp, which has not this problem),
 864          *    so that the last trick works only on connected
 865          *    sockets.
 866          * 2. oif also should be the same.
 867          */
 868         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
 869 #ifdef CONFIG_IPV6_SUBTREES
 870             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
 871 #endif
 872             (fl->oif && fl->oif != dst->dev->ifindex)) {
 873                 dst_release(dst);
 874                 dst = NULL;
 875         }
 876
 877 out:
 878         return dst;
 879 }
 880
 881 static int ip6_dst_lookup_tail(struct sock *sk,
 882                                struct dst_entry **dst, struct flowi *fl)
 883 {
 884         int err;
 885
 886         if (*dst == NULL)
 887                 *dst = ip6_route_output(sk, fl);
 888
 889         if ((err = (*dst)->error))
 890                 goto out_err_release;
 891
 892         if (ipv6_addr_any(&fl->fl6_src)) {
 893                 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
 894                 if (err)
 895                         goto out_err_release;
 896         }
 897
 898 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 899                 /*
 900                  * Here if the dst entry we've looked up
 901                  * has a neighbour entry that is in the INCOMPLETE
 902                  * state and the src address from the flow is
 903                  * marked as OPTIMISTIC, we release the found
 904                  * dst entry and replace it instead with the
 905                  * dst entry of the nexthop router
 906                  */
 907                 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
 908                         struct inet6_ifaddr *ifp;
 909                         struct flowi fl_gw;
 910                         int redirect;
 911
 912                         ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
 913
 914                         redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 915                         if (ifp)
 916                                 in6_ifa_put(ifp);
 917
 918                         if (redirect) {
 919                                 /*
 920                                  * We need to get the dst entry for the
 921                                  * default router instead
 922                                  */
 923                                 dst_release(*dst);
 924                                 memcpy(&fl_gw, fl, sizeof(struct flowi));
 925                                 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
 926                                 *dst = ip6_route_output(sk, &fl_gw);
 927                                 if ((err = (*dst)->error))
 928                                         goto out_err_release;
 929                         }
 930                 }
 931 #endif
 932
 933         return 0;
 934
 935 out_err_release:
 936         dst_release(*dst);
 937         *dst = NULL;
 938         return err;
 939 }
 940
 941 /**
 942  *      ip6_dst_lookup - perform route lookup on flow
 943  *      @sk: socket which provides route info
 944  *      @dst: pointer to dst_entry * for result
 945  *      @fl: flow to lookup
 946  *
 947  *      This function performs a route lookup on the given flow.
 948  *
 949  *      It returns zero on success, or a standard errno code on error.
 950  */
 951 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 952 {
 953         *dst = NULL;
 954         return ip6_dst_lookup_tail(sk, dst, fl);
 955 }
 956 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 957
 958 /**
 959  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
 960  *      @sk: socket which provides the dst cache and route info
 961  *      @dst: pointer to dst_entry * for result
 962  *      @fl: flow to lookup
 963  *
 964  *      This function performs a route lookup on the given flow with the
 965  *      possibility of using the cached route in the socket if it is valid.
 966  *      It will take the socket dst lock when operating on the dst cache.
 967  *      As a result, this function can only be used in process context.
 968  *
 969  *      It returns zero on success, or a standard errno code on error.
 970  */
 971 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 972 {
 973         *dst = NULL;
 974         if (sk) {
 975                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
 976                 *dst = ip6_sk_dst_check(sk, *dst, fl);
 977         }
 978
 979         return ip6_dst_lookup_tail(sk, dst, fl);
 980 }
 981 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
 982
 983 static inline int ip6_ufo_append_data(struct sock *sk,
 984                         int getfrag(void *from, char *to, int offset, int len,
 985                         int odd, struct sk_buff *skb),
 986                         void *from, int length, int hh_len, int fragheaderlen,
 987                         int transhdrlen, int mtu,unsigned int flags)
 988
 989 {
 990         struct sk_buff *skb;
 991         int err;
 992
 993         /* There is support for UDP large send offload by network
 994          * device, so create one single skb packet containing complete
 995          * udp datagram
 996          */
 997         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
 998                 skb = sock_alloc_send_skb(sk,
 999                         hh_len + fragheaderlen + transhdrlen + 20,
1000                         (flags & MSG_DONTWAIT), &err);
1001                 if (skb == NULL)
1002                         return -ENOMEM;
1003
1004                 /* reserve space for Hardware header */
1005                 skb_reserve(skb, hh_len);
1006
1007                 /* create space for UDP/IP header */
1008                 skb_put(skb,fragheaderlen + transhdrlen);
1009
1010                 /* initialize network header pointer */
1011                 skb_reset_network_header(skb);
1012
1013                 /* initialize protocol header pointer */
1014                 skb->transport_header = skb->network_header + fragheaderlen;
1015
1016                 skb->ip_summed = CHECKSUM_PARTIAL;
1017                 skb->csum = 0;
1018                 sk->sk_sndmsg_off = 0;
1019         }
1020
1021         err = skb_append_datato_frags(sk,skb, getfrag, from,
1022                                       (length - transhdrlen));
1023         if (!err) {
1024                 struct frag_hdr fhdr;
1025
1026                 /* specify the length of each IP datagram fragment*/
1027                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1028                                             sizeof(struct frag_hdr);
1029                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1030                 ipv6_select_ident(skb, &fhdr);
1031                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1032                 __skb_queue_tail(&sk->sk_write_queue, skb);
1033
1034                 return 0;
1035         }
1036         /* There is not enough support do UPD LSO,
1037          * so follow normal path
1038          */
1039         kfree_skb(skb);
1040
1041         return err;
1042 }
1043
1044 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1045         int offset, int len, int odd, struct sk_buff *skb),
1046         void *from, int length, int transhdrlen,
1047         int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1048         struct rt6_info *rt, unsigned int flags)
1049 {
1050         struct inet_sock *inet = inet_sk(sk);
1051         struct ipv6_pinfo *np = inet6_sk(sk);
1052         struct sk_buff *skb;
1053         unsigned int maxfraglen, fragheaderlen;
1054         int exthdrlen;
1055         int hh_len;
1056         int mtu;
1057         int copy;
1058         int err;
1059         int offset = 0;
1060         int csummode = CHECKSUM_NONE;
1061
1062         if (flags&MSG_PROBE)
1063                 return 0;
1064         if (skb_queue_empty(&sk->sk_write_queue)) {
1065                 /*
1066                  * setup for corking
1067                  */
1068                 if (opt) {
1069                         if (np->cork.opt == NULL) {
1070                                 np->cork.opt = kmalloc(opt->tot_len,
1071                                                        sk->sk_allocation);
1072                                 if (unlikely(np->cork.opt == NULL))
1073                                         return -ENOBUFS;
1074                         } else if (np->cork.opt->tot_len < opt->tot_len) {
1075                                 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
1076                                 return -EINVAL;
1077                         }
1078                         memcpy(np->cork.opt, opt, opt->tot_len);
1079                         inet->cork.flags |= IPCORK_OPT;
1080                         /* need source address above miyazawa*/
1081                 }
1082                 dst_hold(&rt->u.dst);
1083                 np->cork.rt = rt;
1084                 inet->cork.fl = *fl;
1085                 np->cork.hop_limit = hlimit;
1086                 np->cork.tclass = tclass;
1087                 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1088                       rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1089                 if (np->frag_size < mtu) {
1090                         if (np->frag_size)
1091                                 mtu = np->frag_size;
1092                 }
1093                 inet->cork.fragsize = mtu;
1094                 if (dst_allfrag(rt->u.dst.path))
1095                         inet->cork.flags |= IPCORK_ALLFRAG;
1096                 inet->cork.length = 0;
1097                 sk->sk_sndmsg_page = NULL;
1098                 sk->sk_sndmsg_off = 0;
1099                 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
1100                 length += exthdrlen;
1101                 transhdrlen += exthdrlen;
1102         } else {
1103                 rt = np->cork.rt;
1104                 fl = &inet->cork.fl;
1105                 if (inet->cork.flags & IPCORK_OPT)
1106                         opt = np->cork.opt;
1107                 transhdrlen = 0;
1108                 exthdrlen = 0;
1109                 mtu = inet->cork.fragsize;
1110         }
1111
1112         hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1113
1114         fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
1115         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1116
1117         if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1118                 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1119                         ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1120                         return -EMSGSIZE;
1121                 }
1122         }
1123
1124         /*
1125          * Let's try using as much space as possible.
1126          * Use MTU if total length of the message fits into the MTU.
1127          * Otherwise, we need to reserve fragment header and
1128          * fragment alignment (= 8-15 octects, in total).
1129          *
1130          * Note that we may need to "move" the data from the tail of
1131          * of the buffer to the new fragment when we split
1132          * the message.
1133          *
1134          * FIXME: It may be fragmented into multiple chunks
1135          *        at once if non-fragmentable extension headers
1136          *        are too large.
1137          * --yoshfuji
1138          */
1139
1140         inet->cork.length += length;
1141         if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1142             (rt->u.dst.dev->features & NETIF_F_UFO)) {
1143
1144                 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1145                                           fragheaderlen, transhdrlen, mtu,
1146                                           flags);
1147                 if (err)
1148                         goto error;
1149                 return 0;
1150         }
1151
1152         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1153                 goto alloc_new_skb;
1154
1155         while (length > 0) {
1156                 /* Check if the remaining data fits into current packet. */
1157                 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1158                 if (copy < length)
1159                         copy = maxfraglen - skb->len;
1160
1161                 if (copy <= 0) {
1162                         char *data;
1163                         unsigned int datalen;
1164                         unsigned int fraglen;
1165                         unsigned int fraggap;
1166                         unsigned int alloclen;
1167                         struct sk_buff *skb_prev;
1168 alloc_new_skb:
1169                         skb_prev = skb;
1170
1171                         /* There's no room in the current skb */
1172                         if (skb_prev)
1173                                 fraggap = skb_prev->len - maxfraglen;
1174                         else
1175                                 fraggap = 0;
1176
1177                         /*
1178                          * If remaining data exceeds the mtu,
1179                          * we know we need more fragment(s).
1180                          */
1181                         datalen = length + fraggap;
1182                         if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1183                                 datalen = maxfraglen - fragheaderlen;
1184
1185                         fraglen = datalen + fragheaderlen;
1186                         if ((flags & MSG_MORE) &&
1187                             !(rt->u.dst.dev->features&NETIF_F_SG))
1188                                 alloclen = mtu;
1189                         else
1190                                 alloclen = datalen + fragheaderlen;
1191
1192                         /*
1193                          * The last fragment gets additional space at tail.
1194                          * Note: we overallocate on fragments with MSG_MODE
1195                          * because we have no idea if we're the last one.
1196                          */
1197                         if (datalen == length + fraggap)
1198                                 alloclen += rt->u.dst.trailer_len;
1199
1200                         /*
1201                          * We just reserve space for fragment header.
1202                          * Note: this may be overallocation if the message
1203                          * (without MSG_MORE) fits into the MTU.
1204                          */
1205                         alloclen += sizeof(struct frag_hdr);
1206
1207                         if (transhdrlen) {
1208                                 skb = sock_alloc_send_skb(sk,
1209                                                 alloclen + hh_len,
1210                                                 (flags & MSG_DONTWAIT), &err);
1211                         } else {
1212                                 skb = NULL;
1213                                 if (atomic_read(&sk->sk_wmem_alloc) <=
1214                                     2 * sk->sk_sndbuf)
1215                                         skb = sock_wmalloc(sk,
1216                                                            alloclen + hh_len, 1,
1217                                                            sk->sk_allocation);
1218                                 if (unlikely(skb == NULL))
1219                                         err = -ENOBUFS;
1220                         }
1221                         if (skb == NULL)
1222                                 goto error;
1223                         /*
1224                          *      Fill in the control structures
1225                          */
1226                         skb->ip_summed = csummode;
1227                         skb->csum = 0;
1228                         /* reserve for fragmentation */
1229                         skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1230
1231                         /*
1232                          *      Find where to start putting bytes
1233                          */
1234                         data = skb_put(skb, fraglen);
1235                         skb_set_network_header(skb, exthdrlen);
1236                         data += fragheaderlen;
1237                         skb->transport_header = (skb->network_header +
1238                                                  fragheaderlen);
1239                         if (fraggap) {
1240                                 skb->csum = skb_copy_and_csum_bits(
1241                                         skb_prev, maxfraglen,
1242                                         data + transhdrlen, fraggap, 0);
1243                                 skb_prev->csum = csum_sub(skb_prev->csum,
1244                                                           skb->csum);
1245                                 data += fraggap;
1246                                 pskb_trim_unique(skb_prev, maxfraglen);
1247                         }
1248                         copy = datalen - transhdrlen - fraggap;
1249                         if (copy < 0) {
1250                                 err = -EINVAL;
1251                                 kfree_skb(skb);
1252                                 goto error;
1253                         } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1254                                 err = -EFAULT;
1255                                 kfree_skb(skb);
1256                                 goto error;
1257                         }
1258
1259                         offset += copy;
1260                         length -= datalen - fraggap;
1261                         transhdrlen = 0;
1262                         exthdrlen = 0;
1263                         csummode = CHECKSUM_NONE;
1264
1265                         /*
1266                          * Put the packet on the pending queue
1267                          */
1268                         __skb_queue_tail(&sk->sk_write_queue, skb);
1269                         continue;
1270                 }
1271
1272                 if (copy > length)
1273                         copy = length;
1274
1275                 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1276                         unsigned int off;
1277
1278                         off = skb->len;
1279                         if (getfrag(from, skb_put(skb, copy),
1280                                                 offset, copy, off, skb) < 0) {
1281                                 __skb_trim(skb, off);
1282                                 err = -EFAULT;
1283                                 goto error;
1284                         }
1285                 } else {
1286                         int i = skb_shinfo(skb)->nr_frags;
1287                         skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1288                         struct page *page = sk->sk_sndmsg_page;
1289                         int off = sk->sk_sndmsg_off;
1290                         unsigned int left;
1291
1292                         if (page && (left = PAGE_SIZE - off) > 0) {
1293                                 if (copy >= left)
1294                                         copy = left;
1295                                 if (page != frag->page) {
1296                                         if (i == MAX_SKB_FRAGS) {
1297                                                 err = -EMSGSIZE;
1298                                                 goto error;
1299                                         }
1300                                         get_page(page);
1301                                         skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1302                                         frag = &skb_shinfo(skb)->frags[i];
1303                                 }
1304                         } else if(i < MAX_SKB_FRAGS) {
1305                                 if (copy > PAGE_SIZE)
1306                                         copy = PAGE_SIZE;
1307                                 page = alloc_pages(sk->sk_allocation, 0);
1308                                 if (page == NULL) {
1309                                         err = -ENOMEM;
1310                                         goto error;
1311                                 }
1312                                 sk->sk_sndmsg_page = page;
1313                                 sk->sk_sndmsg_off = 0;
1314
1315                                 skb_fill_page_desc(skb, i, page, 0, 0);
1316                                 frag = &skb_shinfo(skb)->frags[i];
1317                                 skb->truesize += PAGE_SIZE;
1318                                 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1319                         } else {
1320                                 err = -EMSGSIZE;
1321                                 goto error;
1322                         }
1323                         if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1324                                 err = -EFAULT;
1325                                 goto error;
1326                         }
1327                         sk->sk_sndmsg_off += copy;
1328                         frag->size += copy;
1329                         skb->len += copy;
1330                         skb->data_len += copy;
1331                 }
1332                 offset += copy;
1333                 length -= copy;
1334         }
1335         return 0;
1336 error:
1337         inet->cork.length -= length;
1338         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1339         return err;
1340 }
1341
1342 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1343 {
1344         inet->cork.flags &= ~IPCORK_OPT;
1345         kfree(np->cork.opt);
1346         np->cork.opt = NULL;
1347         if (np->cork.rt) {
1348                 dst_release(&np->cork.rt->u.dst);
1349                 np->cork.rt = NULL;
1350                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1351         }
1352         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1353 }
1354
1355 int ip6_push_pending_frames(struct sock *sk)
1356 {
1357         struct sk_buff *skb, *tmp_skb;
1358         struct sk_buff **tail_skb;
1359         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1360         struct inet_sock *inet = inet_sk(sk);
1361         struct ipv6_pinfo *np = inet6_sk(sk);
1362         struct ipv6hdr *hdr;
1363         struct ipv6_txoptions *opt = np->cork.opt;
1364         struct rt6_info *rt = np->cork.rt;
1365         struct flowi *fl = &inet->cork.fl;
1366         unsigned char proto = fl->proto;
1367         int err = 0;
1368
1369         if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1370                 goto out;
1371         tail_skb = &(skb_shinfo(skb)->frag_list);
1372
1373         /* move skb->data to ip header from ext header */
1374         if (skb->data < skb_network_header(skb))
1375                 __skb_pull(skb, skb_network_offset(skb));
1376         while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1377                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1378                 *tail_skb = tmp_skb;
1379                 tail_skb = &(tmp_skb->next);
1380                 skb->len += tmp_skb->len;
1381                 skb->data_len += tmp_skb->len;
1382                 skb->truesize += tmp_skb->truesize;
1383                 __sock_put(tmp_skb->sk);
1384                 tmp_skb->destructor = NULL;
1385                 tmp_skb->sk = NULL;
1386         }
1387
1388         ipv6_addr_copy(final_dst, &fl->fl6_dst);
1389         __skb_pull(skb, skb_network_header_len(skb));
1390         if (opt && opt->opt_flen)
1391                 ipv6_push_frag_opts(skb, opt, &proto);
1392         if (opt && opt->opt_nflen)
1393                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1394
1395         skb_push(skb, sizeof(struct ipv6hdr));
1396         skb_reset_network_header(skb);
1397         hdr = ipv6_hdr(skb);
1398
1399         *(__be32*)hdr = fl->fl6_flowlabel |
1400                      htonl(0x60000000 | ((int)np->cork.tclass << 20));
1401
1402         if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1403                 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1404         else
1405                 hdr->payload_len = 0;
1406         hdr->hop_limit = np->cork.hop_limit;
1407         hdr->nexthdr = proto;
1408         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1409         ipv6_addr_copy(&hdr->daddr, final_dst);
1410
1411         skb->priority = sk->sk_priority;
1412
1413         skb->dst = dst_clone(&rt->u.dst);
1414         IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
1415         if (proto == IPPROTO_ICMPV6) {
1416                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
1417
1418                 ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
1419                 ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
1420         }
1421
1422         err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1423         if (err) {
1424                 if (err > 0)
1425                         err = np->recverr ? net_xmit_errno(err) : 0;
1426                 if (err)
1427                         goto error;
1428         }
1429
1430 out:
1431         ip6_cork_release(inet, np);
1432         return err;
1433 error:
1434         goto out;
1435 }
1436
1437 void ip6_flush_pending_frames(struct sock *sk)
1438 {
1439         struct sk_buff *skb;
1440
1441         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1442                 if (skb->dst)
1443                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
1444                                       IPSTATS_MIB_OUTDISCARDS);
1445                 kfree_skb(skb);
1446         }
1447
1448         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1449 }