git.oblomov.eu Git - linux-2.6/blob - net/ipv6/ip6_output.c

   1 /*
   2  *      IPv6 output functions
   3  *      Linux INET6 implementation
   4  *
   5  *      Authors:
   6  *      Pedro Roque             <roque@di.fc.ul.pt>
   7  *
   8  *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
   9  *
  10  *      Based on linux/net/ipv4/ip_output.c
  11  *
  12  *      This program is free software; you can redistribute it and/or
  13  *      modify it under the terms of the GNU General Public License
  14  *      as published by the Free Software Foundation; either version
  15  *      2 of the License, or (at your option) any later version.
  16  *
  17  *      Changes:
   18  *      A.N.Kuznetsov   :       arithmetics in fragmentation.
  19  *                              extension headers are implemented.
  20  *                              route changes now work.
  21  *                              ip6_forward does not confuse sniffers.
  22  *                              etc.
  23  *
  24  *      H. von Brand    :       Added missing #include <linux/string.h>
  25  *      Imran Patel     :       frag id should be in NBO
  26  *      Kazunori MIYAZAWA @USAGI
  27  *                      :       add ip6_append_data and related functions
  28  *                              for datagram xmit
  29  */
  30
  31 #include <linux/errno.h>
  32 #include <linux/types.h>
  33 #include <linux/string.h>
  34 #include <linux/socket.h>
  35 #include <linux/net.h>
  36 #include <linux/netdevice.h>
  37 #include <linux/if_arp.h>
  38 #include <linux/in6.h>
  39 #include <linux/tcp.h>
  40 #include <linux/route.h>
  41 #include <linux/module.h>
  42
  43 #include <linux/netfilter.h>
  44 #include <linux/netfilter_ipv6.h>
  45
  46 #include <net/sock.h>
  47 #include <net/snmp.h>
  48
  49 #include <net/ipv6.h>
  50 #include <net/ndisc.h>
  51 #include <net/protocol.h>
  52 #include <net/ip6_route.h>
  53 #include <net/addrconf.h>
  54 #include <net/rawv6.h>
  55 #include <net/icmp.h>
  56 #include <net/xfrm.h>
  57 #include <net/checksum.h>
  58
  59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
  60
  61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
  62 {
  63         static u32 ipv6_fragmentation_id = 1;
  64         static DEFINE_SPINLOCK(ip6_id_lock);
  65
  66         spin_lock_bh(&ip6_id_lock);
  67         fhdr->identification = htonl(ipv6_fragmentation_id);
  68         if (++ipv6_fragmentation_id == 0)
  69                 ipv6_fragmentation_id = 1;
  70         spin_unlock_bh(&ip6_id_lock);
  71 }
  72
  73 static int ip6_output_finish(struct sk_buff *skb)
  74 {
  75         struct dst_entry *dst = skb->dst;
  76
  77         if (dst->hh)
  78                 return neigh_hh_output(dst->hh, skb);
  79         else if (dst->neighbour)
  80                 return dst->neighbour->output(skb);
  81
  82         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
  83         kfree_skb(skb);
  84         return -EINVAL;
  85
  86 }
  87
  88 /* dev_loopback_xmit for use with netfilter. */
  89 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
  90 {
  91         skb_reset_mac_header(newskb);
  92         __skb_pull(newskb, skb_network_offset(newskb));
  93         newskb->pkt_type = PACKET_LOOPBACK;
  94         newskb->ip_summed = CHECKSUM_UNNECESSARY;
  95         BUG_TRAP(newskb->dst);
  96
  97         netif_rx(newskb);
  98         return 0;
  99 }
 100
 101
 102 static int ip6_output2(struct sk_buff *skb)
 103 {
 104         struct dst_entry *dst = skb->dst;
 105         struct net_device *dev = dst->dev;
 106
 107         skb->protocol = htons(ETH_P_IPV6);
 108         skb->dev = dev;
 109
 110         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
 111                 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
 112                 struct inet6_dev *idev = ip6_dst_idev(skb->dst);
 113
 114                 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
 115                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
 116                                         &ipv6_hdr(skb)->saddr)) {
 117                         struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
 118
 119                         /* Do not check for IFF_ALLMULTI; multicast routing
 120                            is not supported in any case.
 121                          */
 122                         if (newskb)
 123                                 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
 124                                         newskb->dev,
 125                                         ip6_dev_loopback_xmit);
 126
 127                         if (ipv6_hdr(skb)->hop_limit == 0) {
 128                                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
 129                                 kfree_skb(skb);
 130                                 return 0;
 131                         }
 132                 }
 133
 134                 IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
 135         }
 136
 137         return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
 138 }
 139
 140 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
 141 {
 142         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 143
 144         return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
 145                skb->dst->dev->mtu : dst_mtu(skb->dst);
 146 }
 147
 148 int ip6_output(struct sk_buff *skb)
 149 {
 150         if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
 151                                 dst_allfrag(skb->dst))
 152                 return ip6_fragment(skb, ip6_output2);
 153         else
 154                 return ip6_output2(skb);
 155 }
 156
 157 /*
 158  *      xmit an sk_buff (used by TCP)
 159  */
 160
 161 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
 162              struct ipv6_txoptions *opt, int ipfragok)
 163 {
 164         struct ipv6_pinfo *np = inet6_sk(sk);
 165         struct in6_addr *first_hop = &fl->fl6_dst;
 166         struct dst_entry *dst = skb->dst;
 167         struct ipv6hdr *hdr;
 168         u8  proto = fl->proto;
 169         int seg_len = skb->len;
 170         int hlimit, tclass;
 171         u32 mtu;
 172
 173         if (opt) {
 174                 unsigned int head_room;
 175
 176                 /* First: exthdrs may take lots of space (~8K for now)
 177                    MAX_HEADER is not enough.
 178                  */
 179                 head_room = opt->opt_nflen + opt->opt_flen;
 180                 seg_len += head_room;
 181                 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
 182
 183                 if (skb_headroom(skb) < head_room) {
 184                         struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
 185                         if (skb2 == NULL) {
 186                                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
 187                                               IPSTATS_MIB_OUTDISCARDS);
 188                                 kfree_skb(skb);
 189                                 return -ENOBUFS;
 190                         }
 191                         kfree_skb(skb);
 192                         skb = skb2;
 193                         if (sk)
 194                                 skb_set_owner_w(skb, sk);
 195                 }
 196                 if (opt->opt_flen)
 197                         ipv6_push_frag_opts(skb, opt, &proto);
 198                 if (opt->opt_nflen)
 199                         ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
 200         }
 201
 202         skb_push(skb, sizeof(struct ipv6hdr));
 203         skb_reset_network_header(skb);
 204         hdr = ipv6_hdr(skb);
 205
 206         /*
 207          *      Fill in the IPv6 header
 208          */
 209
 210         hlimit = -1;
 211         if (np)
 212                 hlimit = np->hop_limit;
 213         if (hlimit < 0)
 214                 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
 215         if (hlimit < 0)
 216                 hlimit = ipv6_get_hoplimit(dst->dev);
 217
 218         tclass = -1;
 219         if (np)
 220                 tclass = np->tclass;
 221         if (tclass < 0)
 222                 tclass = 0;
 223
 224         *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
 225
 226         hdr->payload_len = htons(seg_len);
 227         hdr->nexthdr = proto;
 228         hdr->hop_limit = hlimit;
 229
 230         ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
 231         ipv6_addr_copy(&hdr->daddr, first_hop);
 232
 233         skb->priority = sk->sk_priority;
 234
 235         mtu = dst_mtu(dst);
 236         if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
 237                 IP6_INC_STATS(ip6_dst_idev(skb->dst),
 238                               IPSTATS_MIB_OUTREQUESTS);
 239                 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
 240                                 dst_output);
 241         }
 242
 243         if (net_ratelimit())
 244                 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
 245         skb->dev = dst->dev;
 246         icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 247         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 248         kfree_skb(skb);
 249         return -EMSGSIZE;
 250 }
 251
 252 EXPORT_SYMBOL(ip6_xmit);
 253
 254 /*
 255  *      To avoid extra problems ND packets are send through this
 256  *      routine. It's code duplication but I really want to avoid
 257  *      extra checks since ipv6_build_header is used by TCP (which
 258  *      is for us performance critical)
 259  */
 260
 261 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
 262                struct in6_addr *saddr, struct in6_addr *daddr,
 263                int proto, int len)
 264 {
 265         struct ipv6_pinfo *np = inet6_sk(sk);
 266         struct ipv6hdr *hdr;
 267         int totlen;
 268
 269         skb->protocol = htons(ETH_P_IPV6);
 270         skb->dev = dev;
 271
 272         totlen = len + sizeof(struct ipv6hdr);
 273
 274         skb_reset_network_header(skb);
 275         skb_put(skb, sizeof(struct ipv6hdr));
 276         hdr = ipv6_hdr(skb);
 277
 278         *(__be32*)hdr = htonl(0x60000000);
 279
 280         hdr->payload_len = htons(len);
 281         hdr->nexthdr = proto;
 282         hdr->hop_limit = np->hop_limit;
 283
 284         ipv6_addr_copy(&hdr->saddr, saddr);
 285         ipv6_addr_copy(&hdr->daddr, daddr);
 286
 287         return 0;
 288 }
 289
 290 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
 291 {
 292         struct ip6_ra_chain *ra;
 293         struct sock *last = NULL;
 294
 295         read_lock(&ip6_ra_lock);
 296         for (ra = ip6_ra_chain; ra; ra = ra->next) {
 297                 struct sock *sk = ra->sk;
 298                 if (sk && ra->sel == sel &&
 299                     (!sk->sk_bound_dev_if ||
 300                      sk->sk_bound_dev_if == skb->dev->ifindex)) {
 301                         if (last) {
 302                                 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
 303                                 if (skb2)
 304                                         rawv6_rcv(last, skb2);
 305                         }
 306                         last = sk;
 307                 }
 308         }
 309
 310         if (last) {
 311                 rawv6_rcv(last, skb);
 312                 read_unlock(&ip6_ra_lock);
 313                 return 1;
 314         }
 315         read_unlock(&ip6_ra_lock);
 316         return 0;
 317 }
 318
 319 static int ip6_forward_proxy_check(struct sk_buff *skb)
 320 {
 321         struct ipv6hdr *hdr = ipv6_hdr(skb);
 322         u8 nexthdr = hdr->nexthdr;
 323         int offset;
 324
 325         if (ipv6_ext_hdr(nexthdr)) {
 326                 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
 327                 if (offset < 0)
 328                         return 0;
 329         } else
 330                 offset = sizeof(struct ipv6hdr);
 331
 332         if (nexthdr == IPPROTO_ICMPV6) {
 333                 struct icmp6hdr *icmp6;
 334
 335                 if (!pskb_may_pull(skb, (skb_network_header(skb) +
 336                                          offset + 1 - skb->data)))
 337                         return 0;
 338
 339                 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
 340
 341                 switch (icmp6->icmp6_type) {
 342                 case NDISC_ROUTER_SOLICITATION:
 343                 case NDISC_ROUTER_ADVERTISEMENT:
 344                 case NDISC_NEIGHBOUR_SOLICITATION:
 345                 case NDISC_NEIGHBOUR_ADVERTISEMENT:
 346                 case NDISC_REDIRECT:
 347                         /* For reaction involving unicast neighbor discovery
 348                          * message destined to the proxied address, pass it to
 349                          * input function.
 350                          */
 351                         return 1;
 352                 default:
 353                         break;
 354                 }
 355         }
 356
 357         /*
 358          * The proxying router can't forward traffic sent to a link-local
 359          * address, so signal the sender and discard the packet. This
 360          * behavior is clarified by the MIPv6 specification.
 361          */
 362         if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
 363                 dst_link_failure(skb);
 364                 return -1;
 365         }
 366
 367         return 0;
 368 }
 369
 370 static inline int ip6_forward_finish(struct sk_buff *skb)
 371 {
 372         return dst_output(skb);
 373 }
 374
 375 int ip6_forward(struct sk_buff *skb)
 376 {
 377         struct dst_entry *dst = skb->dst;
 378         struct ipv6hdr *hdr = ipv6_hdr(skb);
 379         struct inet6_skb_parm *opt = IP6CB(skb);
 380
 381         if (ipv6_devconf.forwarding == 0)
 382                 goto error;
 383
 384         if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
 385                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 386                 goto drop;
 387         }
 388
 389         skb_forward_csum(skb);
 390
 391         /*
 392          *      We DO NOT make any processing on
 393          *      RA packets, pushing them to user level AS IS
 394          *      without ane WARRANTY that application will be able
 395          *      to interpret them. The reason is that we
 396          *      cannot make anything clever here.
 397          *
 398          *      We are not end-node, so that if packet contains
 399          *      AH/ESP, we cannot make anything.
 400          *      Defragmentation also would be mistake, RA packets
 401          *      cannot be fragmented, because there is no warranty
 402          *      that different fragments will go along one path. --ANK
 403          */
 404         if (opt->ra) {
 405                 u8 *ptr = skb_network_header(skb) + opt->ra;
 406                 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
 407                         return 0;
 408         }
 409
 410         /*
 411          *      check and decrement ttl
 412          */
 413         if (hdr->hop_limit <= 1) {
 414                 /* Force OUTPUT device used as source address */
 415                 skb->dev = dst->dev;
 416                 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
 417                             0, skb->dev);
 418                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
 419
 420                 kfree_skb(skb);
 421                 return -ETIMEDOUT;
 422         }
 423
 424         /* XXX: idev->cnf.proxy_ndp? */
 425         if (ipv6_devconf.proxy_ndp &&
 426             pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
 427                 int proxied = ip6_forward_proxy_check(skb);
 428                 if (proxied > 0)
 429                         return ip6_input(skb);
 430                 else if (proxied < 0) {
 431                         IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 432                         goto drop;
 433                 }
 434         }
 435
 436         if (!xfrm6_route_forward(skb)) {
 437                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
 438                 goto drop;
 439         }
 440         dst = skb->dst;
 441
 442         /* IPv6 specs say nothing about it, but it is clear that we cannot
 443            send redirects to source routed frames.
 444            We don't send redirects to frames decapsulated from IPsec.
 445          */
 446         if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
 447             !skb->sp) {
 448                 struct in6_addr *target = NULL;
 449                 struct rt6_info *rt;
 450                 struct neighbour *n = dst->neighbour;
 451
 452                 /*
 453                  *      incoming and outgoing devices are the same
 454                  *      send a redirect.
 455                  */
 456
 457                 rt = (struct rt6_info *) dst;
 458                 if ((rt->rt6i_flags & RTF_GATEWAY))
 459                         target = (struct in6_addr*)&n->primary_key;
 460                 else
 461                         target = &hdr->daddr;
 462
 463                 /* Limit redirects both by destination (here)
 464                    and by source (inside ndisc_send_redirect)
 465                  */
 466                 if (xrlim_allow(dst, 1*HZ))
 467                         ndisc_send_redirect(skb, n, target);
 468         } else {
 469                 int addrtype = ipv6_addr_type(&hdr->saddr);
 470
 471                 /* This check is security critical. */
 472                 if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
 473                         goto error;
 474                 if (addrtype & IPV6_ADDR_LINKLOCAL) {
 475                         icmpv6_send(skb, ICMPV6_DEST_UNREACH,
 476                                 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
 477                         goto error;
 478                 }
 479         }
 480
 481         if (skb->len > dst_mtu(dst)) {
 482                 /* Again, force OUTPUT device used as source address */
 483                 skb->dev = dst->dev;
 484                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
 485                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
 486                 IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
 487                 kfree_skb(skb);
 488                 return -EMSGSIZE;
 489         }
 490
 491         if (skb_cow(skb, dst->dev->hard_header_len)) {
 492                 IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
 493                 goto drop;
 494         }
 495
 496         hdr = ipv6_hdr(skb);
 497
 498         /* Mangling hops number delayed to point after skb COW */
 499
 500         hdr->hop_limit--;
 501
 502         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
 503         return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
 504
 505 error:
 506         IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
 507 drop:
 508         kfree_skb(skb);
 509         return -EINVAL;
 510 }
 511
 512 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
 513 {
 514         to->pkt_type = from->pkt_type;
 515         to->priority = from->priority;
 516         to->protocol = from->protocol;
 517         dst_release(to->dst);
 518         to->dst = dst_clone(from->dst);
 519         to->dev = from->dev;
 520         to->mark = from->mark;
 521
 522 #ifdef CONFIG_NET_SCHED
 523         to->tc_index = from->tc_index;
 524 #endif
 525         nf_copy(to, from);
 526 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
 527     defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
 528         to->nf_trace = from->nf_trace;
 529 #endif
 530         skb_copy_secmark(to, from);
 531 }
 532
 533 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
 534 {
 535         u16 offset = sizeof(struct ipv6hdr);
 536         struct ipv6_opt_hdr *exthdr =
 537                                 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
 538         unsigned int packet_len = skb->tail - skb->network_header;
 539         int found_rhdr = 0;
 540         *nexthdr = &ipv6_hdr(skb)->nexthdr;
 541
 542         while (offset + 1 <= packet_len) {
 543
 544                 switch (**nexthdr) {
 545
 546                 case NEXTHDR_HOP:
 547                         break;
 548                 case NEXTHDR_ROUTING:
 549                         found_rhdr = 1;
 550                         break;
 551                 case NEXTHDR_DEST:
 552 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
 553                         if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
 554                                 break;
 555 #endif
 556                         if (found_rhdr)
 557                                 return offset;
 558                         break;
 559                 default :
 560                         return offset;
 561                 }
 562
 563                 offset += ipv6_optlen(exthdr);
 564                 *nexthdr = &exthdr->nexthdr;
 565                 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
 566                                                  offset);
 567         }
 568
 569         return offset;
 570 }
 571 EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);
 572
 573 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
 574 {
 575         struct net_device *dev;
 576         struct sk_buff *frag;
 577         struct rt6_info *rt = (struct rt6_info*)skb->dst;
 578         struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
 579         struct ipv6hdr *tmp_hdr;
 580         struct frag_hdr *fh;
 581         unsigned int mtu, hlen, left, len;
 582         __be32 frag_id = 0;
 583         int ptr, offset = 0, err=0;
 584         u8 *prevhdr, nexthdr = 0;
 585
 586         dev = rt->u.dst.dev;
 587         hlen = ip6_find_1stfragopt(skb, &prevhdr);
 588         nexthdr = *prevhdr;
 589
 590         mtu = ip6_skb_dst_mtu(skb);
 591
 592         /* We must not fragment if the socket is set to force MTU discovery
 593          * or if the skb it not generated by a local socket.  (This last
 594          * check should be redundant, but it's free.)
 595          */
 596         if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
 597                 skb->dev = skb->dst->dev;
 598                 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
 599                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 600                 kfree_skb(skb);
 601                 return -EMSGSIZE;
 602         }
 603
 604         if (np && np->frag_size < mtu) {
 605                 if (np->frag_size)
 606                         mtu = np->frag_size;
 607         }
 608         mtu -= hlen + sizeof(struct frag_hdr);
 609
 610         if (skb_shinfo(skb)->frag_list) {
 611                 int first_len = skb_pagelen(skb);
 612
 613                 if (first_len - hlen > mtu ||
 614                     ((first_len - hlen) & 7) ||
 615                     skb_cloned(skb))
 616                         goto slow_path;
 617
 618                 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
 619                         /* Correct geometry. */
 620                         if (frag->len > mtu ||
 621                             ((frag->len & 7) && frag->next) ||
 622                             skb_headroom(frag) < hlen)
 623                             goto slow_path;
 624
 625                         /* Partially cloned skb? */
 626                         if (skb_shared(frag))
 627                                 goto slow_path;
 628
 629                         BUG_ON(frag->sk);
 630                         if (skb->sk) {
 631                                 sock_hold(skb->sk);
 632                                 frag->sk = skb->sk;
 633                                 frag->destructor = sock_wfree;
 634                                 skb->truesize -= frag->truesize;
 635                         }
 636                 }
 637
 638                 err = 0;
 639                 offset = 0;
 640                 frag = skb_shinfo(skb)->frag_list;
 641                 skb_shinfo(skb)->frag_list = NULL;
 642                 /* BUILD HEADER */
 643
 644                 *prevhdr = NEXTHDR_FRAGMENT;
 645                 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
 646                 if (!tmp_hdr) {
 647                         IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
 648                         return -ENOMEM;
 649                 }
 650
 651                 __skb_pull(skb, hlen);
 652                 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
 653                 __skb_push(skb, hlen);
 654                 skb_reset_network_header(skb);
 655                 memcpy(skb_network_header(skb), tmp_hdr, hlen);
 656
 657                 ipv6_select_ident(skb, fh);
 658                 fh->nexthdr = nexthdr;
 659                 fh->reserved = 0;
 660                 fh->frag_off = htons(IP6_MF);
 661                 frag_id = fh->identification;
 662
 663                 first_len = skb_pagelen(skb);
 664                 skb->data_len = first_len - skb_headlen(skb);
 665                 skb->len = first_len;
 666                 ipv6_hdr(skb)->payload_len = htons(first_len -
 667                                                    sizeof(struct ipv6hdr));
 668
 669                 dst_hold(&rt->u.dst);
 670
 671                 for (;;) {
 672                         /* Prepare header of the next frame,
 673                          * before previous one went down. */
 674                         if (frag) {
 675                                 frag->ip_summed = CHECKSUM_NONE;
 676                                 skb_reset_transport_header(frag);
 677                                 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
 678                                 __skb_push(frag, hlen);
 679                                 skb_reset_network_header(frag);
 680                                 memcpy(skb_network_header(frag), tmp_hdr,
 681                                        hlen);
 682                                 offset += skb->len - hlen - sizeof(struct frag_hdr);
 683                                 fh->nexthdr = nexthdr;
 684                                 fh->reserved = 0;
 685                                 fh->frag_off = htons(offset);
 686                                 if (frag->next != NULL)
 687                                         fh->frag_off |= htons(IP6_MF);
 688                                 fh->identification = frag_id;
 689                                 ipv6_hdr(frag)->payload_len =
 690                                                 htons(frag->len -
 691                                                       sizeof(struct ipv6hdr));
 692                                 ip6_copy_metadata(frag, skb);
 693                         }
 694
 695                         err = output(skb);
 696                         if(!err)
 697                                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);
 698
 699                         if (err || !frag)
 700                                 break;
 701
 702                         skb = frag;
 703                         frag = skb->next;
 704                         skb->next = NULL;
 705                 }
 706
 707                 kfree(tmp_hdr);
 708
 709                 if (err == 0) {
 710                         IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
 711                         dst_release(&rt->u.dst);
 712                         return 0;
 713                 }
 714
 715                 while (frag) {
 716                         skb = frag->next;
 717                         kfree_skb(frag);
 718                         frag = skb;
 719                 }
 720
 721                 IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
 722                 dst_release(&rt->u.dst);
 723                 return err;
 724         }
 725
 726 slow_path:
 727         left = skb->len - hlen;         /* Space per frame */
 728         ptr = hlen;                     /* Where to start from */
 729
 730         /*
 731          *      Fragment the datagram.
 732          */
 733
 734         *prevhdr = NEXTHDR_FRAGMENT;
 735
 736         /*
 737          *      Keep copying data until we run out.
 738          */
 739         while(left > 0) {
 740                 len = left;
 741                 /* IF: it doesn't fit, use 'mtu' - the data space left */
 742                 if (len > mtu)
 743                         len = mtu;
 744                 /* IF: we are not sending upto and including the packet end
 745                    then align the next start on an eight byte boundary */
 746                 if (len < left) {
 747                         len &= ~7;
 748                 }
 749                 /*
 750                  *      Allocate buffer.
 751                  */
 752
 753                 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
 754                         NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
 755                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
 756                                       IPSTATS_MIB_FRAGFAILS);
 757                         err = -ENOMEM;
 758                         goto fail;
 759                 }
 760
 761                 /*
 762                  *      Set up data on packet
 763                  */
 764
 765                 ip6_copy_metadata(frag, skb);
 766                 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
 767                 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
 768                 skb_reset_network_header(frag);
 769                 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
 770                 frag->transport_header = (frag->network_header + hlen +
 771                                           sizeof(struct frag_hdr));
 772
 773                 /*
 774                  *      Charge the memory for the fragment to any owner
 775                  *      it might possess
 776                  */
 777                 if (skb->sk)
 778                         skb_set_owner_w(frag, skb->sk);
 779
 780                 /*
 781                  *      Copy the packet header into the new buffer.
 782                  */
 783                 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
 784
 785                 /*
 786                  *      Build fragment header.
 787                  */
 788                 fh->nexthdr = nexthdr;
 789                 fh->reserved = 0;
 790                 if (!frag_id) {
 791                         ipv6_select_ident(skb, fh);
 792                         frag_id = fh->identification;
 793                 } else
 794                         fh->identification = frag_id;
 795
 796                 /*
 797                  *      Copy a block of the IP datagram.
 798                  */
 799                 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
 800                         BUG();
 801                 left -= len;
 802
 803                 fh->frag_off = htons(offset);
 804                 if (left > 0)
 805                         fh->frag_off |= htons(IP6_MF);
 806                 ipv6_hdr(frag)->payload_len = htons(frag->len -
 807                                                     sizeof(struct ipv6hdr));
 808
 809                 ptr += len;
 810                 offset += len;
 811
 812                 /*
 813                  *      Put this fragment into the sending queue.
 814                  */
 815                 err = output(frag);
 816                 if (err)
 817                         goto fail;
 818
 819                 IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
 820         }
 821         IP6_INC_STATS(ip6_dst_idev(skb->dst),
 822                       IPSTATS_MIB_FRAGOKS);
 823         kfree_skb(skb);
 824         return err;
 825
 826 fail:
 827         IP6_INC_STATS(ip6_dst_idev(skb->dst),
 828                       IPSTATS_MIB_FRAGFAILS);
 829         kfree_skb(skb);
 830         return err;
 831 }
 832
 833 static inline int ip6_rt_check(struct rt6key *rt_key,
 834                                struct in6_addr *fl_addr,
 835                                struct in6_addr *addr_cache)
 836 {
 837         return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
 838                 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
 839 }
 840
 841 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
 842                                           struct dst_entry *dst,
 843                                           struct flowi *fl)
 844 {
 845         struct ipv6_pinfo *np = inet6_sk(sk);
 846         struct rt6_info *rt = (struct rt6_info *)dst;
 847
 848         if (!dst)
 849                 goto out;
 850
 851         /* Yes, checking route validity in not connected
 852          * case is not very simple. Take into account,
 853          * that we do not support routing by source, TOS,
 854          * and MSG_DONTROUTE            --ANK (980726)
 855          *
 856          * 1. ip6_rt_check(): If route was host route,
 857          *    check that cached destination is current.
 858          *    If it is network route, we still may
 859          *    check its validity using saved pointer
 860          *    to the last used address: daddr_cache.
 861          *    We do not want to save whole address now,
 862          *    (because main consumer of this service
 863          *    is tcp, which has not this problem),
 864          *    so that the last trick works only on connected
 865          *    sockets.
 866          * 2. oif also should be the same.
 867          */
 868         if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
 869 #ifdef CONFIG_IPV6_SUBTREES
 870             ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
 871 #endif
 872             (fl->oif && fl->oif != dst->dev->ifindex)) {
 873                 dst_release(dst);
 874                 dst = NULL;
 875         }
 876
 877 out:
 878         return dst;
 879 }
 880
 881 static int ip6_dst_lookup_tail(struct sock *sk,
 882                                struct dst_entry **dst, struct flowi *fl)
 883 {
 884         int err;
 885
 886         if (*dst == NULL)
 887                 *dst = ip6_route_output(sk, fl);
 888
 889         if ((err = (*dst)->error))
 890                 goto out_err_release;
 891
 892         if (ipv6_addr_any(&fl->fl6_src)) {
 893                 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
 894                 if (err)
 895                         goto out_err_release;
 896         }
 897
 898 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
 899                 /*
 900                  * Here if the dst entry we've looked up
 901                  * has a neighbour entry that is in the INCOMPLETE
 902                  * state and the src address from the flow is
 903                  * marked as OPTIMISTIC, we release the found
 904                  * dst entry and replace it instead with the
 905                  * dst entry of the nexthop router
 906                  */
 907                 if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
 908                         struct inet6_ifaddr *ifp;
 909                         struct flowi fl_gw;
 910                         int redirect;
 911
 912                         ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);
 913
 914                         redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
 915                         if (ifp)
 916                                 in6_ifa_put(ifp);
 917
 918                         if (redirect) {
 919                                 /*
 920                                  * We need to get the dst entry for the
 921                                  * default router instead
 922                                  */
 923                                 dst_release(*dst);
 924                                 memcpy(&fl_gw, fl, sizeof(struct flowi));
 925                                 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
 926                                 *dst = ip6_route_output(sk, &fl_gw);
 927                                 if ((err = (*dst)->error))
 928                                         goto out_err_release;
 929                         }
 930                 }
 931 #endif
 932
 933         return 0;
 934
 935 out_err_release:
 936         if (err == -ENETUNREACH)
 937                 IP6_INC_STATS_BH(NULL, IPSTATS_MIB_OUTNOROUTES);
 938         dst_release(*dst);
 939         *dst = NULL;
 940         return err;
 941 }
 942
 943 /**
 944  *      ip6_dst_lookup - perform route lookup on flow
 945  *      @sk: socket which provides route info
 946  *      @dst: pointer to dst_entry * for result
 947  *      @fl: flow to lookup
 948  *
 949  *      This function performs a route lookup on the given flow.
 950  *
 951  *      It returns zero on success, or a standard errno code on error.
 952  */
 953 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 954 {
 955         *dst = NULL;
 956         return ip6_dst_lookup_tail(sk, dst, fl);
 957 }
 958 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
 959
 960 /**
 961  *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
 962  *      @sk: socket which provides the dst cache and route info
 963  *      @dst: pointer to dst_entry * for result
 964  *      @fl: flow to lookup
 965  *
 966  *      This function performs a route lookup on the given flow with the
 967  *      possibility of using the cached route in the socket if it is valid.
 968  *      It will take the socket dst lock when operating on the dst cache.
 969  *      As a result, this function can only be used in process context.
 970  *
 971  *      It returns zero on success, or a standard errno code on error.
 972  */
 973 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
 974 {
 975         *dst = NULL;
 976         if (sk) {
 977                 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
 978                 *dst = ip6_sk_dst_check(sk, *dst, fl);
 979         }
 980
 981         return ip6_dst_lookup_tail(sk, dst, fl);
 982 }
 983 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
 984
 985 static inline int ip6_ufo_append_data(struct sock *sk,
 986                         int getfrag(void *from, char *to, int offset, int len,
 987                         int odd, struct sk_buff *skb),
 988                         void *from, int length, int hh_len, int fragheaderlen,
 989                         int transhdrlen, int mtu,unsigned int flags)
 990
 991 {
 992         struct sk_buff *skb;
 993         int err;
 994
 995         /* There is support for UDP large send offload by network
 996          * device, so create one single skb packet containing complete
 997          * udp datagram
 998          */
 999         if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1000                 skb = sock_alloc_send_skb(sk,
1001                         hh_len + fragheaderlen + transhdrlen + 20,
1002                         (flags & MSG_DONTWAIT), &err);
1003                 if (skb == NULL)
1004                         return -ENOMEM;
1005
1006                 /* reserve space for Hardware header */
1007                 skb_reserve(skb, hh_len);
1008
1009                 /* create space for UDP/IP header */
1010                 skb_put(skb,fragheaderlen + transhdrlen);
1011
1012                 /* initialize network header pointer */
1013                 skb_reset_network_header(skb);
1014
1015                 /* initialize protocol header pointer */
1016                 skb->transport_header = skb->network_header + fragheaderlen;
1017
1018                 skb->ip_summed = CHECKSUM_PARTIAL;
1019                 skb->csum = 0;
1020                 sk->sk_sndmsg_off = 0;
1021         }
1022
1023         err = skb_append_datato_frags(sk,skb, getfrag, from,
1024                                       (length - transhdrlen));
1025         if (!err) {
1026                 struct frag_hdr fhdr;
1027
1028                 /* specify the length of each IP datagram fragment*/
1029                 skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
1030                                             sizeof(struct frag_hdr);
1031                 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1032                 ipv6_select_ident(skb, &fhdr);
1033                 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1034                 __skb_queue_tail(&sk->sk_write_queue, skb);
1035
1036                 return 0;
1037         }
1038         /* There is not enough support do UPD LSO,
1039          * so follow normal path
1040          */
1041         kfree_skb(skb);
1042
1043         return err;
1044 }
1045
/*
 * Append data to the socket's pending (corked) IPv6 datagram.
 *
 * @sk:          socket whose sk_write_queue collects the pending skbs
 * @getfrag:     callback that copies @len bytes at @offset of @from
 *               into @to; a negative return is reported as -EFAULT
 * @from:        opaque source cookie passed to @getfrag
 * @length:      number of bytes to append
 * @transhdrlen: transport header length; only used for the first call
 *               on an empty queue (it is zeroed on later calls)
 * @hlimit:      hop limit, saved in the cork on the first call
 * @tclass:      traffic class, saved in the cork on the first call
 * @opt:         tx options, copied into the cork on the first call
 * @fl:          flow, copied into the cork on the first call
 * @rt:          route; a reference is taken on the first call
 * @flags:       MSG_* flags (MSG_PROBE, MSG_MORE, MSG_DONTWAIT are read)
 *
 * On the first call (empty write queue) the cork state is initialised
 * from the arguments.  On later calls the arguments @rt, @fl and @opt
 * are replaced by the values saved in the cork.
 *
 * Returns 0 on success (also immediately for MSG_PROBE), or a negative
 * errno.  On the error path the bytes not yet consumed are subtracted
 * from the cork length and OUTDISCARDS is incremented; skbs already on
 * the queue are left there.
 */
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
        struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags&MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        /*
                         * Keep a private copy of the options.  An
                         * existing buffer is reused only if it is large
                         * enough for the new options.
                         */
                        if (np->cork.opt == NULL) {
                                np->cork.opt = kmalloc(opt->tot_len,
                                                       sk->sk_allocation);
                                if (unlikely(np->cork.opt == NULL))
                                        return -ENOBUFS;
                        } else if (np->cork.opt->tot_len < opt->tot_len) {
                                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
                                return -EINVAL;
                        }
                        memcpy(np->cork.opt, opt, opt->tot_len);
                        inet->cork.flags |= IPCORK_OPT;
                        /* need source address above miyazawa*/
                }
                /* The cork keeps its own reference on the route. */
                dst_hold(&rt->u.dst);
                np->cork.rt = rt;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                /*
                 * With IPV6_PMTUDISC_PROBE the device MTU is used,
                 * otherwise the MTU of the route's path.  A non-zero
                 * np->frag_size smaller than that overrides it.
                 */
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                inet->cork.fragsize = mtu;
                if (dst_allfrag(rt->u.dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                /*
                 * Extension headers are accounted as part of the
                 * transport header of the first skb.
                 */
                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                /* Continue a corked datagram: use the saved state. */
                rt = np->cork.rt;
                fl = &inet->cork.fl;
                if (inet->cork.flags & IPCORK_OPT)
                        opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        /*
         * fragheaderlen: the headers repeated in every fragment.
         * maxfraglen: largest skb length that leaves room for a
         * fragment header and keeps the payload a multiple of 8.
         */
        fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

        /* Refuse datagrams whose total size exceeds IPV6_MAXPLEN. */
        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;
        /* Oversized UDP on a UFO-capable device: build one large skb. */
        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
            (rt->u.dst.dev->features & NETIF_F_UFO)) {

                err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
                                          fragheaderlen, transhdrlen, mtu,
                                          flags);
                if (err)
                        goto error;
                return 0;
        }

        /* Nothing queued yet: jump into the loop body to allocate. */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /*
                         * There's no room in the current skb.
                         * fraggap is the number of bytes by which the
                         * previous skb exceeds maxfraglen; they are
                         * moved into the new skb below.
                         */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        /*
                         * More data is expected and the device cannot
                         * do scatter-gather: allocate a full MTU so
                         * later calls can fill the linear area.
                         */
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features&NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        /*
                         * The skb carrying the transport header is
                         * allocated with sock_alloc_send_skb(); later
                         * ones use sock_wmalloc(), and only while
                         * sk_wmem_alloc is at most twice sk_sndbuf.
                         */
                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len+sizeof(struct frag_hdr));

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                /*
                                 * Move the excess tail of the previous
                                 * skb here, transfer its checksum share
                                 * and trim the previous skb.
                                 */
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        /* Bytes of new user data that go into this skb. */
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        /* Only the first skb carries these headers. */
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
                        /* No scatter-gather: extend the linear area. */
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                                offset, copy, off, skb) < 0) {
                                /* Undo the skb_put() on copy failure. */
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        /*
                         * Scatter-gather: place the data in page
                         * fragments, continuing in the socket's current
                         * send page while it has room left.
                         */
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                /*
                                 * The page is not yet the last fragment
                                 * of this skb: attach it as a new one.
                                 */
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if(i < MAX_SKB_FRAGS) {
                                /* Start a fresh page for this socket. */
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        /* Account the bytes just added to the fragment. */
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                        skb->truesize += copy;
                        atomic_add(copy, &sk->sk_wmem_alloc);
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        /* Take back the part of this call that was never queued. */
        inet->cork.length -= length;
        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}
1343
1344 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1345 {
1346         inet->cork.flags &= ~IPCORK_OPT;
1347         kfree(np->cork.opt);
1348         np->cork.opt = NULL;
1349         if (np->cork.rt) {
1350                 dst_release(&np->cork.rt->u.dst);
1351                 np->cork.rt = NULL;
1352                 inet->cork.flags &= ~IPCORK_ALLFRAG;
1353         }
1354         memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1355 }
1356
/*
 * Turn the skbs pending on the socket's write queue into one IPv6
 * packet and hand it to the output path.
 *
 * The first queued skb becomes the head; every following skb is
 * chained onto its frag_list.  Extension headers from the corked
 * options and the IPv6 header are then pushed in front of the head,
 * and the packet is passed through the NF_IP6_LOCAL_OUT hook to
 * dst_output().
 *
 * Returns 0 on success or when the queue was empty.  A positive result
 * from the hook is mapped with net_xmit_errno() when np->recverr is
 * set and dropped (0) otherwise; a negative result is returned as is.
 * The cork state is released on every path.
 */
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = np->cork.rt;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        /*
         * Chain the remaining skbs onto the head's frag_list.  Their
         * length and truesize are added to the head, and each one is
         * detached from the socket (reference dropped, destructor and
         * sk cleared).
         */
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        /*
         * Start from the flow destination; final_dst is passed by
         * address to ipv6_push_nfrag_opts() below.
         * NOTE(review): presumably so a routing header can substitute
         * the address - confirm.
         */
        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        /* Strip the space set aside for headers, then push them. */
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /* First 32 bits: version 6, traffic class and flow label. */
        *(__be32*)hdr = fl->fl6_flowlabel |
                     htonl(0x60000000 | ((int)np->cork.tclass << 20));

        /* A payload larger than IPV6_MAXPLEN gets payload_len 0. */
        if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
                hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
        else
                hdr->payload_len = 0;
        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;

        /* The packet takes its own reference on the corked route. */
        skb->dst = dst_clone(&rt->u.dst);
        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb->dst);

                ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
        }

        err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
        if (err) {
                /* Positive codes are reported only with recverr set. */
                if (err > 0)
                        err = np->recverr ? net_xmit_errno(err) : 0;
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        /* No extra cleanup on error: share the common exit. */
        goto out;
}
1438
1439 void ip6_flush_pending_frames(struct sock *sk)
1440 {
1441         struct sk_buff *skb;
1442
1443         while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1444                 if (skb->dst)
1445                         IP6_INC_STATS(ip6_dst_idev(skb->dst),
1446                                       IPSTATS_MIB_OUTDISCARDS);
1447                 kfree_skb(skb);
1448         }
1449
1450         ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1451 }