/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      This program is free software; you can redistribute it and/or
 *      modify it under the terms of the GNU General Public License
 *      as published by the Free Software Foundation; either version
 *      2 of the License, or (at your option) any later version.
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));

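/*
 * Fragment identification values are drawn from a single global counter,
 * serialized by a spinlock.  Zero is skipped so that the slow path of
 * ip6_fragment() can use a zero frag_id as an "ident not chosen yet"
 * sentinel.
 */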
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
{
        static u32 ipv6_fragmentation_id = 1;
        static DEFINE_SPINLOCK(ip6_id_lock);

        spin_lock_bh(&ip6_id_lock);
        fhdr->identification = htonl(ipv6_fragmentation_id);
        if (++ipv6_fragmentation_id == 0)
                ipv6_fragmentation_id = 1;
        spin_unlock_bh(&ip6_id_lock);
}

static int ip6_output_finish(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;

        if (dst->hh)
                return neigh_hh_output(dst->hh, skb);
        else if (dst->neighbour)
                return dst->neighbour->output(skb);

        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
{
        skb_reset_mac_header(newskb);
        __skb_pull(newskb, skb_network_offset(newskb));
        newskb->pkt_type = PACKET_LOOPBACK;
        newskb->ip_summed = CHECKSUM_UNNECESSARY;
        BUG_TRAP(newskb->dst);

        netif_rx(newskb);
        return 0;
}
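/*
 * ip6_output2() hands the packet to the POST_ROUTING hook.  For multicast
 * destinations it also loops a clone back to the local stack (via
 * ip6_dev_loopback_xmit() and netif_rx()) when the sending socket has
 * multicast loopback enabled and this host is a member of the destination
 * group on the outgoing device.
 */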
static int ip6_output2(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct net_device *dev = dst->dev;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
                struct inet6_dev *idev = ip6_dst_idev(skb->dst);

                if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
                    ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                        &ipv6_hdr(skb)->saddr)) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                           is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
                                        newskb->dev,
                                        ip6_dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(idev, IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_INC_STATS(idev, IPSTATS_MIB_OUTMCASTPKTS);
        }

        return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
                       ip6_output_finish);
}

static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
{
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

        return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
               skb->dst->dev->mtu : dst_mtu(skb->dst);
}

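/*
 * A packet goes through ip6_fragment() when it exceeds the path MTU and
 * will not be segmented by GSO, or when dst_allfrag() is set for the
 * destination (a router reported an MTU below the IPv6 minimum, so every
 * packet to it must carry a Fragment header).
 */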
int ip6_output(struct sk_buff *skb)
{
        if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
                                dst_allfrag(skb->dst))
                return ip6_fragment(skb, ip6_output2);
        else
                return ip6_output2(skb);
}

/*
 *      xmit an sk_buff (used by TCP)
 */

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt, int ipfragok)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl->fl6_dst;
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr;
        u8  proto = fl->proto;
        int seg_len = skb->len;
        int hlimit, tclass;
        u32 mtu;

        if (opt) {
                unsigned int head_room;

                /* First: exthdrs may take lots of space (~8K for now);
                   MAX_HEADER is not enough.
                 */
                head_room = opt->opt_nflen + opt->opt_flen;
                seg_len += head_room;
                head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

                if (skb_headroom(skb) < head_room) {
                        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                        if (skb2 == NULL) {
                                IP6_INC_STATS(ip6_dst_idev(skb->dst),
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return -ENOBUFS;
                        }
                        kfree_skb(skb);
                        skb = skb2;
                        if (sk)
                                skb_set_owner_w(skb, sk);
                }
                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);
                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */

        hlimit = -1;
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = dst_metric(dst, RTAX_HOPLIMIT);
        if (hlimit < 0)
                hlimit = ipv6_get_hoplimit(dst->dev);

        tclass = -1;
        if (np)
                tclass = np->tclass;
        if (tclass < 0)
                tclass = 0;

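        /*
         * The first 32-bit word of the header packs the version (6) into
         * bits 31-28 and the traffic class into bits 27-20; the low 20
         * bits carry the flow label, which fl->fl6_flowlabel already
         * stores in network byte order.
         */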
        *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, first_hop);

        skb->priority = sk->sk_priority;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || ipfragok || skb_is_gso(skb)) {
                IP6_INC_STATS(ip6_dst_idev(skb->dst),
                              IPSTATS_MIB_OUTREQUESTS);
                return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,
                               dst_output);
        }

        if (net_ratelimit())
                printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
        skb->dev = dst->dev;
        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}

EXPORT_SYMBOL(ip6_xmit);

/*
 *      To avoid extra problems ND packets are sent through this
 *      routine. It's code duplication but I really want to avoid
 *      extra checks since ipv6_build_header is used by TCP (which
 *      is performance critical for us)
 */

int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               struct in6_addr *saddr, struct in6_addr *daddr,
               int proto, int len)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        int totlen;

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        totlen = len + sizeof(struct ipv6hdr);

        skb_reset_network_header(skb);
        skb_put(skb, sizeof(struct ipv6hdr));
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = htonl(0x60000000);

        hdr->payload_len = htons(len);
        hdr->nexthdr = proto;
        hdr->hop_limit = np->hop_limit;

        ipv6_addr_copy(&hdr->saddr, saddr);
        ipv6_addr_copy(&hdr->daddr, daddr);

        return 0;
}

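/*
 * Deliver a packet carrying a Router Alert option to every raw socket
 * that registered interest in this alert value (via the IPV6_ROUTER_ALERT
 * socket option).  The last matching socket receives the original skb;
 * earlier matches get clones.
 */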
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* Unicast neighbour discovery messages destined
                         * for the proxied address are passed to the input
                         * function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct sk_buff *skb)
{
        return dst_output(skb);
}

int ip6_forward(struct sk_buff *skb)
{
        struct dst_entry *dst = skb->dst;
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);

        if (ipv6_devconf.forwarding == 0)
                goto error;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We DO NOT do any processing on RA packets; they are pushed
         *      to user level AS IS, without any WARRANTY that an
         *      application will be able to interpret them. The reason is
         *      that we cannot do anything clever here.
         *
         *      We are not an end node, so if the packet contains AH/ESP
         *      we cannot do anything. Defragmentation would also be a
         *      mistake: RA packets cannot be fragmented, because there is
         *      no guarantee that different fragments will follow the same
         *      path. --ANK
         */
        if (opt->ra) {
                u8 *ptr = skb_network_header(skb) + opt->ra;
                if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
                        return 0;
        }

        /*
         *      check and decrement hop limit
         */
        if (hdr->hop_limit <= 1) {
                /* Force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
                            0, skb->dev);
                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (ipv6_devconf.proxy_ndp &&
            pneigh_lookup(&nd_tbl, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb->dst;

        /* The IPv6 specs say nothing about it, but it is clear that we
           cannot send redirects for source-routed frames.
           We don't send redirects for frames decapsulated from IPsec.
         */
        if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
            !skb->sp) {
                struct in6_addr *target = NULL;
                struct rt6_info *rt;
                struct neighbour *n = dst->neighbour;

                /*
                 *      incoming and outgoing devices are the same
                 *      send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if ((rt->rt6i_flags & RTF_GATEWAY))
                        target = (struct in6_addr *)&n->primary_key;
                else
                        target = &hdr->daddr;

                /* Limit redirects both by destination (here)
                   and by source (inside ndisc_send_redirect)
                 */
                if (xrlim_allow(dst, 1*HZ))
                        ndisc_send_redirect(skb, n, target);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
                        goto error;
                }
        }

        if (skb->len > dst_mtu(dst)) {
                /* Again, force OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
                IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                IP6_INC_STATS(ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Decrementing the hop limit is delayed until after the skb COW */

        hdr->hop_limit--;

        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        IP6_INC_STATS_BH(ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        dst_release(to->dst);
        to->dst = dst_clone(from->dst);
        to->dev = from->dev;
        to->mark = from->mark;

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
#if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
    defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
        to->nf_trace = from->nf_trace;
#endif
        skb_copy_secmark(to, from);
}

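/*
 * Walk the extension header chain to find where a Fragment header must
 * be inserted: after any Hop-by-Hop, Routing, and preceding Destination
 * Options headers, i.e. at the start of the fragmentable part of the
 * packet.  On return, *nexthdr points at the Next Header byte that will
 * have to be rewritten to NEXTHDR_FRAGMENT.
 */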
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
{
        u16 offset = sizeof(struct ipv6hdr);
        struct ipv6_opt_hdr *exthdr =
                                (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
        unsigned int packet_len = skb->tail - skb->network_header;
        int found_rhdr = 0;
        *nexthdr = &ipv6_hdr(skb)->nexthdr;

        while (offset + 1 <= packet_len) {

                switch (**nexthdr) {

                case NEXTHDR_HOP:
                        break;
                case NEXTHDR_ROUTING:
                        found_rhdr = 1;
                        break;
                case NEXTHDR_DEST:
#if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
                        if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
                                break;
#endif
                        if (found_rhdr)
                                return offset;
                        break;
                default:
                        return offset;
                }

                offset += ipv6_optlen(exthdr);
                *nexthdr = &exthdr->nexthdr;
                exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
                                                 offset);
        }

        return offset;
}
EXPORT_SYMBOL_GPL(ip6_find_1stfragopt);

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
{
        struct net_device *dev;
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb->dst;
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
        struct ipv6hdr *tmp_hdr;
        struct frag_hdr *fh;
        unsigned int mtu, hlen, left, len;
        __be32 frag_id = 0;
        int ptr, offset = 0, err = 0;
        u8 *prevhdr, nexthdr = 0;

        dev = rt->u.dst.dev;
        hlen = ip6_find_1stfragopt(skb, &prevhdr);
        nexthdr = *prevhdr;

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb was not generated by a local socket.  (This last
         * check should be redundant, but it's free.)
         */
        if (!np || np->pmtudisc >= IPV6_PMTUDISC_DO) {
                skb->dev = skb->dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
                IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        mtu -= hlen + sizeof(struct frag_hdr);

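        /*
         * Fast path: the data is already laid out as a chain of
         * fragment-sized skbs on frag_list, so each element can be sent
         * as one fragment by just prepending the per-fragment headers.
         * Fall back to the slow (copying) path if the geometry is wrong
         * or any skb is shared or cloned.
         */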
        if (skb_shinfo(skb)->frag_list) {
                int first_len = skb_pagelen(skb);

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb))
                        goto slow_path;

                for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < hlen)
                                goto slow_path;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                sock_hold(skb->sk);
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                                skb->truesize -= frag->truesize;
                        }
                }

                err = 0;
                offset = 0;
                frag = skb_shinfo(skb)->frag_list;
                skb_shinfo(skb)->frag_list = NULL;
                /* BUILD HEADER */

                *prevhdr = NEXTHDR_FRAGMENT;
                tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
                if (!tmp_hdr) {
                        IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGFAILS);
                        return -ENOMEM;
                }

                __skb_pull(skb, hlen);
                fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
                __skb_push(skb, hlen);
                skb_reset_network_header(skb);
                memcpy(skb_network_header(skb), tmp_hdr, hlen);

                ipv6_select_ident(skb, fh);
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                fh->frag_off = htons(IP6_MF);
                frag_id = fh->identification;

                first_len = skb_pagelen(skb);
                skb->data_len = first_len - skb_headlen(skb);
                skb->len = first_len;
                ipv6_hdr(skb)->payload_len = htons(first_len -
                                                   sizeof(struct ipv6hdr));

                dst_hold(&rt->u.dst);

                for (;;) {
                        /* Prepare the header of the next frame
                         * before the previous one has gone down. */
                        if (frag) {
                                frag->ip_summed = CHECKSUM_NONE;
                                skb_reset_transport_header(frag);
                                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                                __skb_push(frag, hlen);
                                skb_reset_network_header(frag);
                                memcpy(skb_network_header(frag), tmp_hdr,
                                       hlen);
                                offset += skb->len - hlen - sizeof(struct frag_hdr);
                                fh->nexthdr = nexthdr;
                                fh->reserved = 0;
                                fh->frag_off = htons(offset);
                                if (frag->next != NULL)
                                        fh->frag_off |= htons(IP6_MF);
                                fh->identification = frag_id;
                                ipv6_hdr(frag)->payload_len =
                                                htons(frag->len -
                                                      sizeof(struct ipv6hdr));
                                ip6_copy_metadata(frag, skb);
                        }

                        err = output(skb);
                        if (!err)
                                IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGCREATES);

                        if (err || !frag)
                                break;

                        skb = frag;
                        frag = skb->next;
                        skb->next = NULL;
                }

                kfree(tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGOKS);
                        dst_release(&rt->u.dst);
                        return 0;
                }

                while (frag) {
                        skb = frag->next;
                        kfree_skb(frag);
                        frag = skb;
                }

                IP6_INC_STATS(ip6_dst_idev(&rt->u.dst), IPSTATS_MIB_FRAGFAILS);
                dst_release(&rt->u.dst);
                return err;
        }

slow_path:
        left = skb->len - hlen;         /* Space per frame */
        ptr = hlen;                     /* Where to start from */

        /*
         *      Fragment the datagram.
         */

        *prevhdr = NEXTHDR_FRAGMENT;

        /*
         *      Keep copying data until we run out.
         */
        while (left > 0) {
                len = left;
                /* IF: it doesn't fit, use 'mtu' - the data space left */
                if (len > mtu)
                        len = mtu;
                /* IF: we are not sending up to and including the packet end
                   then align the next start on an eight byte boundary */
                if (len < left) {
                        len &= ~7;
                }
                /*
                 *      Allocate buffer.
                 */

                if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                                      LL_RESERVED_SPACE(rt->u.dst.dev),
                                      GFP_ATOMIC)) == NULL) {
                        NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
                        IP6_INC_STATS(ip6_dst_idev(skb->dst),
                                      IPSTATS_MIB_FRAGFAILS);
                        err = -ENOMEM;
                        goto fail;
                }

                /*
                 *      Set up data on packet
                 */

                ip6_copy_metadata(frag, skb);
                skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
                skb_put(frag, len + hlen + sizeof(struct frag_hdr));
                skb_reset_network_header(frag);
                fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
                frag->transport_header = (frag->network_header + hlen +
                                          sizeof(struct frag_hdr));

                /*
                 *      Charge the memory for the fragment to any owner
                 *      it might possess
                 */
                if (skb->sk)
                        skb_set_owner_w(frag, skb->sk);

                /*
                 *      Copy the packet header into the new buffer.
                 */
                skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

                /*
                 *      Build fragment header.
                 */
                fh->nexthdr = nexthdr;
                fh->reserved = 0;
                if (!frag_id) {
                        ipv6_select_ident(skb, fh);
                        frag_id = fh->identification;
                } else
                        fh->identification = frag_id;

                /*
                 *      Copy a block of the IP datagram.
                 */
                if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
                        BUG();
                left -= len;

                fh->frag_off = htons(offset);
                if (left > 0)
                        fh->frag_off |= htons(IP6_MF);
                ipv6_hdr(frag)->payload_len = htons(frag->len -
                                                    sizeof(struct ipv6hdr));

                ptr += len;
                offset += len;

                /*
                 *      Put this fragment into the sending queue.
                 */
                err = output(frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(ip6_dst_idev(skb->dst), IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(ip6_dst_idev(skb->dst),
                      IPSTATS_MIB_FRAGOKS);
        kfree_skb(skb);
        return err;

fail:
        IP6_INC_STATS(ip6_dst_idev(skb->dst),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

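/*
 * Return nonzero when the cached route may no longer match the flow:
 * a host route whose destination differs from the flow's, or a network
 * route when the flow's destination is not the last address the socket
 * actually used (addr_cache).
 */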
static inline int ip6_rt_check(struct rt6key *rt_key,
                               struct in6_addr *fl_addr,
                               struct in6_addr *addr_cache)
{
        return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          struct flowi *fl)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt = (struct rt6_info *)dst;

        if (!dst)
                goto out;

        /* Yes, checking route validity in the unconnected case is not
         * very simple. Take into account that we do not support routing
         * by source, TOS, and MSG_DONTROUTE          --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route,
         *    check that the cached destination is current.
         *    If it is a network route, we still may
         *    check its validity using the saved pointer
         *    to the last used address: daddr_cache.
         *    We do not want to save the whole address now,
         *    (because the main consumer of this service
         *    is tcp, which does not have this problem),
         *    so the last trick works only on connected
         *    sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
#endif
            (fl->oif && fl->oif != dst->dev->ifindex)) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

static int ip6_dst_lookup_tail(struct sock *sk,
                               struct dst_entry **dst, struct flowi *fl)
{
        int err;

        if (*dst == NULL)
                *dst = ip6_route_output(sk, fl);

        if ((err = (*dst)->error))
                goto out_err_release;

        if (ipv6_addr_any(&fl->fl6_src)) {
                err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
                if (err)
                        goto out_err_release;
        }

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
                /*
                 * If the dst entry we've looked up has a neighbour entry
                 * that is not in the VALID state and the source address
                 * from the flow is marked as OPTIMISTIC, release the
                 * found dst entry and replace it with the dst entry of
                 * the nexthop router.
                 */
                if (!((*dst)->neighbour->nud_state & NUD_VALID)) {
                        struct inet6_ifaddr *ifp;
                        struct flowi fl_gw;
                        int redirect;

                        ifp = ipv6_get_ifaddr(&fl->fl6_src, (*dst)->dev, 1);

                        redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                        if (ifp)
                                in6_ifa_put(ifp);

                        if (redirect) {
                                /*
                                 * We need to get the dst entry for the
                                 * default router instead
                                 */
                                dst_release(*dst);
                                memcpy(&fl_gw, fl, sizeof(struct flowi));
                                memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
                                *dst = ip6_route_output(sk, &fl_gw);
                                if ((err = (*dst)->error))
                                        goto out_err_release;
                        }
                }
#endif

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
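
/*
 * Typical usage (an illustrative sketch only; the variable names are the
 * caller's, not part of this API):
 *
 *      struct dst_entry *dst;
 *      struct flowi fl = { .oif = sk->sk_bound_dev_if,
 *                          .proto = IPPROTO_UDP };
 *
 *      ipv6_addr_copy(&fl.fl6_dst, daddr);
 *      err = ip6_dst_lookup(sk, &dst, &fl);
 *      if (err)
 *              return err;
 *      ...
 *      dst_release(dst);
 *
 * On success the caller holds a reference on *dst and must release it.
 */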

/**
 *      ip6_sk_dst_lookup - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @dst: pointer to dst_entry * for result
 *      @fl: flow to lookup
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
{
        *dst = NULL;
        if (sk) {
                *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
                *dst = ip6_sk_dst_check(sk, *dst, fl);
        }

        return ip6_dst_lookup_tail(sk, dst, fl);
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);

static inline int ip6_ufo_append_data(struct sock *sk,
                        int getfrag(void *from, char *to, int offset, int len,
                        int odd, struct sk_buff *skb),
                        void *from, int length, int hh_len, int fragheaderlen,
                        int transhdrlen, int mtu, unsigned int flags)
{
        struct sk_buff *skb;
        int err;

        /* The network device supports UDP large send offload, so create
         * a single skb containing the complete UDP datagram.
         */
        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
                skb = sock_alloc_send_skb(sk,
                        hh_len + fragheaderlen + transhdrlen + 20,
                        (flags & MSG_DONTWAIT), &err);
                if (skb == NULL)
                        return -ENOMEM;

                /* reserve space for the hardware header */
                skb_reserve(skb, hh_len);

                /* create space for the UDP/IP header */
                skb_put(skb, fragheaderlen + transhdrlen);

                /* initialize the network header pointer */
                skb_reset_network_header(skb);

                /* initialize the protocol header pointer */
                skb->transport_header = skb->network_header + fragheaderlen;

                skb->ip_summed = CHECKSUM_PARTIAL;
                skb->csum = 0;
                sk->sk_sndmsg_off = 0;
        }

        err = skb_append_datato_frags(sk, skb, getfrag, from,
                                      (length - transhdrlen));
        if (!err) {
                struct frag_hdr fhdr;

                /* specify the length of each IP datagram fragment */
                skb_shinfo(skb)->gso_size = mtu - fragheaderlen -
                                            sizeof(struct frag_hdr);
                skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
                ipv6_select_ident(skb, &fhdr);
                skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
                __skb_queue_tail(&sk->sk_write_queue, skb);

                return 0;
        }
        /* There is not enough support to do UDP LSO,
         * so follow the normal path.
         */
        kfree_skb(skb);

        return err;
}

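/*
 * ip6_append_data() implements the corked-send model shared with IPv4:
 * the first call sets up per-socket cork state (options, route, MTU) and
 * subsequent calls append data to the queue of pending skbs, each sized
 * so that ip6_push_pending_frames() can send them as fragments of one
 * datagram.
 */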
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
        int offset, int len, int odd, struct sk_buff *skb),
        void *from, int length, int transhdrlen,
        int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
        struct rt6_info *rt, unsigned int flags)
{
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct sk_buff *skb;
        unsigned int maxfraglen, fragheaderlen;
        int exthdrlen;
        int hh_len;
        int mtu;
        int copy;
        int err;
        int offset = 0;
        int csummode = CHECKSUM_NONE;

        if (flags & MSG_PROBE)
                return 0;
        if (skb_queue_empty(&sk->sk_write_queue)) {
                /*
                 * setup for corking
                 */
                if (opt) {
                        if (np->cork.opt == NULL) {
                                np->cork.opt = kmalloc(opt->tot_len,
                                                       sk->sk_allocation);
                                if (unlikely(np->cork.opt == NULL))
                                        return -ENOBUFS;
                        } else if (np->cork.opt->tot_len < opt->tot_len) {
                                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
                                return -EINVAL;
                        }
                        memcpy(np->cork.opt, opt, opt->tot_len);
                        inet->cork.flags |= IPCORK_OPT;
                        /* need source address above miyazawa */
                }
                dst_hold(&rt->u.dst);
                np->cork.rt = rt;
                inet->cork.fl = *fl;
                np->cork.hop_limit = hlimit;
                np->cork.tclass = tclass;
                mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
                      rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
                if (np->frag_size < mtu) {
                        if (np->frag_size)
                                mtu = np->frag_size;
                }
                inet->cork.fragsize = mtu;
                if (dst_allfrag(rt->u.dst.path))
                        inet->cork.flags |= IPCORK_ALLFRAG;
                inet->cork.length = 0;
                sk->sk_sndmsg_page = NULL;
                sk->sk_sndmsg_off = 0;
                exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
                length += exthdrlen;
                transhdrlen += exthdrlen;
        } else {
                rt = np->cork.rt;
                fl = &inet->cork.fl;
                if (inet->cork.flags & IPCORK_OPT)
                        opt = np->cork.opt;
                transhdrlen = 0;
                exthdrlen = 0;
                mtu = inet->cork.fragsize;
        }

        hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

        fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len +
                        (opt ? opt->opt_nflen : 0);
        maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
                     sizeof(struct frag_hdr);

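        /*
         * fragheaderlen covers the headers repeated in every fragment
         * (the base IPv6 header plus the unfragmentable extension
         * headers); maxfraglen rounds the per-fragment payload down to a
         * multiple of 8 octets, since fragment offsets are expressed in
         * 8-octet units, and reserves room for the Fragment header.
         */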
        if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
                if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
                        ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);
                        return -EMSGSIZE;
                }
        }

        /*
         * Let's try using as much space as possible.
         * Use MTU if total length of the message fits into the MTU.
         * Otherwise, we need to reserve fragment header and
         * fragment alignment (= 8-15 octets, in total).
         *
         * Note that we may need to "move" the data from the tail
         * of the buffer to the new fragment when we split
         * the message.
         *
         * FIXME: It may be fragmented into multiple chunks
         *        at once if non-fragmentable extension headers
         *        are too large.
         * --yoshfuji
         */

        inet->cork.length += length;
        if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
            (rt->u.dst.dev->features & NETIF_F_UFO)) {

                err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
                                          fragheaderlen, transhdrlen, mtu,
                                          flags);
                if (err)
                        goto error;
                return 0;
        }

        if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
                goto alloc_new_skb;

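        /*
         * Append loop: top up the tail skb while it has room, and open a
         * new fragment-sized skb (alloc_new_skb) once it is full.
         */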
        while (length > 0) {
                /* Check if the remaining data fits into current packet. */
                copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
                if (copy < length)
                        copy = maxfraglen - skb->len;

                if (copy <= 0) {
                        char *data;
                        unsigned int datalen;
                        unsigned int fraglen;
                        unsigned int fraggap;
                        unsigned int alloclen;
                        struct sk_buff *skb_prev;
alloc_new_skb:
                        skb_prev = skb;

                        /* There's no room in the current skb */
                        if (skb_prev)
                                fraggap = skb_prev->len - maxfraglen;
                        else
                                fraggap = 0;

                        /*
                         * If remaining data exceeds the mtu,
                         * we know we need more fragment(s).
                         */
                        datalen = length + fraggap;
                        if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                                datalen = maxfraglen - fragheaderlen;

                        fraglen = datalen + fragheaderlen;
                        if ((flags & MSG_MORE) &&
                            !(rt->u.dst.dev->features & NETIF_F_SG))
                                alloclen = mtu;
                        else
                                alloclen = datalen + fragheaderlen;

                        /*
                         * The last fragment gets additional space at tail.
                         * Note: we overallocate on fragments with MSG_MORE
                         * because we have no idea if we're the last one.
                         */
                        if (datalen == length + fraggap)
                                alloclen += rt->u.dst.trailer_len;

                        /*
                         * We just reserve space for fragment header.
                         * Note: this may be overallocation if the message
                         * (without MSG_MORE) fits into the MTU.
                         */
                        alloclen += sizeof(struct frag_hdr);

                        if (transhdrlen) {
                                skb = sock_alloc_send_skb(sk,
                                                alloclen + hh_len,
                                                (flags & MSG_DONTWAIT), &err);
                        } else {
                                skb = NULL;
                                if (atomic_read(&sk->sk_wmem_alloc) <=
                                    2 * sk->sk_sndbuf)
                                        skb = sock_wmalloc(sk,
                                                           alloclen + hh_len, 1,
                                                           sk->sk_allocation);
                                if (unlikely(skb == NULL))
                                        err = -ENOBUFS;
                        }
                        if (skb == NULL)
                                goto error;
                        /*
                         *      Fill in the control structures
                         */
                        skb->ip_summed = csummode;
                        skb->csum = 0;
                        /* reserve for fragmentation */
                        skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

                        /*
                         *      Find where to start putting bytes
                         */
                        data = skb_put(skb, fraglen);
                        skb_set_network_header(skb, exthdrlen);
                        data += fragheaderlen;
                        skb->transport_header = (skb->network_header +
                                                 fragheaderlen);
                        if (fraggap) {
                                skb->csum = skb_copy_and_csum_bits(
                                        skb_prev, maxfraglen,
                                        data + transhdrlen, fraggap, 0);
                                skb_prev->csum = csum_sub(skb_prev->csum,
                                                          skb->csum);
                                data += fraggap;
                                pskb_trim_unique(skb_prev, maxfraglen);
                        }
                        copy = datalen - transhdrlen - fraggap;
                        if (copy < 0) {
                                err = -EINVAL;
                                kfree_skb(skb);
                                goto error;
                        } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
                                err = -EFAULT;
                                kfree_skb(skb);
                                goto error;
                        }

                        offset += copy;
                        length -= datalen - fraggap;
                        transhdrlen = 0;
                        exthdrlen = 0;
                        csummode = CHECKSUM_NONE;

                        /*
                         * Put the packet on the pending queue
                         */
                        __skb_queue_tail(&sk->sk_write_queue, skb);
                        continue;
                }

                if (copy > length)
                        copy = length;

                if (!(rt->u.dst.dev->features & NETIF_F_SG)) {
                        unsigned int off;

                        off = skb->len;
                        if (getfrag(from, skb_put(skb, copy),
                                                offset, copy, off, skb) < 0) {
                                __skb_trim(skb, off);
                                err = -EFAULT;
                                goto error;
                        }
                } else {
                        int i = skb_shinfo(skb)->nr_frags;
                        skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
                        struct page *page = sk->sk_sndmsg_page;
                        int off = sk->sk_sndmsg_off;
                        unsigned int left;

                        if (page && (left = PAGE_SIZE - off) > 0) {
                                if (copy >= left)
                                        copy = left;
                                if (page != frag->page) {
                                        if (i == MAX_SKB_FRAGS) {
                                                err = -EMSGSIZE;
                                                goto error;
                                        }
                                        get_page(page);
                                        skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                                        frag = &skb_shinfo(skb)->frags[i];
                                }
                        } else if (i < MAX_SKB_FRAGS) {
                                if (copy > PAGE_SIZE)
                                        copy = PAGE_SIZE;
                                page = alloc_pages(sk->sk_allocation, 0);
                                if (page == NULL) {
                                        err = -ENOMEM;
                                        goto error;
                                }
                                sk->sk_sndmsg_page = page;
                                sk->sk_sndmsg_off = 0;

                                skb_fill_page_desc(skb, i, page, 0, 0);
                                frag = &skb_shinfo(skb)->frags[i];
                                skb->truesize += PAGE_SIZE;
                                atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
                        } else {
                                err = -EMSGSIZE;
                                goto error;
                        }
                        if (getfrag(from,
                                    page_address(frag->page) +
                                    frag->page_offset + frag->size,
                                    offset, copy, skb->len, skb) < 0) {
                                err = -EFAULT;
                                goto error;
                        }
                        sk->sk_sndmsg_off += copy;
                        frag->size += copy;
                        skb->len += copy;
                        skb->data_len += copy;
                }
                offset += copy;
                length -= copy;
        }
        return 0;
error:
        inet->cork.length -= length;
        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
        return err;
}

static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
{
        inet->cork.flags &= ~IPCORK_OPT;
        kfree(np->cork.opt);
        np->cork.opt = NULL;
        if (np->cork.rt) {
                dst_release(&np->cork.rt->u.dst);
                np->cork.rt = NULL;
                inet->cork.flags &= ~IPCORK_ALLFRAG;
        }
        memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
}

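/*
 * Coalesce the pending write queue into one skb chain, prepend the IPv6
 * header (and any corked extension headers), and hand the result to the
 * LOCAL_OUT hook; dst_output() then fragments it if necessary.
 */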
int ip6_push_pending_frames(struct sock *sk)
{
        struct sk_buff *skb, *tmp_skb;
        struct sk_buff **tail_skb;
        struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
        struct inet_sock *inet = inet_sk(sk);
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct ipv6hdr *hdr;
        struct ipv6_txoptions *opt = np->cork.opt;
        struct rt6_info *rt = np->cork.rt;
        struct flowi *fl = &inet->cork.fl;
        unsigned char proto = fl->proto;
        int err = 0;

        if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
                goto out;
        tail_skb = &(skb_shinfo(skb)->frag_list);

        /* move skb->data to ip header from ext header */
        if (skb->data < skb_network_header(skb))
                __skb_pull(skb, skb_network_offset(skb));
        while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
                __skb_pull(tmp_skb, skb_network_header_len(skb));
                *tail_skb = tmp_skb;
                tail_skb = &(tmp_skb->next);
                skb->len += tmp_skb->len;
                skb->data_len += tmp_skb->len;
                skb->truesize += tmp_skb->truesize;
                __sock_put(tmp_skb->sk);
                tmp_skb->destructor = NULL;
                tmp_skb->sk = NULL;
        }

        ipv6_addr_copy(final_dst, &fl->fl6_dst);
        __skb_pull(skb, skb_network_header_len(skb));
        if (opt && opt->opt_flen)
                ipv6_push_frag_opts(skb, opt, &proto);
        if (opt && opt->opt_nflen)
                ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        *(__be32 *)hdr = fl->fl6_flowlabel |
                     htonl(0x60000000 | ((int)np->cork.tclass << 20));

        if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
                hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
        else
                hdr->payload_len = 0;
        hdr->hop_limit = np->cork.hop_limit;
        hdr->nexthdr = proto;
        ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
        ipv6_addr_copy(&hdr->daddr, final_dst);

        skb->priority = sk->sk_priority;

        skb->dst = dst_clone(&rt->u.dst);
        IP6_INC_STATS(rt->rt6i_idev, IPSTATS_MIB_OUTREQUESTS);
        if (proto == IPPROTO_ICMPV6) {
                struct inet6_dev *idev = ip6_dst_idev(skb->dst);

                ICMP6MSGOUT_INC_STATS_BH(idev, icmp6_hdr(skb)->icmp6_type);
                ICMP6_INC_STATS_BH(idev, ICMP6_MIB_OUTMSGS);
        }

        err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev,
                      dst_output);
        if (err) {
                if (err > 0)
                        err = np->recverr ? net_xmit_errno(err) : 0;
                if (err)
                        goto error;
        }

out:
        ip6_cork_release(inet, np);
        return err;
error:
        goto out;
}

void ip6_flush_pending_frames(struct sock *sk)
{
        struct sk_buff *skb;

        while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
                if (skb->dst)
                        IP6_INC_STATS(ip6_dst_idev(skb->dst),
                                      IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
        }

        ip6_cork_release(inet_sk(sk), inet6_sk(sk));
}