 *  IPv6 output functions
 *  Linux INET6 implementation
 *
 *  Pedro Roque <roque@di.fc.ul.pt>
 *
 *  $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
 *
 *  Based on linux/net/ipv4/ip_output.c
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU General Public License
 *  as published by the Free Software Foundation; either version
 *  2 of the License, or (at your option) any later version.
 *
 *  A.N.Kuznetsov   :   arithmetic in fragmentation.
 *                      extension headers are implemented.
 *                      route changes now work.
 *                      ip6_forward does not confuse sniffers.
 *  H. von Brand    :   Added missing #include <linux/string.h>
 *  Imran Patel     :   frag id should be in NBO
 *  Kazunori MIYAZAWA @USAGI
 *                  :   add ip6_append_data and related functions
#include <linux/config.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>

#include <net/checksum.h>

static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)

    static u32 ipv6_fragmentation_id = 1;
    static DEFINE_SPINLOCK(ip6_id_lock);

    spin_lock_bh(&ip6_id_lock);
    fhdr->identification = htonl(ipv6_fragmentation_id);
    if (++ipv6_fragmentation_id == 0)
        ipv6_fragmentation_id = 1;
    spin_unlock_bh(&ip6_id_lock);
static inline int ip6_output_finish(struct sk_buff *skb)

    struct dst_entry *dst = skb->dst;
    struct hh_cache *hh = dst->hh;

        read_lock_bh(&hh->hh_lock);
        hh_alen = HH_DATA_ALIGN(hh->hh_len);
        memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
        read_unlock_bh(&hh->hh_lock);
        skb_push(skb, hh->hh_len);
        return hh->hh_output(skb);
    } else if (dst->neighbour)
        return dst->neighbour->output(skb);

    IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
/* dev_loopback_xmit for use with netfilter. */
static int ip6_dev_loopback_xmit(struct sk_buff *newskb)

    newskb->mac.raw = newskb->data;
    __skb_pull(newskb, newskb->nh.raw - newskb->data);
    newskb->pkt_type = PACKET_LOOPBACK;
    newskb->ip_summed = CHECKSUM_UNNECESSARY;
    BUG_TRAP(newskb->dst);
static int ip6_output2(struct sk_buff *skb)

    struct dst_entry *dst = skb->dst;
    struct net_device *dev = dst->dev;

    skb->protocol = htons(ETH_P_IPV6);

    if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
        struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;

        if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
            ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
                                &skb->nh.ipv6h->saddr)) {
            struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

            /* Do not check for IFF_ALLMULTI; multicast routing
               is not supported in any case. */

            NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
                    ip6_dev_loopback_xmit);

        if (skb->nh.ipv6h->hop_limit == 0) {
            IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);

        IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);

    return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
                   ip6_output_finish);
int ip6_output(struct sk_buff *skb)

    if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
        return ip6_fragment(skb, ip6_output2);

    return ip6_output2(skb);
 *  xmit an sk_buff (used by TCP)

int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
             struct ipv6_txoptions *opt, int ipfragok)

    struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
    struct in6_addr *first_hop = &fl->fl6_dst;
    struct dst_entry *dst = skb->dst;

    u8 proto = fl->proto;
    int seg_len = skb->len;

        /* First: exthdrs may take lots of space (~8K for now),
           MAX_HEADER is not enough. */
        head_room = opt->opt_nflen + opt->opt_flen;
        seg_len += head_room;
        head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
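        /*
         * Illustrative note: head_room is everything that will be pushed
         * in front of the payload.  For example, with a 24-byte routing
         * header (opt_nflen = 24), an 8-byte destination options header
         * (opt_flen = 8), the 40-byte IPv6 header and a 16-byte
         * link-layer reserve on Ethernet, head_room = 24 + 8 + 40 + 16 =
         * 88 bytes; skb_realloc_headroom() below is only needed when the
         * skb was allocated with less headroom than that.  (The figures
         * are examples, not taken from this code.)
         */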
    if (skb_headroom(skb) < head_room) {
        struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);

            IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);

        skb_set_owner_w(skb, sk);

        ipv6_push_frag_opts(skb, opt, &proto);

        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);

    hdr = skb->nh.ipv6h = (struct ipv6hdr *)skb_push(skb, sizeof(struct ipv6hdr));

     *  Fill in the IPv6 header

        hlimit = np->hop_limit;

        hlimit = dst_metric(dst, RTAX_HOPLIMIT);

        hlimit = ipv6_get_hoplimit(dst->dev);

    *(u32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
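    /*
     * Illustrative note: the first 32 bits of the IPv6 header pack
     * version (4 bits), traffic class (8 bits) and flow label (20 bits).
     * With tclass = 0x28 and no flow label, for example, the host-order
     * value is 0x60000000 | (0x28 << 20) = 0x62800000, which htonl()
     * converts to network byte order; fl->fl6_flowlabel is already kept
     * in network byte order, which is why it is OR-ed in after htonl().
     */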
    hdr->payload_len = htons(seg_len);
    hdr->nexthdr = proto;
    hdr->hop_limit = hlimit;

    ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
    ipv6_addr_copy(&hdr->daddr, first_hop);

    if ((skb->len <= mtu) || ipfragok) {
        IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
        return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev,

        printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");

    icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
    IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
 *  To avoid extra problems ND packets are sent through this
 *  routine.  It is code duplication, but I really want to avoid
 *  extra checks, since ipv6_build_header is used by TCP (which
 *  is performance critical for us).
int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
               struct in6_addr *saddr, struct in6_addr *daddr,

    struct ipv6_pinfo *np = inet6_sk(sk);

    skb->protocol = htons(ETH_P_IPV6);

    totlen = len + sizeof(struct ipv6hdr);

    hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));

    *(u32 *)hdr = htonl(0x60000000);

    hdr->payload_len = htons(len);
    hdr->nexthdr = proto;
    hdr->hop_limit = np->hop_limit;

    ipv6_addr_copy(&hdr->saddr, saddr);
    ipv6_addr_copy(&hdr->daddr, daddr);
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)

    struct ip6_ra_chain *ra;
    struct sock *last = NULL;

    read_lock(&ip6_ra_lock);
    for (ra = ip6_ra_chain; ra; ra = ra->next) {
        struct sock *sk = ra->sk;
        if (sk && ra->sel == sel &&
            (!sk->sk_bound_dev_if ||
             sk->sk_bound_dev_if == skb->dev->ifindex)) {

                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);

                    rawv6_rcv(last, skb2);

        rawv6_rcv(last, skb);
        read_unlock(&ip6_ra_lock);

    read_unlock(&ip6_ra_lock);


static inline int ip6_forward_finish(struct sk_buff *skb)

    return dst_output(skb);
int ip6_forward(struct sk_buff *skb)

    struct dst_entry *dst = skb->dst;
    struct ipv6hdr *hdr = skb->nh.ipv6h;
    struct inet6_skb_parm *opt = IP6CB(skb);

    if (ipv6_devconf.forwarding == 0)

    if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
        IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);

    skb->ip_summed = CHECKSUM_NONE;
     *  We do not do any processing on RA packets, pushing them to
     *  user level AS IS, without any warranty that the application
     *  will be able to interpret them.  The reason is that we
     *  cannot do anything clever here.
     *
     *  We are not the end node, so if the packet contains AH/ESP
     *  we cannot do anything with it.
     *  Defragmentation would also be a mistake: RA packets cannot
     *  be fragmented, because there is no guarantee that different
     *  fragments will go along one path. --ANK

        u8 *ptr = skb->nh.raw + opt->ra;
        if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
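        /*
         * Illustrative note: opt->ra is the offset of the Router Alert
         * hop-by-hop option (RFC 2711) within the packet.  The option is
         * laid out as type (5), length (2), then a 16-bit value, so
         * ptr[2] and ptr[3] above hold the value in network byte order;
         * value 0, for instance, marks a packet carrying an MLD message.
         * ip6_call_ra_chain() hands such packets to any raw socket that
         * registered for that value with the IPV6_ROUTER_ALERT socket
         * option.
         */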
     *  check and decrement ttl

    if (hdr->hop_limit <= 1) {
        /* Force OUTPUT device used as source address */

        icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,

    if (!xfrm6_route_forward(skb)) {
        IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);

    /* IPv6 specs say nothing about it, but it is clear that we cannot
       send redirects to source routed frames. */
    if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
        struct in6_addr *target = NULL;

        struct neighbour *n = dst->neighbour;

         *  incoming and outgoing devices are the same

        rt = (struct rt6_info *) dst;
        if ((rt->rt6i_flags & RTF_GATEWAY))
            target = (struct in6_addr *)&n->primary_key;

            target = &hdr->daddr;

        /* Limit redirects both by destination (here)
           and by source (inside ndisc_send_redirect). */
        if (xrlim_allow(dst, 1*HZ))
            ndisc_send_redirect(skb, n, target);
    } else if (ipv6_addr_type(&hdr->saddr) &
               (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK | IPV6_ADDR_LINKLOCAL)) {
        /* This check is security critical. */

    if (skb->len > dst_mtu(dst)) {
        /* Again, force OUTPUT device used as source address */

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
        IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
        IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);

    if (skb_cow(skb, dst->dev->hard_header_len)) {
        IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);

    /* Decrementing the hop limit is delayed until after the skb COW above */

    IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
    return NF_HOOK(PF_INET6, NF_IP6_FORWARD, skb, skb->dev, dst->dev,
                   ip6_forward_finish);

    IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)

    to->pkt_type = from->pkt_type;
    to->priority = from->priority;
    to->protocol = from->protocol;
    dst_release(to->dst);
    to->dst = dst_clone(from->dst);

#ifdef CONFIG_NET_SCHED
    to->tc_index = from->tc_index;

#ifdef CONFIG_NETFILTER
    to->nfmark = from->nfmark;
    /* Connection association is same as pre-frag packet */
    to->nfct = from->nfct;
    nf_conntrack_get(to->nfct);
    to->nfctinfo = from->nfctinfo;
#ifdef CONFIG_BRIDGE_NETFILTER
    nf_bridge_put(to->nf_bridge);
    to->nf_bridge = from->nf_bridge;
    nf_bridge_get(to->nf_bridge);
int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)

    u16 offset = sizeof(struct ipv6hdr);
    struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(skb->nh.ipv6h + 1);
    unsigned int packet_len = skb->tail - skb->nh.raw;

    *nexthdr = &skb->nh.ipv6h->nexthdr;

    while (offset + 1 <= packet_len) {

        case NEXTHDR_ROUTING:

            if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
            if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
            offset += ipv6_optlen(exthdr);
            *nexthdr = &exthdr->nexthdr;
            exthdr = (struct ipv6_opt_hdr *)(skb->nh.raw + offset);
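        /*
         * Illustrative note: the walk above finds where a Fragment header
         * may be inserted.  Per RFC 2460 the unfragmentable part is the
         * IPv6 header plus any Hop-by-Hop header, any Destination Options
         * header that precedes a Routing header, and the Routing header
         * itself, e.g.
         *
         *   IPv6 | Hop-by-Hop | Routing | <Fragment goes here> | Dest | TCP
         *
         * The function returns that insertion offset and leaves *nexthdr
         * pointing at the "next header" byte the caller rewrites to
         * NEXTHDR_FRAGMENT.
         */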
static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))

    struct net_device *dev;
    struct sk_buff *frag;
    struct rt6_info *rt = (struct rt6_info *)skb->dst;
    struct ipv6hdr *tmp_hdr;

    unsigned int mtu, hlen, left, len;

    int ptr, offset = 0, err = 0;
    u8 *prevhdr, nexthdr = 0;

    hlen = ip6_find_1stfragopt(skb, &prevhdr);

    mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
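    /*
     * Illustrative note: 'mtu' is now the room left for fragmentable
     * payload in each fragment.  For example, with a 1500-byte path MTU
     * and only the 40-byte IPv6 header in the unfragmentable part
     * (hlen = 40), mtu = 1500 - 40 - 8 = 1452.  The fast path below is
     * taken only when every skb on the frag_list already fits that
     * budget and all but the last are multiples of 8 bytes.
     */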
    if (skb_shinfo(skb)->frag_list) {
        int first_len = skb_pagelen(skb);

        if (first_len - hlen > mtu ||
            ((first_len - hlen) & 7) ||

        for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
            /* Correct geometry. */
            if (frag->len > mtu ||
                ((frag->len & 7) && frag->next) ||
                skb_headroom(frag) < hlen)

            /* Partially cloned skb? */
            if (skb_shared(frag))

                frag->destructor = sock_wfree;
                skb->truesize -= frag->truesize;

        frag = skb_shinfo(skb)->frag_list;
        skb_shinfo(skb)->frag_list = NULL;

        tmp_hdr = kmalloc(hlen, GFP_ATOMIC);

            IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);

        *prevhdr = NEXTHDR_FRAGMENT;
        memcpy(tmp_hdr, skb->nh.raw, hlen);
        __skb_pull(skb, hlen);
        fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
        skb->nh.raw = __skb_push(skb, hlen);
        memcpy(skb->nh.raw, tmp_hdr, hlen);

        ipv6_select_ident(skb, fh);
        fh->nexthdr = nexthdr;

        fh->frag_off = htons(IP6_MF);
        frag_id = fh->identification;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));

            /* Prepare header of the next frame,
             * before previous one went down. */

                frag->ip_summed = CHECKSUM_NONE;
                frag->h.raw = frag->data;
                fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
                frag->nh.raw = __skb_push(frag, hlen);
                memcpy(frag->nh.raw, tmp_hdr, hlen);
                offset += skb->len - hlen - sizeof(struct frag_hdr);
                fh->nexthdr = nexthdr;

                fh->frag_off = htons(offset);
                if (frag->next != NULL)
                    fh->frag_off |= htons(IP6_MF);
                fh->identification = frag_id;
                frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
                ip6_copy_metadata(frag, skb);
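            /*
             * Illustrative note: the Fragment header keeps the offset in
             * units of 8 octets in the top 13 bits of frag_off, with the
             * low bit used as the "more fragments" flag (IP6_MF).  Since
             * every fragment except the last is a multiple of 8 bytes,
             * the byte offset accumulated in 'offset' already has its low
             * three bits clear and can be stored directly: e.g. a second
             * fragment starting 1232 bytes into the fragmentable part
             * gets frag_off = htons(1232 | IP6_MF), and the final
             * fragment omits IP6_MF.
             */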
        IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);

        IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);

    left = skb->len - hlen;         /* Space per frame */
    ptr = hlen;                     /* Where to start from */

     *  Fragment the datagram.

    *prevhdr = NEXTHDR_FRAGMENT;

     *  Keep copying data until we run out.

        /* IF: it doesn't fit, use 'mtu' - the data space left */

        /* IF: we are not sending up to and including the packet end,
           then align the next start on an eight byte boundary */
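        /*
         * Editor's sketch (the corresponding lines are elided here): the
         * usual computation at this point is something like
         *
         *      len = left;
         *      if (len > mtu)
         *              len = mtu;
         *      if (len < left)
         *              len &= ~7;
         *
         * so every fragment except the last carries a multiple of 8
         * octets and the next fragment's offset stays expressible in the
         * Fragment header.
         */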
        if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
                              LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
            NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
            IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);

         *  Set up data on packet

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
        skb_put(frag, len + hlen + sizeof(struct frag_hdr));
        frag->nh.raw = frag->data;
        fh = (struct frag_hdr *)(frag->data + hlen);
        frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);

         *  Charge the memory for the fragment to any owner

            skb_set_owner_w(frag, skb->sk);

         *  Copy the packet header into the new buffer.

        memcpy(frag->nh.raw, skb->data, hlen);

         *  Build fragment header.

        fh->nexthdr = nexthdr;

            ipv6_select_ident(skb, fh);
            frag_id = fh->identification;

            fh->identification = frag_id;

         *  Copy a block of the IP datagram.

        if (skb_copy_bits(skb, ptr, frag->h.raw, len))

        fh->frag_off = htons(offset);

            fh->frag_off |= htons(IP6_MF);
        frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

         *  Put this fragment into the sending queue.

        IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);

    IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);

    IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)

        struct ipv6_pinfo *np = inet6_sk(sk);

        *dst = sk_dst_check(sk, np->dst_cookie);

            struct rt6_info *rt = (struct rt6_info *)*dst;

            /* Yes, checking route validity in the unconnected case is
               not simple.  Take into account that we do not support
               routing by source, TOS, or MSG_DONTROUTE   --ANK (980726)

               1. If the route was a host route, check that the cached
                  destination is still current.  If it is a network
                  route, we may still check its validity using a saved
                  pointer to the last used address: daddr_cache.  We do
                  not want to save the whole address now (because the
                  main consumer of this service is TCP, which does not
                  have this problem), so the last trick works only on
                  connected sockets.
               2. oif also should be the same. */

            if (((rt->rt6i_dst.plen != 128 ||
                  !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
                 && (np->daddr_cache == NULL ||
                     !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
                || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {

        *dst = ip6_route_output(sk, fl);

    if ((err = (*dst)->error))
        goto out_err_release;

    if (ipv6_addr_any(&fl->fl6_src)) {
        err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);

            goto out_err_release;
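/*
 * Illustrative overview (a sketch, not text from the original file):
 * ip6_append_data() below queues user data on sk->sk_write_queue under a
 * "cork", splitting it into skbs sized so that each one can later become
 * a single fragment.  ip6_push_pending_frames() then glues the queue
 * together, builds the IPv6 header and hands the packet to dst_output()
 * through the NF_IP6_LOCAL_OUT hook, while ip6_flush_pending_frames()
 * simply discards the queue.  A caller such as UDPv6, ICMPv6 or a raw
 * socket therefore follows roughly this pattern:
 *
 *      err = ip6_append_data(sk, getfrag, from, len, transhdrlen,
 *                            hlimit, tclass, opt, fl, rt, flags);
 *      if (err)
 *              ip6_flush_pending_frames(sk);
 *      else if (!corked)                  /* 'corked' is illustrative */
 *              err = ip6_push_pending_frames(sk);
 */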
int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
                    int offset, int len, int odd, struct sk_buff *skb),
                    void *from, int length, int transhdrlen,
                    int hlimit, int tclass, struct ipv6_txoptions *opt,
                    struct flowi *fl, struct rt6_info *rt, unsigned int flags)

    struct inet_sock *inet = inet_sk(sk);
    struct ipv6_pinfo *np = inet6_sk(sk);

    unsigned int maxfraglen, fragheaderlen;

    int csummode = CHECKSUM_NONE;

    if (skb_queue_empty(&sk->sk_write_queue)) {

            if (np->cork.opt == NULL) {
                np->cork.opt = kmalloc(opt->tot_len,

                if (unlikely(np->cork.opt == NULL))

            } else if (np->cork.opt->tot_len < opt->tot_len) {
                printk(KERN_DEBUG "ip6_append_data: invalid option length\n");

            memcpy(np->cork.opt, opt, opt->tot_len);
            inet->cork.flags |= IPCORK_OPT;
            /* need source address above --miyazawa */

        dst_hold(&rt->u.dst);

        np->cork.hop_limit = hlimit;
        np->cork.tclass = tclass;
        inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
        if (dst_allfrag(rt->u.dst.path))
            inet->cork.flags |= IPCORK_ALLFRAG;
        inet->cork.length = 0;
        sk->sk_sndmsg_page = NULL;
        sk->sk_sndmsg_off = 0;
        exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);

        transhdrlen += exthdrlen;

        if (inet->cork.flags & IPCORK_OPT)

        mtu = inet->cork.fragsize;

    hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);

    fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
    maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);

    if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
        if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
            ipv6_local_error(sk, EMSGSIZE, fl, mtu - exthdrlen);

     * Let's try using as much space as possible.
     * Use the MTU if the total length of the message fits into the MTU.
     * Otherwise, we need to reserve the fragment header and fragment
     * alignment (= 8-15 octets, in total).
     *
     * Note that we may need to "move" the data from the tail of the
     * buffer to the new fragment when we split the message.
     *
     * FIXME: It may be fragmented into multiple chunks at once if
     *        non-fragmentable extension headers are too large.
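    /*
     * Illustrative note: with the maxfraglen computation above, a
     * 1500-byte MTU and no extension headers (fragheaderlen = 40) give
     *
     *      maxfraglen = ((1500 - 40) & ~7) + 40 - 8 = 1488
     *
     * i.e. 1448 bytes of payload per fragment once the 8-byte Fragment
     * header is reserved, and 40 + 8 + 1448 = 1496 <= 1500 bytes on the
     * wire for each fragment.
     */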
    inet->cork.length += length;

    if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)

        /* Check if the remaining data fits into current packet. */
        copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;

            copy = maxfraglen - skb->len;

            unsigned int datalen;
            unsigned int fraglen;
            unsigned int fraggap;
            unsigned int alloclen;
            struct sk_buff *skb_prev;

            /* There's no room in the current skb */

                fraggap = skb_prev->len - maxfraglen;

             * If remaining data exceeds the mtu,
             * we know we need more fragment(s).

            datalen = length + fraggap;
            if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
                datalen = maxfraglen - fragheaderlen;

            fraglen = datalen + fragheaderlen;
            if ((flags & MSG_MORE) &&
                !(rt->u.dst.dev->features & NETIF_F_SG))

                alloclen = datalen + fragheaderlen;
             * The last fragment gets additional space at the tail.
             * Note: we overallocate on fragments with MSG_MORE
             * because we have no idea if we're the last one.

            if (datalen == length + fraggap)
                alloclen += rt->u.dst.trailer_len;

             * We just reserve space for the fragment header.
             * Note: this may be an overallocation if the message
             * (without MSG_MORE) fits into the MTU.

            alloclen += sizeof(struct frag_hdr);

                skb = sock_alloc_send_skb(sk,
                                          (flags & MSG_DONTWAIT), &err);
            if (atomic_read(&sk->sk_wmem_alloc) <=

                skb = sock_wmalloc(sk,
                                   alloclen + hh_len, 1,

                if (unlikely(skb == NULL))

             *  Fill in the control structures

            skb->ip_summed = csummode;

            /* reserve for fragmentation */
            skb_reserve(skb, hh_len + sizeof(struct frag_hdr));

             *  Find where to start putting bytes

            data = skb_put(skb, fraglen);
            skb->nh.raw = data + exthdrlen;
            data += fragheaderlen;
            skb->h.raw = data + exthdrlen;

                skb->csum = skb_copy_and_csum_bits(
                    skb_prev, maxfraglen,
                    data + transhdrlen, fraggap, 0);
                skb_prev->csum = csum_sub(skb_prev->csum,

                skb_trim(skb_prev, maxfraglen);

            copy = datalen - transhdrlen - fraggap;

            } else if (copy > 0 && getfrag(from, data + transhdrlen, offset,
                                           copy, fraggap, skb) < 0) {

            length -= datalen - fraggap;

            csummode = CHECKSUM_NONE;

             *  Put the packet on the pending queue

            __skb_queue_tail(&sk->sk_write_queue, skb);
        if (!(rt->u.dst.dev->features & NETIF_F_SG)) {

            if (getfrag(from, skb_put(skb, copy),
                        offset, copy, off, skb) < 0) {
                __skb_trim(skb, off);

            int i = skb_shinfo(skb)->nr_frags;
            skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
            struct page *page = sk->sk_sndmsg_page;
            int off = sk->sk_sndmsg_off;

            if (page && (left = PAGE_SIZE - off) > 0) {

                if (page != frag->page) {
                    if (i == MAX_SKB_FRAGS) {

                    skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
                    frag = &skb_shinfo(skb)->frags[i];

            } else if (i < MAX_SKB_FRAGS) {
                if (copy > PAGE_SIZE)

                page = alloc_pages(sk->sk_allocation, 0);

                sk->sk_sndmsg_page = page;
                sk->sk_sndmsg_off = 0;

                skb_fill_page_desc(skb, i, page, 0, 0);
                frag = &skb_shinfo(skb)->frags[i];
                skb->truesize += PAGE_SIZE;
                atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);

            if (getfrag(from,
                        page_address(frag->page) + frag->page_offset + frag->size,
                        offset, copy, skb->len, skb) < 0) {

            sk->sk_sndmsg_off += copy;

            skb->data_len += copy;

    inet->cork.length -= length;
    IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
int ip6_push_pending_frames(struct sock *sk)

    struct sk_buff *skb, *tmp_skb;
    struct sk_buff **tail_skb;
    struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
    struct inet_sock *inet = inet_sk(sk);
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct ipv6hdr *hdr;
    struct ipv6_txoptions *opt = np->cork.opt;
    struct rt6_info *rt = np->cork.rt;
    struct flowi *fl = &inet->cork.fl;
    unsigned char proto = fl->proto;

    if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)

    tail_skb = &(skb_shinfo(skb)->frag_list);

    /* move skb->data to ip header from ext header */
    if (skb->data < skb->nh.raw)
        __skb_pull(skb, skb->nh.raw - skb->data);
    while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
        __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
        *tail_skb = tmp_skb;
        tail_skb = &(tmp_skb->next);
        skb->len += tmp_skb->len;
        skb->data_len += tmp_skb->len;
        skb->truesize += tmp_skb->truesize;
        __sock_put(tmp_skb->sk);
        tmp_skb->destructor = NULL;

    ipv6_addr_copy(final_dst, &fl->fl6_dst);
    __skb_pull(skb, skb->h.raw - skb->nh.raw);
    if (opt && opt->opt_flen)
        ipv6_push_frag_opts(skb, opt, &proto);
    if (opt && opt->opt_nflen)
        ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

    skb->nh.ipv6h = hdr = (struct ipv6hdr *) skb_push(skb, sizeof(struct ipv6hdr));

    *(u32 *)hdr = fl->fl6_flowlabel |
                  htonl(0x60000000 | ((int)np->cork.tclass << 20));

    if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
        hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));

        hdr->payload_len = 0;
    hdr->hop_limit = np->cork.hop_limit;
    hdr->nexthdr = proto;
    ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
    ipv6_addr_copy(&hdr->daddr, final_dst);

    skb->dst = dst_clone(&rt->u.dst);
    IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
    err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);

            err = np->recverr ? net_xmit_errno(err) : 0;

    inet->cork.flags &= ~IPCORK_OPT;

        kfree(np->cork.opt);
        np->cork.opt = NULL;

        dst_release(&np->cork.rt->u.dst);

        inet->cork.flags &= ~IPCORK_ALLFRAG;

    memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
void ip6_flush_pending_frames(struct sock *sk)

    struct inet_sock *inet = inet_sk(sk);
    struct ipv6_pinfo *np = inet6_sk(sk);
    struct sk_buff *skb;

    while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
        IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);

    inet->cork.flags &= ~IPCORK_OPT;

        kfree(np->cork.opt);
        np->cork.opt = NULL;

        dst_release(&np->cork.rt->u.dst);

    inet->cork.flags &= ~IPCORK_ALLFRAG;

    memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));