2 * IPv6 output functions
3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * $Id: ip6_output.c,v 1.34 2002/02/01 22:01:04 davem Exp $
10 * Based on linux/net/ipv4/ip_output.c
12 * This program is free software; you can redistribute it and/or
13 * modify it under the terms of the GNU General Public License
14 * as published by the Free Software Foundation; either version
15 * 2 of the License, or (at your option) any later version.
18 * A.N.Kuznetsov : arithmetic in fragmentation.
19 * extension headers are implemented.
20 * route changes now work.
21 * ip6_forward does not confuse sniffers.
24 * H. von Brand : Added missing #include <linux/string.h>
25 * Imran Patel : frag id should be in NBO
26 * Kazunori MIYAZAWA @USAGI
27 * : add ip6_append_data and related functions
31 #include <linux/config.h>
32 #include <linux/errno.h>
33 #include <linux/types.h>
34 #include <linux/string.h>
35 #include <linux/socket.h>
36 #include <linux/net.h>
37 #include <linux/netdevice.h>
38 #include <linux/if_arp.h>
39 #include <linux/in6.h>
40 #include <linux/tcp.h>
41 #include <linux/route.h>
43 #include <linux/netfilter.h>
44 #include <linux/netfilter_ipv6.h>
50 #include <net/ndisc.h>
51 #include <net/protocol.h>
52 #include <net/ip6_route.h>
53 #include <net/addrconf.h>
54 #include <net/rawv6.h>
57 #include <net/checksum.h>
/*
 * Forward declaration: ip6_output() calls ip6_fragment() before the
 * definition, which appears further down in this file.
 */
59 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
/*
 * Stamp a fragment header with the next fragment identification.
 *
 * skb:  packet being fragmented (not referenced by the visible statements)
 * fhdr: fragment header whose identification field is written
 *
 * The id comes from a function-local static counter protected by
 * ip6_id_lock, taken with spin_lock_bh(). The current value is stored
 * through htonl(), then the counter is advanced; when the increment wraps
 * to 0 the counter is reset to 1.
 *
 * NOTE(review): brace-only lines of this function are absent from this
 * listing.
 */
61 static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
63 static u32 ipv6_fragmentation_id = 1;
64 static DEFINE_SPINLOCK(ip6_id_lock);
66 spin_lock_bh(&ip6_id_lock);
67 fhdr->identification = htonl(ipv6_fragmentation_id);
68 if (++ipv6_fragmentation_id == 0)
69 ipv6_fragmentation_id = 1;
70 spin_unlock_bh(&ip6_id_lock);
/*
 * Hand a routed packet to the link layer.
 *
 * Cached hardware header path: HH_DATA_ALIGN(hh->hh_len) bytes of
 * hh->hh_data are copied immediately in front of skb->data while holding
 * hh->hh_lock for reading, hh->hh_len bytes are pushed onto the skb, and
 * the result of hh->hh_output(skb) is returned.
 *
 * Otherwise, when dst->neighbour is set, the result of
 * dst->neighbour->output(skb) is returned.
 *
 * The remaining path increments IPSTATS_MIB_OUTNOROUTES.
 *
 * NOTE(review): the test that opens the first branch, the hh_alen
 * declaration and everything after the statistics update are not visible
 * in this listing.
 */
73 static inline int ip6_output_finish(struct sk_buff *skb)
76 struct dst_entry *dst = skb->dst;
77 struct hh_cache *hh = dst->hh;
82 read_lock_bh(&hh->hh_lock);
83 hh_alen = HH_DATA_ALIGN(hh->hh_len);
84 memcpy(skb->data - hh_alen, hh->hh_data, hh_alen);
85 read_unlock_bh(&hh->hh_lock);
86 skb_push(skb, hh->hh_len);
87 return hh->hh_output(skb);
88 } else if (dst->neighbour)
89 return dst->neighbour->output(skb);
91 IP6_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
/*
 * Prepare a packet for local loopback delivery.
 *
 * The visible statements record the current data pointer as the MAC
 * header, pull the skb up to its network header, mark the packet
 * PACKET_LOOPBACK with CHECKSUM_UNNECESSARY, and assert through BUG_TRAP()
 * that a dst is attached.
 *
 * NOTE(review): the statements that deliver the packet and return are not
 * visible in this listing.
 */
97 /* dev_loopback_xmit for use with netfilter. */
98 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
100 newskb->mac.raw = newskb->data;
101 __skb_pull(newskb, newskb->nh.raw - newskb->data);
102 newskb->pkt_type = PACKET_LOOPBACK;
103 newskb->ip_summed = CHECKSUM_UNNECESSARY;
104 BUG_TRAP(newskb->dst);
/*
 * Output step that runs the POST_ROUTING hook for a packet.
 *
 * Sets skb->protocol to ETH_P_IPV6. For a multicast destination:
 *  - when the output device is not a loopback device, the sending socket
 *    (if any) has mc_loop set, and ipv6_chk_mcast_addr() accepts the
 *    destination/source pair, a clone of the packet is passed through
 *    NF_IP6_POST_ROUTING with ip6_dev_loopback_xmit as continuation;
 *  - a hop limit of 0 is counted as IPSTATS_MIB_OUTDISCARDS;
 *  - IPSTATS_MIB_OUTMCASTPKTS is incremented.
 * Finally the packet goes through NF_IP6_POST_ROUTING with
 * ip6_output_finish as continuation and that result is returned.
 *
 * NOTE(review): the check of the clone result, the handling after the
 * hop limit test and several closing lines are not visible in this
 * listing.
 */
111 static int ip6_output2(struct sk_buff *skb)
113 struct dst_entry *dst = skb->dst;
114 struct net_device *dev = dst->dev;
116 skb->protocol = htons(ETH_P_IPV6);
119 if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr)) {
120 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
122 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
123 ipv6_chk_mcast_addr(dev, &skb->nh.ipv6h->daddr,
124 &skb->nh.ipv6h->saddr)) {
125 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
127 /* Do not check for IFF_ALLMULTI; multicast routing
128 is not supported in any case.
131 NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, newskb, NULL,
133 ip6_dev_loopback_xmit);
135 if (skb->nh.ipv6h->hop_limit == 0) {
136 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
142 IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
145 return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
/*
 * Output entry point: fragment when required, otherwise send directly.
 *
 * A packet longer than dst_mtu(skb->dst), or one whose route has
 * dst_allfrag() set, is handed to ip6_fragment() with ip6_output2 as the
 * per-fragment output routine. Any other packet goes straight to
 * ip6_output2(). The result of the chosen call is returned.
 */
148 int ip6_output(struct sk_buff *skb)
150 if (skb->len > dst_mtu(skb->dst) || dst_allfrag(skb->dst))
151 return ip6_fragment(skb, ip6_output2);
153 return ip6_output2(skb);
156 #ifdef CONFIG_NETFILTER
/*
 * Look up a new route for a packet and drop its old one.
 *
 * A flow description is built from the packet itself: the output
 * interface is the bound device of the owning socket (0 without a
 * socket), and destination, source and protocol come from the IPv6
 * header. The lookup is done with ip6_route_output(). One path counts
 * IPSTATS_MIB_OUTNOROUTES and emits a debug message. The old route is
 * released with dst_release(skb->dst).
 *
 * NOTE(review): the declaration of fl, the test guarding the failure
 * path, the assignment of the new route and the return statements are not
 * visible in this listing. The unbalanced parenthesis on the printk line
 * presumably belongs to a wrapper on an absent line - confirm against the
 * full file.
 */
157 int ip6_route_me_harder(struct sk_buff *skb)
159 struct ipv6hdr *iph = skb->nh.ipv6h;
160 struct dst_entry *dst;
162 .oif = skb->sk ? skb->sk->sk_bound_dev_if : 0,
165 { .daddr = iph->daddr,
166 .saddr = iph->saddr, } },
167 .proto = iph->nexthdr,
170 dst = ip6_route_output(skb->sk, &fl);
173 IP6_INC_STATS(IPSTATS_MIB_OUTNOROUTES);
175 printk(KERN_DEBUG "ip6_route_me_harder: No more route.\n"));
180 /* Drop old route. */
181 dst_release(skb->dst);
/*
 * Continuation for the LOCAL_OUT hook: re-route if needed, then output.
 *
 * When built with CONFIG_NETFILTER and skb->nfcache has NFC_ALTERED set,
 * ip6_route_me_harder() is called first. The packet is then passed to
 * dst_output() and that result is returned.
 *
 * NOTE(review): what happens when ip6_route_me_harder() returns non-zero
 * is not visible in this listing.
 */
188 static inline int ip6_maybe_reroute(struct sk_buff *skb)
190 #ifdef CONFIG_NETFILTER
191 if (skb->nfcache & NFC_ALTERED){
192 if (ip6_route_me_harder(skb) != 0){
197 #endif /* CONFIG_NETFILTER */
198 return dst_output(skb);
202 * xmit an sk_buff (used by TCP)
/*
 * Build the IPv6 header on an skb and send it through the LOCAL_OUT hook.
 *
 * sk:       sending socket, may be NULL (np is then NULL)
 * skb:      packet whose payload is already in place; skb->dst is used
 * fl:       flow providing addresses, flow label and protocol
 * opt:      extension header options to push in front of the payload
 * ipfragok: non-zero lets a packet larger than the MTU through
 *
 * Visible steps:
 *  - With options, the needed headroom is the option lengths plus the
 *    IPv6 header plus LL_RESERVED_SPACE(dst->dev); if the skb has less,
 *    skb_realloc_headroom() is used. One path counts
 *    IPSTATS_MIB_OUTDISCARDS and one calls skb_set_owner_w(). The options
 *    are pushed with ipv6_push_frag_opts() and ipv6_push_nfrag_opts();
 *    the latter receives the address of first_hop.
 *  - The IPv6 header is pushed and filled: first word is
 *    htonl(0x60000000) combined with the flow label, payload length is
 *    the original skb length plus option lengths, source is fl6_src and
 *    destination is first_hop.
 *  - The hop limit is taken from np->hop_limit, the RTAX_HOPLIMIT route
 *    metric, or ipv6_get_hoplimit(dst->dev).
 *  - If the packet fits in mtu or ipfragok is set, IPSTATS_MIB_OUTREQUESTS
 *    is counted and the packet goes through NF_IP6_LOCAL_OUT with
 *    ip6_maybe_reroute as continuation.
 *  - Otherwise a debug message is printed, ICMPV6_PKT_TOOBIG is sent
 *    for this packet, and IPSTATS_MIB_FRAGFAILS is counted.
 *
 * NOTE(review): the declarations of hdr, head_room, hlimit and mtu, the
 * conditions selecting the hop limit source, the mtu assignment, and the
 * error returns are not visible in this listing.
 */
205 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
206 struct ipv6_txoptions *opt, int ipfragok)
208 struct ipv6_pinfo *np = sk ? inet6_sk(sk) : NULL;
209 struct in6_addr *first_hop = &fl->fl6_dst;
210 struct dst_entry *dst = skb->dst;
212 u8 proto = fl->proto;
213 int seg_len = skb->len;
220 /* First: exthdrs may take lots of space (~8K for now)
221 MAX_HEADER is not enough.
223 head_room = opt->opt_nflen + opt->opt_flen;
224 seg_len += head_room;
225 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
227 if (skb_headroom(skb) < head_room) {
228 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
232 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
236 skb_set_owner_w(skb, sk);
239 ipv6_push_frag_opts(skb, opt, &proto);
241 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
244 hdr = skb->nh.ipv6h = (struct ipv6hdr*)skb_push(skb, sizeof(struct ipv6hdr));
247 * Fill in the IPv6 header
250 *(u32*)hdr = htonl(0x60000000) | fl->fl6_flowlabel;
253 hlimit = np->hop_limit;
255 hlimit = dst_metric(dst, RTAX_HOPLIMIT);
257 hlimit = ipv6_get_hoplimit(dst->dev);
259 hdr->payload_len = htons(seg_len);
260 hdr->nexthdr = proto;
261 hdr->hop_limit = hlimit;
263 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
264 ipv6_addr_copy(&hdr->daddr, first_hop);
267 if ((skb->len <= mtu) || ipfragok) {
268 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
269 return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
273 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
275 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
276 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
282 * To avoid extra problems ND packets are sent through this
283 * routine. It's code duplication but I really want to avoid
284 * extra checks since ipv6_build_header is used by TCP (which
285 * is for us performance critical)
/*
 * Append a basic IPv6 header to an skb.
 *
 * sk:    socket whose IPv6 state supplies the hop limit
 * skb:   buffer that receives the header via skb_put()
 * dev:   not referenced by the visible statements
 * saddr: source address copied into the header
 * daddr: destination address copied into the header
 *
 * Sets skb->protocol to ETH_P_IPV6 and fills the header: first word is
 * htonl(0x60000000), payload length is len, next header is proto, hop
 * limit is np->hop_limit.
 *
 * NOTE(review): the line declaring the remaining parameters (proto and
 * len are used in the body), the declarations of hdr and totlen, any use
 * of totlen, and the return statement are not visible in this listing.
 */
288 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
289 struct in6_addr *saddr, struct in6_addr *daddr,
292 struct ipv6_pinfo *np = inet6_sk(sk);
296 skb->protocol = htons(ETH_P_IPV6);
299 totlen = len + sizeof(struct ipv6hdr);
301 hdr = (struct ipv6hdr *) skb_put(skb, sizeof(struct ipv6hdr));
304 *(u32*)hdr = htonl(0x60000000);
306 hdr->payload_len = htons(len);
307 hdr->nexthdr = proto;
308 hdr->hop_limit = np->hop_limit;
310 ipv6_addr_copy(&hdr->saddr, saddr);
311 ipv6_addr_copy(&hdr->daddr, daddr);
/*
 * Deliver a packet to the sockets registered on the ip6_ra_chain list.
 *
 * skb: packet to deliver
 * sel: selector value; only entries with a socket and ra->sel == sel match
 *
 * The list is walked under ip6_ra_lock held for reading. Inside the loop
 * a clone of skb is handed to rawv6_rcv() for the socket held in last;
 * after the loop skb itself is handed to rawv6_rcv() for last. The lock
 * is dropped on both exit paths.
 *
 * NOTE(review): the tests on last and on the clone result, the
 * assignment to last and the return values are not visible in this
 * listing.
 */
316 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
318 struct ip6_ra_chain *ra;
319 struct sock *last = NULL;
321 read_lock(&ip6_ra_lock);
322 for (ra = ip6_ra_chain; ra; ra = ra->next) {
323 struct sock *sk = ra->sk;
324 if (sk && ra->sel == sel) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 rawv6_rcv(last, skb2);
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
339 read_unlock(&ip6_ra_lock);
/*
 * Continuation for the FORWARD hook in ip6_forward(): passes the packet
 * to dst_output() and returns its result.
 */
343 static inline int ip6_forward_finish(struct sk_buff *skb)
345 return dst_output(skb);
/*
 * Forward a received IPv6 packet along its route.
 *
 * Visible checks and actions, in order:
 *  - ipv6_devconf.forwarding is tested against 0.
 *  - xfrm6_policy_check() with XFRM_POLICY_FWD; failure counts
 *    IPSTATS_MIB_INDISCARDS.
 *  - skb->ip_summed is reset to CHECKSUM_NONE.
 *  - Router alert: a 16-bit value read from bytes 2 and 3 at
 *    skb->nh.raw + opt->ra is passed to ip6_call_ra_chain().
 *  - A hop limit of 1 or less triggers ICMPV6_TIME_EXCEED with code
 *    ICMPV6_EXC_HOPLIMIT.
 *  - xfrm6_route_forward(); failure counts IPSTATS_MIB_INDISCARDS.
 *  - When the packet would leave on the device it arrived on, a neighbour
 *    exists and the packet is not source routed, a redirect is sent,
 *    rate limited by xrlim_allow(dst, 1*HZ). The target is the neighbour
 *    key for gateway routes, else the packet destination.
 *  - Otherwise a multicast, loopback or link-local source address takes a
 *    separate branch, marked as security critical.
 *  - A packet longer than dst_mtu(dst) triggers ICMPV6_PKT_TOOBIG and
 *    counts IPSTATS_MIB_INTOOBIGERRORS and IPSTATS_MIB_FRAGFAILS.
 *  - skb_cow() reserves hard_header_len bytes of headroom; failure counts
 *    IPSTATS_MIB_OUTDISCARDS.
 *  - IPSTATS_MIB_OUTFORWDATAGRAMS is counted and the packet goes through
 *    NF_IP6_FORWARD with ip6_forward_finish as continuation.
 *  - A further path counts IPSTATS_MIB_INADDRERRORS.
 *
 * NOTE(review): the jumps and returns on each failure path, the test on
 * opt->ra, the body of the source address branch, the hop limit decrement
 * and the labels near the end are not visible in this listing.
 */
348 int ip6_forward(struct sk_buff *skb)
350 struct dst_entry *dst = skb->dst;
351 struct ipv6hdr *hdr = skb->nh.ipv6h;
352 struct inet6_skb_parm *opt = IP6CB(skb);
354 if (ipv6_devconf.forwarding == 0)
357 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
358 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
362 skb->ip_summed = CHECKSUM_NONE;
365 * We DO NOT make any processing on
366 * RA packets, pushing them to user level AS IS
367 * without ane WARRANTY that application will be able
368 * to interpret them. The reason is that we
369 * cannot make anything clever here.
371 * We are not end-node, so that if packet contains
372 * AH/ESP, we cannot make anything.
373 * Defragmentation also would be mistake, RA packets
374 * cannot be fragmented, because there is no warranty
375 * that different fragments will go along one path. --ANK
378 u8 *ptr = skb->nh.raw + opt->ra;
379 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
384 * check and decrement ttl
386 if (hdr->hop_limit <= 1) {
387 /* Force OUTPUT device used as source address */
389 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
396 if (!xfrm6_route_forward(skb)) {
397 IP6_INC_STATS(IPSTATS_MIB_INDISCARDS);
402 /* IPv6 specs say nothing about it, but it is clear that we cannot
403 send redirects to source routed frames.
405 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0) {
406 struct in6_addr *target = NULL;
408 struct neighbour *n = dst->neighbour;
411 * incoming and outgoing devices are the same
415 rt = (struct rt6_info *) dst;
416 if ((rt->rt6i_flags & RTF_GATEWAY))
417 target = (struct in6_addr*)&n->primary_key;
419 target = &hdr->daddr;
421 /* Limit redirects both by destination (here)
422 and by source (inside ndisc_send_redirect)
424 if (xrlim_allow(dst, 1*HZ))
425 ndisc_send_redirect(skb, n, target);
426 } else if (ipv6_addr_type(&hdr->saddr)&(IPV6_ADDR_MULTICAST|IPV6_ADDR_LOOPBACK
427 |IPV6_ADDR_LINKLOCAL)) {
428 /* This check is security critical. */
432 if (skb->len > dst_mtu(dst)) {
433 /* Again, force OUTPUT device used as source address */
435 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
436 IP6_INC_STATS_BH(IPSTATS_MIB_INTOOBIGERRORS);
437 IP6_INC_STATS_BH(IPSTATS_MIB_FRAGFAILS);
442 if (skb_cow(skb, dst->dev->hard_header_len)) {
443 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
449 /* Mangling hops number delayed to point after skb COW */
453 IP6_INC_STATS_BH(IPSTATS_MIB_OUTFORWDATAGRAMS);
454 return NF_HOOK(PF_INET6,NF_IP6_FORWARD, skb, skb->dev, dst->dev, ip6_forward_finish);
457 IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS);
/*
 * Copy per-packet metadata from one skb to another.
 *
 * to:   skb receiving the metadata
 * from: skb supplying the metadata
 *
 * Copies pkt_type, priority and protocol. The route held by to is
 * released and replaced by dst_clone(from->dst). With CONFIG_NET_SCHED
 * tc_index is copied. With CONFIG_NETFILTER nfmark, nfct and nfctinfo are
 * copied and nf_conntrack_get() is called on the copied nfct. With
 * CONFIG_BRIDGE_NETFILTER the old nf_bridge of to is put, the pointer is
 * copied, and nf_bridge_get() is called on it.
 *
 * NOTE(review): the closing preprocessor lines, and any other fields
 * copied on absent lines, are not visible in this listing.
 */
463 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
465 to->pkt_type = from->pkt_type;
466 to->priority = from->priority;
467 to->protocol = from->protocol;
468 dst_release(to->dst);
469 to->dst = dst_clone(from->dst);
472 #ifdef CONFIG_NET_SCHED
473 to->tc_index = from->tc_index;
475 #ifdef CONFIG_NETFILTER
476 to->nfmark = from->nfmark;
477 /* Connection association is same as pre-frag packet */
478 to->nfct = from->nfct;
479 nf_conntrack_get(to->nfct);
480 to->nfctinfo = from->nfctinfo;
481 #ifdef CONFIG_BRIDGE_NETFILTER
482 nf_bridge_put(to->nf_bridge);
483 to->nf_bridge = from->nf_bridge;
484 nf_bridge_get(to->nf_bridge);
/*
 * Walk the extension header chain of a packet and report an offset.
 *
 * skb:     packet whose network header is at skb->nh
 * nexthdr: output; set to point at the nexthdr byte last stepped over,
 *          starting with the one in the fixed IPv6 header
 *
 * The walk starts right after the fixed header at offset
 * sizeof(struct ipv6hdr) and continues while offset + 1 does not exceed
 * the distance from the network header to skb->tail. A routing header
 * sets found_rhdr; a destination options header seen after a routing
 * header ends the walk and the current offset is returned. Otherwise the
 * offset advances by ipv6_optlen() of the current header.
 *
 * NOTE(review): the switch statement, its other case labels, the default
 * branch and the final return are not visible in this listing.
 */
489 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
491 u16 offset = sizeof(struct ipv6hdr);
492 struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
493 unsigned int packet_len = skb->tail - skb->nh.raw;
495 *nexthdr = &skb->nh.ipv6h->nexthdr;
497 while (offset + 1 <= packet_len) {
502 case NEXTHDR_ROUTING:
504 if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
505 if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
506 offset += ipv6_optlen(exthdr);
507 *nexthdr = &exthdr->nexthdr;
508 exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
/*
 * Split a packet into IPv6 fragments and send each through output().
 *
 * skb:    packet to fragment; its route supplies the MTU
 * output: routine called to transmit each fragment
 *
 * hlen is the value returned by ip6_find_1stfragopt(). The per-fragment
 * payload budget is dst_mtu() minus hlen minus the fragment header size.
 *
 * Two strategies are visible:
 *
 * 1. The skb already carries a frag_list. The head and every list member
 *    are checked for size against the budget, 8-byte alignment (except
 *    the last member), sufficient headroom, and sharing. If usable, a
 *    fragment header is inserted into the head after the first hlen
 *    bytes, a saved copy of those hlen bytes (tmp_hdr) is placed in
 *    front of each list member together with a fragment header, and
 *    metadata is copied with ip6_copy_metadata(). All fragments reuse the
 *    identification chosen for the head. Success counts
 *    IPSTATS_MIB_FRAGOKS, failure IPSTATS_MIB_FRAGFAILS.
 *
 * 2. Otherwise new skbs are allocated one per fragment, sized for the
 *    data chunk, hlen, the fragment header and LL_RESERVED_SPACE(). The
 *    first hlen bytes of the original are copied in, a fragment header
 *    is built, and the payload chunk is copied with skb_copy_bits().
 *    Each fragment counts IPSTATS_MIB_FRAGCREATES; the end result counts
 *    IPSTATS_MIB_FRAGOKS or IPSTATS_MIB_FRAGFAILS.
 *
 * NOTE(review): the loop headers, the calls to output(), the frees, the
 * goto targets and the return statements are not visible in this listing.
 */
518 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
520 struct net_device *dev;
521 struct sk_buff *frag;
522 struct rt6_info *rt = (struct rt6_info*)skb->dst;
523 struct ipv6hdr *tmp_hdr;
525 unsigned int mtu, hlen, left, len;
527 int ptr, offset = 0, err=0;
528 u8 *prevhdr, nexthdr = 0;
531 hlen = ip6_find_1stfragopt(skb, &prevhdr);
534 mtu = dst_mtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
536 if (skb_shinfo(skb)->frag_list) {
537 int first_len = skb_pagelen(skb);
539 if (first_len - hlen > mtu ||
540 ((first_len - hlen) & 7) ||
544 for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
545 /* Correct geometry. */
546 if (frag->len > mtu ||
547 ((frag->len & 7) && frag->next) ||
548 skb_headroom(frag) < hlen)
551 /* Partially cloned skb? */
552 if (skb_shared(frag))
559 frag->destructor = sock_wfree;
560 skb->truesize -= frag->truesize;
566 frag = skb_shinfo(skb)->frag_list;
567 skb_shinfo(skb)->frag_list = NULL;
570 tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
572 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
576 *prevhdr = NEXTHDR_FRAGMENT;
577 memcpy(tmp_hdr, skb->nh.raw, hlen);
578 __skb_pull(skb, hlen);
579 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
580 skb->nh.raw = __skb_push(skb, hlen);
581 memcpy(skb->nh.raw, tmp_hdr, hlen);
583 ipv6_select_ident(skb, fh);
584 fh->nexthdr = nexthdr;
586 fh->frag_off = htons(IP6_MF);
587 frag_id = fh->identification;
589 first_len = skb_pagelen(skb);
590 skb->data_len = first_len - skb_headlen(skb);
591 skb->len = first_len;
592 skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
596 /* Prepare header of the next frame,
597 * before previous one went down. */
599 frag->ip_summed = CHECKSUM_NONE;
600 frag->h.raw = frag->data;
601 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
602 frag->nh.raw = __skb_push(frag, hlen);
603 memcpy(frag->nh.raw, tmp_hdr, hlen);
604 offset += skb->len - hlen - sizeof(struct frag_hdr);
605 fh->nexthdr = nexthdr;
607 fh->frag_off = htons(offset);
608 if (frag->next != NULL)
609 fh->frag_off |= htons(IP6_MF);
610 fh->identification = frag_id;
611 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
612 ip6_copy_metadata(frag, skb);
628 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
638 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
643 left = skb->len - hlen; /* Space per frame */
644 ptr = hlen; /* Where to start from */
647 * Fragment the datagram.
650 *prevhdr = NEXTHDR_FRAGMENT;
653 * Keep copying data until we run out.
657 /* IF: it doesn't fit, use 'mtu' - the data space left */
660 /* IF: we are not sending upto and including the packet end
661 then align the next start on an eight byte boundary */
669 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
670 NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
671 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
677 * Set up data on packet
680 ip6_copy_metadata(frag, skb);
681 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
682 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
683 frag->nh.raw = frag->data;
684 fh = (struct frag_hdr*)(frag->data + hlen);
685 frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
688 * Charge the memory for the fragment to any owner
692 skb_set_owner_w(frag, skb->sk);
695 * Copy the packet header into the new buffer.
697 memcpy(frag->nh.raw, skb->data, hlen);
700 * Build fragment header.
702 fh->nexthdr = nexthdr;
705 ipv6_select_ident(skb, fh);
706 frag_id = fh->identification;
708 fh->identification = frag_id;
711 * Copy a block of the IP datagram.
713 if (skb_copy_bits(skb, ptr, frag->h.raw, len))
717 fh->frag_off = htons(offset);
719 fh->frag_off |= htons(IP6_MF);
720 frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
726 * Put this fragment into the sending queue.
729 IP6_INC_STATS(IPSTATS_MIB_FRAGCREATES);
736 IP6_INC_STATS(IPSTATS_MIB_FRAGOKS);
741 IP6_INC_STATS(IPSTATS_MIB_FRAGFAILS);
/*
 * Find a route for a flow, reusing the route cached on the socket when
 * it is still applicable.
 *
 * sk:  socket; its cached route is validated with np->dst_cookie
 * dst: output; receives the route
 * fl:  flow; fl6_src is filled in when it is the unspecified address
 *
 * The cached route from sk_dst_check() is rejected when neither the
 * route itself (a /128 route matching fl6_dst) nor np->daddr_cache
 * matches the flow destination, or when fl->oif is set and differs from
 * the route device. A route is obtained from ip6_route_output(); a
 * non-zero error on it jumps to out_err_release. If the flow has no
 * source address, ipv6_get_saddr() selects one; on failure a debug
 * message is printed before jumping to out_err_release.
 *
 * NOTE(review): the tests on sk and on the cached route, the release of
 * a rejected route, the out_err_release label and the return statements
 * are not visible in this listing.
 */
745 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
751 struct ipv6_pinfo *np = inet6_sk(sk);
753 *dst = sk_dst_check(sk, np->dst_cookie);
755 struct rt6_info *rt = (struct rt6_info*)*dst;
757 /* Yes, checking route validity in not connected
758 case is not very simple. Take into account,
759 that we do not support routing by source, TOS,
760 and MSG_DONTROUTE --ANK (980726)
762 1. If route was host route, check that
763 cached destination is current.
764 If it is network route, we still may
765 check its validity using saved pointer
766 to the last used address: daddr_cache.
767 We do not want to save whole address now,
768 (because main consumer of this service
769 is tcp, which has not this problem),
770 so that the last trick works only on connected
772 2. oif also should be the same.
775 if (((rt->rt6i_dst.plen != 128 ||
776 !ipv6_addr_equal(&fl->fl6_dst, &rt->rt6i_dst.addr))
777 && (np->daddr_cache == NULL ||
778 !ipv6_addr_equal(&fl->fl6_dst, np->daddr_cache)))
779 || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
787 *dst = ip6_route_output(sk, fl);
789 if ((err = (*dst)->error))
790 goto out_err_release;
792 if (ipv6_addr_any(&fl->fl6_src)) {
793 err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
797 printk(KERN_DEBUG "ip6_dst_lookup: "
798 "no available source address\n");
800 goto out_err_release;
/*
 * Append user data to the pending (corked) send queue of a socket.
 *
 * sk:          socket whose sk_write_queue collects the pending skbs
 * getfrag:     callback that copies len bytes of payload at offset into
 *              the given buffer; a negative return is treated as failure
 * from:        opaque source handed to getfrag
 * length:      number of payload bytes to append
 * transhdrlen: transport header length; grows by exthdrlen on first call
 * hlimit:      hop limit saved into the cork state
 * opt:         tx options, duplicated into np->cork.opt on first call
 * fl, rt:      flow and route for the datagram
 *
 * First call (empty write queue): the options are copied into cork
 * state, a reference is taken on the route, and hop limit, fragment size
 * (dst_mtu of rt->u.dst.path), the ALLFRAG flag and the length counter
 * are initialised. Later calls take the mtu from inet->cork.fragsize.
 *
 * maxfraglen is the largest 8-byte aligned fragment that leaves room for
 * a fragment header. If the total would exceed the maximum IPv6 payload,
 * ipv6_local_error() reports EMSGSIZE.
 *
 * Data is then placed either in the tail skb of the queue or in newly
 * allocated skbs. A new skb reserves hh_len plus a fragment header of
 * headroom; bytes that overflow maxfraglen in the previous skb (fraggap)
 * are moved into the new one with checksum adjustment. Without
 * NETIF_F_SG on the device data is copied into the linear area with
 * skb_put(); otherwise it is copied into page fragments tracked through
 * sk->sk_sndmsg_page and sk->sk_sndmsg_off.
 *
 * On the visible error path the cork length is reduced by the unsent
 * length and IPSTATS_MIB_OUTDISCARDS is counted.
 *
 * NOTE(review): the last line of the parameter list, several local
 * declarations, the main loop header, most branch conditions, the error
 * labels and the return statements are not visible in this listing.
 */
812 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
813 void *from, int length, int transhdrlen,
814 int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
817 struct inet_sock *inet = inet_sk(sk);
818 struct ipv6_pinfo *np = inet6_sk(sk);
820 unsigned int maxfraglen, fragheaderlen;
827 int csummode = CHECKSUM_NONE;
831 if (skb_queue_empty(&sk->sk_write_queue)) {
836 if (np->cork.opt == NULL) {
837 np->cork.opt = kmalloc(opt->tot_len,
839 if (unlikely(np->cork.opt == NULL))
841 } else if (np->cork.opt->tot_len < opt->tot_len) {
842 printk(KERN_DEBUG "ip6_append_data: invalid option length\n");
845 memcpy(np->cork.opt, opt, opt->tot_len);
846 inet->cork.flags |= IPCORK_OPT;
847 /* need source address above miyazawa*/
849 dst_hold(&rt->u.dst);
852 np->cork.hop_limit = hlimit;
853 inet->cork.fragsize = mtu = dst_mtu(rt->u.dst.path);
854 if (dst_allfrag(rt->u.dst.path))
855 inet->cork.flags |= IPCORK_ALLFRAG;
856 inet->cork.length = 0;
857 sk->sk_sndmsg_page = NULL;
858 sk->sk_sndmsg_off = 0;
859 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
861 transhdrlen += exthdrlen;
865 if (inet->cork.flags & IPCORK_OPT)
869 mtu = inet->cork.fragsize;
872 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
874 fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
875 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
877 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
878 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
879 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
885 * Let's try using as much space as possible.
886 * Use MTU if total length of the message fits into the MTU.
887 * Otherwise, we need to reserve fragment header and
888 * fragment alignment (= 8-15 octects, in total).
890 * Note that we may need to "move" the data from the tail of
891 * of the buffer to the new fragment when we split
894 * FIXME: It may be fragmented into multiple chunks
895 * at once if non-fragmentable extension headers
900 inet->cork.length += length;
902 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
906 /* Check if the remaining data fits into current packet. */
907 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
909 copy = maxfraglen - skb->len;
913 unsigned int datalen;
914 unsigned int fraglen;
915 unsigned int fraggap;
916 unsigned int alloclen;
917 struct sk_buff *skb_prev;
921 /* There's no room in the current skb */
923 fraggap = skb_prev->len - maxfraglen;
928 * If remaining data exceeds the mtu,
929 * we know we need more fragment(s).
931 datalen = length + fraggap;
932 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
933 datalen = maxfraglen - fragheaderlen;
935 fraglen = datalen + fragheaderlen;
936 if ((flags & MSG_MORE) &&
937 !(rt->u.dst.dev->features&NETIF_F_SG))
940 alloclen = datalen + fragheaderlen;
943 * The last fragment gets additional space at tail.
944 * Note: we overallocate on fragments with MSG_MODE
945 * because we have no idea if we're the last one.
947 if (datalen == length + fraggap)
948 alloclen += rt->u.dst.trailer_len;
951 * We just reserve space for fragment header.
952 * Note: this may be overallocation if the message
953 * (without MSG_MORE) fits into the MTU.
955 alloclen += sizeof(struct frag_hdr);
958 skb = sock_alloc_send_skb(sk,
960 (flags & MSG_DONTWAIT), &err);
963 if (atomic_read(&sk->sk_wmem_alloc) <=
965 skb = sock_wmalloc(sk,
966 alloclen + hh_len, 1,
968 if (unlikely(skb == NULL))
974 * Fill in the control structures
976 skb->ip_summed = csummode;
978 /* reserve for fragmentation */
979 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
982 * Find where to start putting bytes
984 data = skb_put(skb, fraglen);
985 skb->nh.raw = data + exthdrlen;
986 data += fragheaderlen;
987 skb->h.raw = data + exthdrlen;
990 skb->csum = skb_copy_and_csum_bits(
991 skb_prev, maxfraglen,
992 data + transhdrlen, fraggap, 0);
993 skb_prev->csum = csum_sub(skb_prev->csum,
996 skb_trim(skb_prev, maxfraglen);
998 copy = datalen - transhdrlen - fraggap;
1003 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1010 length -= datalen - fraggap;
1013 csummode = CHECKSUM_NONE;
1016 * Put the packet on the pending queue
1018 __skb_queue_tail(&sk->sk_write_queue, skb);
1025 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1029 if (getfrag(from, skb_put(skb, copy),
1030 offset, copy, off, skb) < 0) {
1031 __skb_trim(skb, off);
1036 int i = skb_shinfo(skb)->nr_frags;
1037 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1038 struct page *page = sk->sk_sndmsg_page;
1039 int off = sk->sk_sndmsg_off;
1042 if (page && (left = PAGE_SIZE - off) > 0) {
1045 if (page != frag->page) {
1046 if (i == MAX_SKB_FRAGS) {
1051 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1052 frag = &skb_shinfo(skb)->frags[i];
1054 } else if(i < MAX_SKB_FRAGS) {
1055 if (copy > PAGE_SIZE)
1057 page = alloc_pages(sk->sk_allocation, 0);
1062 sk->sk_sndmsg_page = page;
1063 sk->sk_sndmsg_off = 0;
1065 skb_fill_page_desc(skb, i, page, 0, 0);
1066 frag = &skb_shinfo(skb)->frags[i];
1067 skb->truesize += PAGE_SIZE;
1068 atomic_add(PAGE_SIZE, &sk->sk_wmem_alloc);
1073 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1077 sk->sk_sndmsg_off += copy;
1080 skb->data_len += copy;
1087 inet->cork.length -= length;
1088 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
/*
 * Combine the pending skbs of a socket into one packet, build the IPv6
 * header and transmit it, then reset the cork state.
 *
 * The first skb dequeued from sk_write_queue becomes the head. Every
 * further skb is pulled past its network-level headers, linked onto the
 * frag_list of the head, added to the len, data_len and truesize of the
 * head, and detached from the socket (__sock_put, destructor cleared).
 *
 * The head is pulled to its transport header, the corked options are
 * pushed (ipv6_push_nfrag_opts receives the address of final_dst), and
 * the IPv6 header is pushed and filled from the corked flow. The payload
 * length field is 0 when the packet exceeds the maximum IPv6 payload.
 * The corked route is attached with dst_clone(),
 * IPSTATS_MIB_OUTREQUESTS is counted, and the packet goes through
 * NF_IP6_LOCAL_OUT with dst_output as continuation. On one visible path
 * the hook result is mapped to net_xmit_errno(err) when np->recverr is
 * set, else to 0.
 *
 * Cleanup clears IPCORK_OPT and IPCORK_ALLFRAG, frees np->cork.opt,
 * releases the corked route and zeroes inet->cork.fl.
 *
 * NOTE(review): the declaration of err, the empty-queue exit, the
 * conditions around the error mapping and the cleanup statements, and
 * the return statement are not visible in this listing.
 */
1092 int ip6_push_pending_frames(struct sock *sk)
1094 struct sk_buff *skb, *tmp_skb;
1095 struct sk_buff **tail_skb;
1096 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1097 struct inet_sock *inet = inet_sk(sk);
1098 struct ipv6_pinfo *np = inet6_sk(sk);
1099 struct ipv6hdr *hdr;
1100 struct ipv6_txoptions *opt = np->cork.opt;
1101 struct rt6_info *rt = np->cork.rt;
1102 struct flowi *fl = &inet->cork.fl;
1103 unsigned char proto = fl->proto;
1106 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1108 tail_skb = &(skb_shinfo(skb)->frag_list);
1110 /* move skb->data to ip header from ext header */
1111 if (skb->data < skb->nh.raw)
1112 __skb_pull(skb, skb->nh.raw - skb->data);
1113 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1114 __skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
1115 *tail_skb = tmp_skb;
1116 tail_skb = &(tmp_skb->next);
1117 skb->len += tmp_skb->len;
1118 skb->data_len += tmp_skb->len;
1119 skb->truesize += tmp_skb->truesize;
1120 __sock_put(tmp_skb->sk);
1121 tmp_skb->destructor = NULL;
1125 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1126 __skb_pull(skb, skb->h.raw - skb->nh.raw);
1127 if (opt && opt->opt_flen)
1128 ipv6_push_frag_opts(skb, opt, &proto);
1129 if (opt && opt->opt_nflen)
1130 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1132 skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
1134 *(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
1136 if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
1137 hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
1139 hdr->payload_len = 0;
1140 hdr->hop_limit = np->cork.hop_limit;
1141 hdr->nexthdr = proto;
1142 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1143 ipv6_addr_copy(&hdr->daddr, final_dst);
1145 skb->dst = dst_clone(&rt->u.dst);
1146 IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS);
1147 err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
1150 err = np->recverr ? net_xmit_errno(err) : 0;
1156 inet->cork.flags &= ~IPCORK_OPT;
1158 kfree(np->cork.opt);
1159 np->cork.opt = NULL;
1162 dst_release(&np->cork.rt->u.dst);
1164 inet->cork.flags &= ~IPCORK_ALLFRAG;
1166 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
/*
 * Discard everything queued for a corked send and reset the cork state.
 *
 * Each skb is removed from the tail of sk_write_queue and counted as
 * IPSTATS_MIB_OUTDISCARDS. The cork state is then cleared: IPCORK_OPT
 * and IPCORK_ALLFRAG are removed from the flags, np->cork.opt is freed
 * and set to NULL, the corked route is released, and inet->cork.fl is
 * zeroed.
 *
 * NOTE(review): the statement that frees each dequeued skb, the
 * conditions around the cleanup statements and the reset of np->cork.rt
 * are not visible in this listing.
 */
1172 void ip6_flush_pending_frames(struct sock *sk)
1174 struct inet_sock *inet = inet_sk(sk);
1175 struct ipv6_pinfo *np = inet6_sk(sk);
1176 struct sk_buff *skb;
1178 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1179 IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
1183 inet->cork.flags &= ~IPCORK_OPT;
1186 kfree(np->cork.opt);
1187 np->cork.opt = NULL;
1190 dst_release(&np->cork.rt->u.dst);
1192 inet->cork.flags &= ~IPCORK_ALLFRAG;
1194 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));