#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
#define FLAG_NONHEAD_RETRANS_ACKED 0x1000 /* Non-head rexmitted data was ACKed */
+#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED)
#define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED)
/* Adapt the MSS value used to make delayed ack decision to the
* real world.
*/
-static void tcp_measure_rcv_mss(struct sock *sk,
- const struct sk_buff *skb)
+static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const unsigned int lss = icsk->icsk_ack.last_seg_size;
/* skb->len may jitter because of SACKs, even if peer
* sends good full-sized frames.
*/
- len = skb_shinfo(skb)->gso_size ?: skb->len;
+ len = skb_shinfo(skb)->gso_size ? : skb->len;
if (len >= icsk->icsk_ack.rcv_mss) {
icsk->icsk_ack.rcv_mss = len;
} else {
struct inet_connection_sock *icsk = inet_csk(sk);
unsigned quickacks = tcp_sk(sk)->rcv_wnd / (2 * icsk->icsk_ack.rcv_mss);
- if (quickacks==0)
- quickacks=2;
+ if (quickacks == 0)
+ quickacks = 2;
if (quickacks > icsk->icsk_ack.quick)
icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS);
}
static inline void TCP_ECN_queue_cwr(struct tcp_sock *tp)
{
- if (tp->ecn_flags&TCP_ECN_OK)
+ if (tp->ecn_flags & TCP_ECN_OK)
tp->ecn_flags |= TCP_ECN_QUEUE_CWR;
}
static inline void TCP_ECN_check_ce(struct tcp_sock *tp, struct sk_buff *skb)
{
- if (tp->ecn_flags&TCP_ECN_OK) {
+ if (tp->ecn_flags & TCP_ECN_OK) {
if (INET_ECN_is_ce(TCP_SKB_CB(skb)->flags))
tp->ecn_flags |= TCP_ECN_DEMAND_CWR;
/* Funny extension: if ECT is not set on a segment,
static inline void TCP_ECN_rcv_synack(struct tcp_sock *tp, struct tcphdr *th)
{
- if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || th->cwr))
+ if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || th->cwr))
tp->ecn_flags &= ~TCP_ECN_OK;
}
static inline void TCP_ECN_rcv_syn(struct tcp_sock *tp, struct tcphdr *th)
{
- if ((tp->ecn_flags&TCP_ECN_OK) && (!th->ece || !th->cwr))
+ if ((tp->ecn_flags & TCP_ECN_OK) && (!th->ece || !th->cwr))
tp->ecn_flags &= ~TCP_ECN_OK;
}
static inline int TCP_ECN_rcv_ecn_echo(struct tcp_sock *tp, struct tcphdr *th)
{
- if (th->ece && !th->syn && (tp->ecn_flags&TCP_ECN_OK))
+ if (th->ece && !th->syn && (tp->ecn_flags & TCP_ECN_OK))
return 1;
return 0;
}
{
struct tcp_sock *tp = tcp_sk(sk);
/* Optimize this! */
- int truesize = tcp_win_from_space(skb->truesize)/2;
- int window = tcp_win_from_space(sysctl_tcp_rmem[2])/2;
+ int truesize = tcp_win_from_space(skb->truesize) >> 1;
+ int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1;
while (tp->rcv_ssthresh <= window) {
if (truesize <= skb->len)
return 0;
}
-static void tcp_grow_window(struct sock *sk,
- struct sk_buff *skb)
+static void tcp_grow_window(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
* will fit to rcvbuf in future.
*/
if (tcp_win_from_space(skb->truesize) <= skb->len)
- incr = 2*tp->advmss;
+ incr = 2 * tp->advmss;
else
incr = __tcp_grow_window(sk, skb);
if (incr) {
- tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr, tp->window_clamp);
+ tp->rcv_ssthresh = min(tp->rcv_ssthresh + incr,
+ tp->window_clamp);
inet_csk(sk)->icsk_ack.quick |= 1;
}
}
sysctl_tcp_rmem[2]);
}
if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf)
- tp->rcv_ssthresh = min(tp->window_clamp, 2U*tp->advmss);
+ tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss);
}
-
/* Initialize RCV_MSS value.
* RCV_MSS is an our guess about MSS used by the peer.
* We haven't any direct information about the MSS.
struct tcp_sock *tp = tcp_sk(sk);
unsigned int hint = min_t(unsigned int, tp->advmss, tp->mss_cache);
- hint = min(hint, tp->rcv_wnd/2);
+ hint = min(hint, tp->rcv_wnd / 2);
hint = min(hint, TCP_MIN_RCVMSS);
hint = max(hint, TCP_MIN_MSS);
goto new_measure;
if (before(tp->rcv_nxt, tp->rcv_rtt_est.seq))
return;
- tcp_rcv_rtt_update(tp,
- jiffies - tp->rcv_rtt_est.time,
- 1);
+ tcp_rcv_rtt_update(tp, jiffies - tp->rcv_rtt_est.time, 1);
new_measure:
tp->rcv_rtt_est.seq = tp->rcv_nxt + tp->rcv_wnd;
tp->rcv_rtt_est.time = tcp_time_stamp;
}
-static inline void tcp_rcv_rtt_measure_ts(struct sock *sk, const struct sk_buff *skb)
+static inline void tcp_rcv_rtt_measure_ts(struct sock *sk,
+ const struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
if (tp->rx_opt.rcv_tsecr &&
goto new_measure;
time = tcp_time_stamp - tp->rcvq_space.time;
- if (time < (tp->rcv_rtt_est.rtt >> 3) ||
- tp->rcv_rtt_est.rtt == 0)
+ if (time < (tp->rcv_rtt_est.rtt >> 3) || tp->rcv_rtt_est.rtt == 0)
return;
space = 2 * (tp->copied_seq - tp->rcvq_space.seq);
} else {
int m = now - icsk->icsk_ack.lrcvtime;
- if (m <= TCP_ATO_MIN/2) {
+ if (m <= TCP_ATO_MIN / 2) {
/* The fastest case is the first. */
icsk->icsk_ack.ato = (icsk->icsk_ack.ato >> 1) + TCP_ATO_MIN / 2;
} else if (m < icsk->icsk_ack.ato) {
* restart window, so that we send ACKs quickly.
*/
tcp_incr_quickack(sk);
- sk_stream_mem_reclaim(sk);
+ sk_mem_reclaim(sk);
}
}
icsk->icsk_ack.lrcvtime = now;
u32 rto_min = TCP_RTO_MIN;
if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
- rto_min = dst->metrics[RTAX_RTO_MIN-1];
+ rto_min = dst->metrics[RTAX_RTO_MIN - 1];
return rto_min;
}
}
if (after(tp->snd_una, tp->rtt_seq)) {
if (tp->mdev_max < tp->rttvar)
- tp->rttvar -= (tp->rttvar-tp->mdev_max)>>2;
+ tp->rttvar -= (tp->rttvar - tp->mdev_max) >> 2;
tp->rtt_seq = tp->snd_nxt;
tp->mdev_max = tcp_rto_min(sk);
}
} else {
/* no previous measure. */
- tp->srtt = m<<3; /* take the measured time to be rtt */
- tp->mdev = m<<1; /* make sure rto = 3*rtt */
+ tp->srtt = m << 3; /* take the measured time to be rtt */
+ tp->mdev = m << 1; /* make sure rto = 3*rtt */
tp->mdev_max = tp->rttvar = max(tp->mdev, tcp_rto_min(sk));
tp->rtt_seq = tp->snd_nxt;
}
dst_confirm(dst);
- if (dst && (dst->flags&DST_HOST)) {
+ if (dst && (dst->flags & DST_HOST)) {
const struct inet_connection_sock *icsk = inet_csk(sk);
int m;
* Reset our results.
*/
if (!(dst_metric_locked(dst, RTAX_RTT)))
- dst->metrics[RTAX_RTT-1] = 0;
+ dst->metrics[RTAX_RTT - 1] = 0;
return;
}
*/
if (!(dst_metric_locked(dst, RTAX_RTT))) {
if (m <= 0)
- dst->metrics[RTAX_RTT-1] = tp->srtt;
+ dst->metrics[RTAX_RTT - 1] = tp->srtt;
else
- dst->metrics[RTAX_RTT-1] -= (m>>3);
+ dst->metrics[RTAX_RTT - 1] -= (m >> 3);
}
if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
m = tp->mdev;
if (m >= dst_metric(dst, RTAX_RTTVAR))
- dst->metrics[RTAX_RTTVAR-1] = m;
+ dst->metrics[RTAX_RTTVAR - 1] = m;
else
dst->metrics[RTAX_RTTVAR-1] -=
(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
if (!dst_metric_locked(dst, RTAX_CWND) &&
tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
- dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
+ dst->metrics[RTAX_CWND - 1] = tp->snd_cwnd;
} else if (tp->snd_cwnd > tp->snd_ssthresh &&
icsk->icsk_ca_state == TCP_CA_Open) {
/* Cong. avoidance phase, cwnd is reliable. */
*/
static void tcp_disable_fack(struct tcp_sock *tp)
{
+ /* RFC3517 uses different metric in lost marker => reset on change */
+ if (tcp_is_fack(tp))
+ tp->lost_skb_hint = NULL;
tp->rx_opt.sack_ok &= ~2;
}
*
* Search retransmitted skbs from write_queue that were sent when snd_nxt was
* less than what is now known to be received by the other end (derived from
- * SACK blocks by the caller). Also calculate the lowest snd_nxt among the
- * remaining retransmitted skbs to avoid some costly processing per ACKs.
+ * highest SACK block). Also calculate the lowest snd_nxt among the remaining
+ * retransmitted skbs to avoid some costly processing per ACKs.
*/
-static int tcp_mark_lost_retrans(struct sock *sk, u32 received_upto)
+static void tcp_mark_lost_retrans(struct sock *sk)
{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int flag = 0;
int cnt = 0;
u32 new_low_seq = tp->snd_nxt;
+ u32 received_upto = tcp_highest_sack_seq(tp);
+
+ if (!tcp_is_fack(tp) || !tp->retrans_out ||
+ !after(received_upto, tp->lost_retrans_low) ||
+ icsk->icsk_ca_state != TCP_CA_Recovery)
+ return;
tcp_for_write_queue(skb, sk) {
u32 ack_seq = TCP_SKB_CB(skb)->ack_seq;
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_LOST|TCPCB_SACKED_ACKED))) {
tp->lost_out += tcp_skb_pcount(skb);
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
- flag |= FLAG_DATA_SACKED;
- NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
}
+ NET_INC_STATS_BH(LINUX_MIB_TCPLOSTRETRANSMIT);
} else {
if (before(ack_seq, new_low_seq))
new_low_seq = ack_seq;
if (tp->retrans_out)
tp->lost_retrans_low = new_low_seq;
-
- return flag;
}
static int tcp_check_dsack(struct tcp_sock *tp, struct sk_buff *ack_skb,
return in_sack;
}
+static int tcp_sacktag_one(struct sk_buff *skb, struct sock *sk,
+ int *reord, int dup_sack, int fack_count)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ u8 sacked = TCP_SKB_CB(skb)->sacked;
+ int flag = 0;
+
+ /* Account D-SACK for retransmitted packet. */
+ if (dup_sack && (sacked & TCPCB_RETRANS)) {
+ if (after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
+ tp->undo_retrans--;
+ if (sacked & TCPCB_SACKED_ACKED)
+ *reord = min(fack_count, *reord);
+ }
+
+ /* Nothing to do; acked frame is about to be dropped (was ACKed). */
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una))
+ return flag;
+
+ if (!(sacked & TCPCB_SACKED_ACKED)) {
+ if (sacked & TCPCB_SACKED_RETRANS) {
+ /* If the segment is not tagged as lost,
+ * we do not clear RETRANS, believing
+ * that retransmission is still in flight.
+ */
+ if (sacked & TCPCB_LOST) {
+ TCP_SKB_CB(skb)->sacked &=
+ ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
+ tp->lost_out -= tcp_skb_pcount(skb);
+ tp->retrans_out -= tcp_skb_pcount(skb);
+
+ /* clear lost hint */
+ tp->retransmit_skb_hint = NULL;
+ }
+ } else {
+ if (!(sacked & TCPCB_RETRANS)) {
+ /* New sack for not retransmitted frame,
+ * which was in hole. It is reordering.
+ */
+ if (before(TCP_SKB_CB(skb)->seq,
+ tcp_highest_sack_seq(tp)))
+ *reord = min(fack_count, *reord);
+
+ /* SACK enhanced F-RTO (RFC4138; Appendix B) */
+ if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
+ flag |= FLAG_ONLY_ORIG_SACKED;
+ }
+
+ if (sacked & TCPCB_LOST) {
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
+ tp->lost_out -= tcp_skb_pcount(skb);
+
+ /* clear lost hint */
+ tp->retransmit_skb_hint = NULL;
+ }
+ }
+
+ TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
+ flag |= FLAG_DATA_SACKED;
+ tp->sacked_out += tcp_skb_pcount(skb);
+
+ fack_count += tcp_skb_pcount(skb);
+
+ /* Lost marker hint past SACKed? Tweak RFC3517 cnt */
+ if (!tcp_is_fack(tp) && (tp->lost_skb_hint != NULL) &&
+ before(TCP_SKB_CB(skb)->seq,
+ TCP_SKB_CB(tp->lost_skb_hint)->seq))
+ tp->lost_cnt_hint += tcp_skb_pcount(skb);
+
+ if (fack_count > tp->fackets_out)
+ tp->fackets_out = fack_count;
+
+ if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp)))
+ tcp_advance_highest_sack(sk, skb);
+ }
+
+ /* D-SACK. We can detect redundant retransmission in S|R and plain R
+ * frames and clear it. undo_retrans is decreased above, L|R frames
+ * are accounted above as well.
+ */
+ if (dup_sack && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)) {
+ TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
+ tp->retrans_out -= tcp_skb_pcount(skb);
+ tp->retransmit_skb_hint = NULL;
+ }
+
+ return flag;
+}
+
+static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk,
+ struct tcp_sack_block *next_dup,
+ u32 start_seq, u32 end_seq,
+ int dup_sack_in, int *fack_count,
+ int *reord, int *flag)
+{
+ tcp_for_write_queue_from(skb, sk) {
+ int in_sack = 0;
+ int dup_sack = dup_sack_in;
+
+ if (skb == tcp_send_head(sk))
+ break;
+
+ /* queue is in-order => we can short-circuit the walk early */
+ if (!before(TCP_SKB_CB(skb)->seq, end_seq))
+ break;
+
+ if ((next_dup != NULL) &&
+ before(TCP_SKB_CB(skb)->seq, next_dup->end_seq)) {
+ in_sack = tcp_match_skb_to_sack(sk, skb,
+ next_dup->start_seq,
+ next_dup->end_seq);
+ if (in_sack > 0)
+ dup_sack = 1;
+ }
+
+ if (in_sack <= 0)
+ in_sack = tcp_match_skb_to_sack(sk, skb, start_seq,
+ end_seq);
+ if (unlikely(in_sack < 0))
+ break;
+
+ if (in_sack)
+ *flag |= tcp_sacktag_one(skb, sk, reord, dup_sack,
+ *fack_count);
+
+ *fack_count += tcp_skb_pcount(skb);
+ }
+ return skb;
+}
+
+/* Avoid all extra work that is being done by sacktag while walking in
+ * a normal way
+ */
+static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk,
+ u32 skip_to_seq, int *fack_count)
+{
+ tcp_for_write_queue_from(skb, sk) {
+ if (skb == tcp_send_head(sk))
+ break;
+
+ if (!before(TCP_SKB_CB(skb)->end_seq, skip_to_seq))
+ break;
+
+ *fack_count += tcp_skb_pcount(skb);
+ }
+ return skb;
+}
+
+static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb,
+ struct sock *sk,
+ struct tcp_sack_block *next_dup,
+ u32 skip_to_seq,
+ int *fack_count, int *reord,
+ int *flag)
+{
+ if (next_dup == NULL)
+ return skb;
+
+ if (before(next_dup->start_seq, skip_to_seq)) {
+ skb = tcp_sacktag_skip(skb, sk, next_dup->start_seq, fack_count);
+ tcp_sacktag_walk(skb, sk, NULL,
+ next_dup->start_seq, next_dup->end_seq,
+ 1, fack_count, reord, flag);
+ }
+
+ return skb;
+}
+
+static int tcp_sack_cache_ok(struct tcp_sock *tp, struct tcp_sack_block *cache)
+{
+ return cache < tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
+}
+
static int
-tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb, u32 prior_snd_una)
+tcp_sacktag_write_queue(struct sock *sk, struct sk_buff *ack_skb,
+ u32 prior_snd_una)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
unsigned char *ptr = (skb_transport_header(ack_skb) +
TCP_SKB_CB(ack_skb)->sacked);
- struct tcp_sack_block_wire *sp = (struct tcp_sack_block_wire *)(ptr+2);
- struct sk_buff *cached_skb;
- int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
+ struct tcp_sack_block_wire *sp_wire = (struct tcp_sack_block_wire *)(ptr+2);
+ struct tcp_sack_block sp[4];
+ struct tcp_sack_block *cache;
+ struct sk_buff *skb;
+ int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE) >> 3;
+ int used_sacks;
int reord = tp->packets_out;
- int prior_fackets;
- u32 highest_sack_end_seq = tp->lost_retrans_low;
int flag = 0;
int found_dup_sack = 0;
- int cached_fack_count;
- int i;
+ int fack_count;
+ int i, j;
int first_sack_index;
- int force_one_sack;
if (!tp->sacked_out) {
if (WARN_ON(tp->fackets_out))
tp->fackets_out = 0;
- tp->highest_sack = tp->snd_una;
+ tcp_highest_sack_reset(sk);
}
- prior_fackets = tp->fackets_out;
- found_dup_sack = tcp_check_dsack(tp, ack_skb, sp,
+ found_dup_sack = tcp_check_dsack(tp, ack_skb, sp_wire,
num_sacks, prior_snd_una);
if (found_dup_sack)
flag |= FLAG_DSACKING_ACK;
if (!tp->packets_out)
goto out;
- /* SACK fastpath:
- * if the only SACK change is the increase of the end_seq of
- * the first block then only apply that SACK block
- * and use retrans queue hinting otherwise slowpath */
- force_one_sack = 1;
- for (i = 0; i < num_sacks; i++) {
- __be32 start_seq = sp[i].start_seq;
- __be32 end_seq = sp[i].end_seq;
-
- if (i == 0) {
- if (tp->recv_sack_cache[i].start_seq != start_seq)
- force_one_sack = 0;
- } else {
- if ((tp->recv_sack_cache[i].start_seq != start_seq) ||
- (tp->recv_sack_cache[i].end_seq != end_seq))
- force_one_sack = 0;
- }
- tp->recv_sack_cache[i].start_seq = start_seq;
- tp->recv_sack_cache[i].end_seq = end_seq;
- }
- /* Clear the rest of the cache sack blocks so they won't match mistakenly. */
- for (; i < ARRAY_SIZE(tp->recv_sack_cache); i++) {
- tp->recv_sack_cache[i].start_seq = 0;
- tp->recv_sack_cache[i].end_seq = 0;
- }
-
+ used_sacks = 0;
first_sack_index = 0;
- if (force_one_sack)
- num_sacks = 1;
- else {
- int j;
- tp->fastpath_skb_hint = NULL;
-
- /* order SACK blocks to allow in order walk of the retrans queue */
- for (i = num_sacks-1; i > 0; i--) {
- for (j = 0; j < i; j++){
- if (after(ntohl(sp[j].start_seq),
- ntohl(sp[j+1].start_seq))){
- struct tcp_sack_block_wire tmp;
-
- tmp = sp[j];
- sp[j] = sp[j+1];
- sp[j+1] = tmp;
-
- /* Track where the first SACK block goes to */
- if (j == first_sack_index)
- first_sack_index = j+1;
- }
-
- }
- }
- }
-
- /* Use SACK fastpath hint if valid */
- cached_skb = tp->fastpath_skb_hint;
- cached_fack_count = tp->fastpath_cnt_hint;
- if (!cached_skb) {
- cached_skb = tcp_write_queue_head(sk);
- cached_fack_count = 0;
- }
-
for (i = 0; i < num_sacks; i++) {
- struct sk_buff *skb;
- __u32 start_seq = ntohl(sp->start_seq);
- __u32 end_seq = ntohl(sp->end_seq);
- int fack_count;
- int dup_sack = (found_dup_sack && (i == first_sack_index));
- int next_dup = (found_dup_sack && (i+1 == first_sack_index));
+ int dup_sack = !i && found_dup_sack;
- sp++;
+ sp[used_sacks].start_seq = ntohl(get_unaligned(&sp_wire[i].start_seq));
+ sp[used_sacks].end_seq = ntohl(get_unaligned(&sp_wire[i].end_seq));
- if (!tcp_is_sackblock_valid(tp, dup_sack, start_seq, end_seq)) {
+ if (!tcp_is_sackblock_valid(tp, dup_sack,
+ sp[used_sacks].start_seq,
+ sp[used_sacks].end_seq)) {
if (dup_sack) {
if (!tp->undo_marker)
NET_INC_STATS_BH(LINUX_MIB_TCPDSACKIGNOREDNOUNDO);
} else {
/* Don't count olds caused by ACK reordering */
if ((TCP_SKB_CB(ack_skb)->ack_seq != tp->snd_una) &&
- !after(end_seq, tp->snd_una))
+ !after(sp[used_sacks].end_seq, tp->snd_una))
continue;
NET_INC_STATS_BH(LINUX_MIB_TCPSACKDISCARD);
}
+ if (i == 0)
+ first_sack_index = -1;
continue;
}
- skb = cached_skb;
- fack_count = cached_fack_count;
-
- /* Event "B" in the comment above. */
- if (after(end_seq, tp->high_seq))
- flag |= FLAG_DATA_LOST;
-
- tcp_for_write_queue_from(skb, sk) {
- int in_sack = 0;
- u8 sacked;
-
- if (skb == tcp_send_head(sk))
- break;
-
- cached_skb = skb;
- cached_fack_count = fack_count;
- if (i == first_sack_index) {
- tp->fastpath_skb_hint = skb;
- tp->fastpath_cnt_hint = fack_count;
- }
+ /* Ignore very old stuff early */
+ if (!after(sp[used_sacks].end_seq, prior_snd_una))
+ continue;
- /* The retransmission queue is always in order, so
- * we can short-circuit the walk early.
- */
- if (!before(TCP_SKB_CB(skb)->seq, end_seq))
- break;
+ used_sacks++;
+ }
- dup_sack = (found_dup_sack && (i == first_sack_index));
+ /* order SACK blocks to allow in order walk of the retrans queue */
+ for (i = used_sacks - 1; i > 0; i--) {
+ for (j = 0; j < i; j++) {
+ if (after(sp[j].start_seq, sp[j + 1].start_seq)) {
+ struct tcp_sack_block tmp;
- /* Due to sorting DSACK may reside within this SACK block! */
- if (next_dup) {
- u32 dup_start = ntohl(sp->start_seq);
- u32 dup_end = ntohl(sp->end_seq);
+ tmp = sp[j];
+ sp[j] = sp[j + 1];
+ sp[j + 1] = tmp;
- if (before(TCP_SKB_CB(skb)->seq, dup_end)) {
- in_sack = tcp_match_skb_to_sack(sk, skb, dup_start, dup_end);
- if (in_sack > 0)
- dup_sack = 1;
- }
+ /* Track where the first SACK block goes to */
+ if (j == first_sack_index)
+ first_sack_index = j + 1;
}
+ }
+ }
- /* DSACK info lost if out-of-mem, try SACK still */
- if (in_sack <= 0)
- in_sack = tcp_match_skb_to_sack(sk, skb, start_seq, end_seq);
- if (unlikely(in_sack < 0))
- break;
+ skb = tcp_write_queue_head(sk);
+ fack_count = 0;
+ i = 0;
- sacked = TCP_SKB_CB(skb)->sacked;
+ if (!tp->sacked_out) {
+ /* It's already past, so skip checking against it */
+ cache = tp->recv_sack_cache + ARRAY_SIZE(tp->recv_sack_cache);
+ } else {
+ cache = tp->recv_sack_cache;
+ /* Skip empty blocks in at head of the cache */
+ while (tcp_sack_cache_ok(tp, cache) && !cache->start_seq &&
+ !cache->end_seq)
+ cache++;
+ }
- /* Account D-SACK for retransmitted packet. */
- if ((dup_sack && in_sack) &&
- (sacked & TCPCB_RETRANS) &&
- after(TCP_SKB_CB(skb)->end_seq, tp->undo_marker))
- tp->undo_retrans--;
+ while (i < used_sacks) {
+ u32 start_seq = sp[i].start_seq;
+ u32 end_seq = sp[i].end_seq;
+ int dup_sack = (found_dup_sack && (i == first_sack_index));
+ struct tcp_sack_block *next_dup = NULL;
- /* The frame is ACKed. */
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->snd_una)) {
- if (sacked&TCPCB_RETRANS) {
- if ((dup_sack && in_sack) &&
- (sacked&TCPCB_SACKED_ACKED))
- reord = min(fack_count, reord);
- }
+ if (found_dup_sack && ((i + 1) == first_sack_index))
+ next_dup = &sp[i + 1];
- /* Nothing to do; acked frame is about to be dropped. */
- fack_count += tcp_skb_pcount(skb);
- continue;
- }
+ /* Event "B" in the comment above. */
+ if (after(end_seq, tp->high_seq))
+ flag |= FLAG_DATA_LOST;
- if (!in_sack) {
- fack_count += tcp_skb_pcount(skb);
- continue;
+ /* Skip too early cached blocks */
+ while (tcp_sack_cache_ok(tp, cache) &&
+ !before(start_seq, cache->end_seq))
+ cache++;
+
+ /* Can skip some work by looking recv_sack_cache? */
+ if (tcp_sack_cache_ok(tp, cache) && !dup_sack &&
+ after(end_seq, cache->start_seq)) {
+
+ /* Head todo? */
+ if (before(start_seq, cache->start_seq)) {
+ skb = tcp_sacktag_skip(skb, sk, start_seq,
+ &fack_count);
+ skb = tcp_sacktag_walk(skb, sk, next_dup,
+ start_seq,
+ cache->start_seq,
+ dup_sack, &fack_count,
+ &reord, &flag);
}
- if (!(sacked&TCPCB_SACKED_ACKED)) {
- if (sacked & TCPCB_SACKED_RETRANS) {
- /* If the segment is not tagged as lost,
- * we do not clear RETRANS, believing
- * that retransmission is still in flight.
- */
- if (sacked & TCPCB_LOST) {
- TCP_SKB_CB(skb)->sacked &= ~(TCPCB_LOST|TCPCB_SACKED_RETRANS);
- tp->lost_out -= tcp_skb_pcount(skb);
- tp->retrans_out -= tcp_skb_pcount(skb);
-
- /* clear lost hint */
- tp->retransmit_skb_hint = NULL;
- }
- } else {
- if (!(sacked & TCPCB_RETRANS)) {
- /* New sack for not retransmitted frame,
- * which was in hole. It is reordering.
- */
- if (fack_count < prior_fackets)
- reord = min(fack_count, reord);
-
- /* SACK enhanced F-RTO (RFC4138; Appendix B) */
- if (!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark))
- flag |= FLAG_ONLY_ORIG_SACKED;
- }
+ /* Rest of the block already fully processed? */
+ if (!after(end_seq, cache->end_seq))
+ goto advance_sp;
- if (sacked & TCPCB_LOST) {
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST;
- tp->lost_out -= tcp_skb_pcount(skb);
+ skb = tcp_maybe_skipping_dsack(skb, sk, next_dup,
+ cache->end_seq,
+ &fack_count, &reord,
+ &flag);
- /* clear lost hint */
- tp->retransmit_skb_hint = NULL;
- }
- }
-
- TCP_SKB_CB(skb)->sacked |= TCPCB_SACKED_ACKED;
- flag |= FLAG_DATA_SACKED;
- tp->sacked_out += tcp_skb_pcount(skb);
-
- fack_count += tcp_skb_pcount(skb);
- if (fack_count > tp->fackets_out)
- tp->fackets_out = fack_count;
-
- if (after(TCP_SKB_CB(skb)->seq, tp->highest_sack)) {
- tp->highest_sack = TCP_SKB_CB(skb)->seq;
- highest_sack_end_seq = TCP_SKB_CB(skb)->end_seq;
- }
- } else {
- if (dup_sack && (sacked&TCPCB_RETRANS))
- reord = min(fack_count, reord);
-
- fack_count += tcp_skb_pcount(skb);
+ /* ...tail remains todo... */
+ if (tcp_highest_sack_seq(tp) == cache->end_seq) {
+ /* ...but better entrypoint exists! */
+ skb = tcp_highest_sack(sk);
+ if (skb == NULL)
+ break;
+ fack_count = tp->fackets_out;
+ cache++;
+ goto walk;
}
- /* D-SACK. We can detect redundant retransmission
- * in S|R and plain R frames and clear it.
- * undo_retrans is decreased above, L|R frames
- * are accounted above as well.
- */
- if (dup_sack &&
- (TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_RETRANS)) {
- TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_RETRANS;
- tp->retrans_out -= tcp_skb_pcount(skb);
- tp->retransmit_skb_hint = NULL;
- }
+ skb = tcp_sacktag_skip(skb, sk, cache->end_seq,
+ &fack_count);
+ /* Check overlap against next cached too (past this one already) */
+ cache++;
+ continue;
+ }
+
+ if (!before(start_seq, tcp_highest_sack_seq(tp))) {
+ skb = tcp_highest_sack(sk);
+ if (skb == NULL)
+ break;
+ fack_count = tp->fackets_out;
}
+ skb = tcp_sacktag_skip(skb, sk, start_seq, &fack_count);
+
+walk:
+ skb = tcp_sacktag_walk(skb, sk, next_dup, start_seq, end_seq,
+ dup_sack, &fack_count, &reord, &flag);
+advance_sp:
/* SACK enhanced FRTO (RFC4138, Appendix B): Clearing correct
* due to in-order walk
*/
if (after(end_seq, tp->frto_highmark))
flag &= ~FLAG_ONLY_ORIG_SACKED;
+
+ i++;
+ }
+
+ /* Clear the head of the cache sack blocks so we can skip it next time */
+ for (i = 0; i < ARRAY_SIZE(tp->recv_sack_cache) - used_sacks; i++) {
+ tp->recv_sack_cache[i].start_seq = 0;
+ tp->recv_sack_cache[i].end_seq = 0;
}
+ for (j = 0; j < used_sacks; j++)
+ tp->recv_sack_cache[i++] = sp[j];
- if (tp->retrans_out &&
- after(highest_sack_end_seq, tp->lost_retrans_low) &&
- icsk->icsk_ca_state == TCP_CA_Recovery)
- flag |= tcp_mark_lost_retrans(sk, highest_sack_end_seq);
+ tcp_mark_lost_retrans(sk);
tcp_verify_left_out(tp);
- if ((reord < tp->fackets_out) && icsk->icsk_ca_state != TCP_CA_Loss &&
+ if ((reord < tp->fackets_out) &&
+ ((icsk->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker) &&
(!tp->frto_highmark || after(tp->snd_una, tp->frto_highmark)))
tcp_update_reordering(sk, tp->fackets_out - reord, 0);
return flag;
}
-/* If we receive more dupacks than we expected counting segments
- * in assumption of absent reordering, interpret this as reordering.
- * The only another reason could be bug in receiver TCP.
+/* Limits sacked_out so that sum with lost_out isn't ever larger than
+ * packets_out. Returns zero if sacked_out adjustement wasn't necessary.
*/
-static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+int tcp_limit_reno_sacked(struct tcp_sock *tp)
{
- struct tcp_sock *tp = tcp_sk(sk);
u32 holes;
holes = max(tp->lost_out, 1U);
if ((tp->sacked_out + holes) > tp->packets_out) {
tp->sacked_out = tp->packets_out - holes;
- tcp_update_reordering(sk, tp->packets_out + addend, 0);
+ return 1;
}
+ return 0;
+}
+
+/* If we receive more dupacks than we expected counting segments
+ * in assumption of absent reordering, interpret this as reordering.
+ * The only another reason could be bug in receiver TCP.
+ */
+static void tcp_check_reno_reordering(struct sock *sk, const int addend)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ if (tcp_limit_reno_sacked(tp))
+ tcp_update_reordering(sk, tp->packets_out + addend, 0);
}
/* Emulate SACKs for SACKless connection: account for a new dupack. */
if (acked > 0) {
/* One ACK acked hole. The rest eat duplicate ACKs. */
- if (acked-1 >= tp->sacked_out)
+ if (acked - 1 >= tp->sacked_out)
tp->sacked_out = 0;
else
- tp->sacked_out -= acked-1;
+ tp->sacked_out -= acked - 1;
}
tcp_check_reno_reordering(sk, acked);
tcp_verify_left_out(tp);
int tcp_use_frto(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
+ const struct inet_connection_sock *icsk = inet_csk(sk);
struct sk_buff *skb;
if (!sysctl_tcp_frto)
return 0;
+ /* MTU probe and F-RTO won't really play nicely along currently */
+ if (icsk->icsk_mtup.probe_size)
+ return 0;
+
if (IsSackFrto())
return 1;
tcp_for_write_queue_from(skb, sk) {
if (skb == tcp_send_head(sk))
break;
- if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
return 0;
/* Short-circuit when first non-SACKed skb has been checked */
- if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED))
+ if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
break;
}
return 1;
* Count the retransmission made on RTO correctly (only when
* waiting for the first ACK and did not get it)...
*/
- if ((tp->frto_counter == 1) && !(flag&FLAG_DATA_ACKED)) {
+ if ((tp->frto_counter == 1) && !(flag & FLAG_DATA_ACKED)) {
/* For some reason this R-bit might get cleared? */
if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS)
tp->retrans_out += tcp_skb_pcount(skb);
}
/* Don't lost mark skbs that were fwd transmitted after RTO */
- if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) &&
+ if (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) &&
!after(TCP_SKB_CB(skb)->end_seq, tp->frto_highmark)) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
tp->bytes_acked = 0;
tp->reordering = min_t(unsigned int, tp->reordering,
- sysctl_tcp_reordering);
+ sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->frto_highmark;
TCP_ECN_queue_cwr(tp);
if (skb == tcp_send_head(sk))
break;
- if (TCP_SKB_CB(skb)->sacked&TCPCB_RETRANS)
+ if (TCP_SKB_CB(skb)->sacked & TCPCB_RETRANS)
tp->undo_marker = 0;
TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED;
if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || how) {
tcp_verify_left_out(tp);
tp->reordering = min_t(unsigned int, tp->reordering,
- sysctl_tcp_reordering);
+ sysctl_tcp_reordering);
tcp_set_ca_state(sk, TCP_CA_Loss);
tp->high_seq = tp->snd_nxt;
TCP_ECN_queue_cwr(tp);
tp->frto_counter = 0;
}
-static int tcp_check_sack_reneging(struct sock *sk)
+/* If ACK arrived pointing to a remembered SACK, it means that our
+ * remembered SACKs do not reflect real state of receiver i.e.
+ * receiver _host_ is heavily congested (or buggy).
+ *
+ * Do processing similar to RTO timeout.
+ */
+static int tcp_check_sack_reneging(struct sock *sk, int flag)
{
- struct sk_buff *skb;
-
- /* If ACK arrived pointing to a remembered SACK,
- * it means that our remembered SACKs do not reflect
- * real state of receiver i.e.
- * receiver _host_ is heavily congested (or buggy).
- * Do processing similar to RTO timeout.
- */
- if ((skb = tcp_write_queue_head(sk)) != NULL &&
- (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) {
+ if (flag & FLAG_SACK_RENEGING) {
struct inet_connection_sock *icsk = inet_csk(sk);
NET_INC_STATS_BH(LINUX_MIB_TCPSACKRENEGING);
static inline int tcp_fackets_out(struct tcp_sock *tp)
{
- return tcp_is_reno(tp) ? tp->sacked_out+1 : tp->fackets_out;
+ return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out;
+}
+
+/* Heurestics to calculate number of duplicate ACKs. There's no dupACKs
+ * counter when SACK is enabled (without SACK, sacked_out is used for
+ * that purpose).
+ *
+ * Instead, with FACK TCP uses fackets_out that includes both SACKed
+ * segments up to the highest received SACK block so far and holes in
+ * between them.
+ *
+ * With reordering, holes may still be in flight, so RFC3517 recovery
+ * uses pure sacked_out (total number of SACKed segments) even though
+ * it violates the RFC that uses duplicate ACKs, often these are equal
+ * but when e.g. out-of-window ACKs or packet duplication occurs,
+ * they differ. Since neither occurs due to loss, TCP should really
+ * ignore them.
+ */
+static inline int tcp_dupack_heurestics(struct tcp_sock *tp)
+{
+ return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1;
}
static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb)
return 1;
/* Not-A-Trick#2 : Classic rule... */
- if (tcp_fackets_out(tp) > tp->reordering)
+ if (tcp_dupack_heurestics(tp) > tp->reordering)
return 1;
/* Trick#3 : when we use RFC2988 timer restart, fast
* retransmit can be triggered by timeout of queue head.
*/
- if (tcp_head_timedout(sk))
+ if (tcp_is_fack(tp) && tcp_head_timedout(sk))
return 1;
/* Trick#4: It is still not OK... But will it be useful to delay
* retransmitted past LOST markings in the first place? I'm not fully sure
* about undo and end of connection cases, which can cause R without L?
*/
-static void tcp_verify_retransmit_hint(struct tcp_sock *tp,
- struct sk_buff *skb)
+static void tcp_verify_retransmit_hint(struct tcp_sock *tp, struct sk_buff *skb)
{
if ((tp->retransmit_skb_hint != NULL) &&
before(TCP_SKB_CB(skb)->seq,
- TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
+ TCP_SKB_CB(tp->retransmit_skb_hint)->seq))
tp->retransmit_skb_hint = NULL;
}
-/* Mark head of queue up as lost. */
+/* Mark head of queue up as lost. With RFC3517 SACK, the packets is
+ * is against sacked "cnt", otherwise it's against facked "cnt"
+ */
static void tcp_mark_head_lost(struct sock *sk, int packets)
{
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
- int cnt;
+ int cnt, oldcnt;
+ int err;
+ unsigned int mss;
BUG_TRAP(packets <= tp->packets_out);
if (tp->lost_skb_hint) {
/* this is not the most efficient way to do this... */
tp->lost_skb_hint = skb;
tp->lost_cnt_hint = cnt;
- cnt += tcp_skb_pcount(skb);
- if (cnt > packets || after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
+
+ if (after(TCP_SKB_CB(skb)->end_seq, tp->high_seq))
break;
+
+ oldcnt = cnt;
+ if (tcp_is_fack(tp) || tcp_is_reno(tp) ||
+ (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+ cnt += tcp_skb_pcount(skb);
+
+ if (cnt > packets) {
+ if (tcp_is_sack(tp) || (oldcnt >= packets))
+ break;
+
+ mss = skb_shinfo(skb)->gso_size;
+ err = tcp_fragment(sk, skb, (packets - oldcnt) * mss, mss);
+ if (err < 0)
+ break;
+ cnt = packets;
+ }
+
if (!(TCP_SKB_CB(skb)->sacked & (TCPCB_SACKED_ACKED|TCPCB_LOST))) {
TCP_SKB_CB(skb)->sacked |= TCPCB_LOST;
tp->lost_out += tcp_skb_pcount(skb);
/* Account newly detected lost packet(s) */
-static void tcp_update_scoreboard(struct sock *sk)
+static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit)
{
struct tcp_sock *tp = tcp_sk(sk);
- if (tcp_is_fack(tp)) {
+ if (tcp_is_reno(tp)) {
+ tcp_mark_head_lost(sk, 1);
+ } else if (tcp_is_fack(tp)) {
int lost = tp->fackets_out - tp->reordering;
if (lost <= 0)
lost = 1;
tcp_mark_head_lost(sk, lost);
} else {
- tcp_mark_head_lost(sk, 1);
+ int sacked_upto = tp->sacked_out - tp->reordering;
+ if (sacked_upto < fast_rexmit)
+ sacked_upto = fast_rexmit;
+ tcp_mark_head_lost(sk, sacked_upto);
}
/* New heuristics: it is possible only after we switched
* Hence, we can detect timed out packets during fast
* retransmit without falling to slow start.
*/
- if (!tcp_is_reno(tp) && tcp_head_timedout(sk)) {
+ if (tcp_is_fack(tp) && tcp_head_timedout(sk)) {
struct sk_buff *skb;
skb = tp->scoreboard_skb_hint ? tp->scoreboard_skb_hint
static inline void tcp_moderate_cwnd(struct tcp_sock *tp)
{
tp->snd_cwnd = min(tp->snd_cwnd,
- tcp_packets_in_flight(tp)+tcp_max_burst(tp));
+ tcp_packets_in_flight(tp) + tcp_max_burst(tp));
tp->snd_cwnd_stamp = tcp_time_stamp;
}
struct tcp_sock *tp = tcp_sk(sk);
int decr = tp->snd_cwnd_cnt + 1;
- if ((flag&(FLAG_ANY_PROGRESS|FLAG_DSACKING_ACK)) ||
- (tcp_is_reno(tp) && !(flag&FLAG_NOT_DUP))) {
- tp->snd_cwnd_cnt = decr&1;
+ if ((flag & (FLAG_ANY_PROGRESS | FLAG_DSACKING_ACK)) ||
+ (tcp_is_reno(tp) && !(flag & FLAG_NOT_DUP))) {
+ tp->snd_cwnd_cnt = decr & 1;
decr >>= 1;
if (decr && tp->snd_cwnd > tcp_cwnd_min(sk))
tp->snd_cwnd -= decr;
- tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp)+1);
+ tp->snd_cwnd = min(tp->snd_cwnd, tcp_packets_in_flight(tp) + 1);
tp->snd_cwnd_stamp = tcp_time_stamp;
}
}
{
return !tp->retrans_stamp ||
(tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
- (__s32)(tp->rx_opt.rcv_tsecr - tp->retrans_stamp) < 0);
+ before(tp->rx_opt.rcv_tsecr, tp->retrans_stamp));
}
/* Undo procedures. */
struct tcp_sock *tp = tcp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
- printk(KERN_DEBUG "Undo %s %u.%u.%u.%u/%u c%u l%u ss%u/%u p%u\n",
- msg,
- NIPQUAD(inet->daddr), ntohs(inet->dport),
- tp->snd_cwnd, tcp_left_out(tp),
- tp->snd_ssthresh, tp->prior_ssthresh,
- tp->packets_out);
+ if (sk->sk_family == AF_INET) {
+ printk(KERN_DEBUG "Undo %s " NIPQUAD_FMT "/%u c%u l%u ss%u/%u p%u\n",
+ msg,
+ NIPQUAD(inet->daddr), ntohs(inet->dport),
+ tp->snd_cwnd, tcp_left_out(tp),
+ tp->snd_ssthresh, tp->prior_ssthresh,
+ tp->packets_out);
+ }
+#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ else if (sk->sk_family == AF_INET6) {
+ struct ipv6_pinfo *np = inet6_sk(sk);
+ printk(KERN_DEBUG "Undo %s " NIP6_FMT "/%u c%u l%u ss%u/%u p%u\n",
+ msg,
+ NIP6(np->daddr), ntohs(inet->dport),
+ tp->snd_cwnd, tcp_left_out(tp),
+ tp->snd_ssthresh, tp->prior_ssthresh,
+ tp->packets_out);
+ }
+#endif
}
#else
#define DBGUNDO(x...) do { } while (0)
if (icsk->icsk_ca_ops->undo_cwnd)
tp->snd_cwnd = icsk->icsk_ca_ops->undo_cwnd(sk);
else
- tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh<<1);
+ tp->snd_cwnd = max(tp->snd_cwnd, tp->snd_ssthresh << 1);
if (undo && tp->prior_ssthresh > tp->snd_ssthresh) {
tp->snd_ssthresh = tp->prior_ssthresh;
static inline int tcp_may_undo(struct tcp_sock *tp)
{
- return tp->undo_marker &&
- (!tp->undo_retrans || tcp_packet_delayed(tp));
+ return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp));
}
/* People celebrate: "We love our President!" */
{
struct tcp_sock *tp = tcp_sk(sk);
/* Partial ACK arrived. Force Hoe's retransmit. */
- int failed = tcp_is_reno(tp) || tp->fackets_out>tp->reordering;
+ int failed = tcp_is_reno(tp) || (tcp_fackets_out(tp) > tp->reordering);
if (tcp_may_undo(tp)) {
/* Plain luck! Hole if filled with delayed
if (tp->retrans_out == 0)
tp->retrans_stamp = 0;
- if (flag&FLAG_ECE)
+ if (flag & FLAG_ECE)
tcp_enter_cwr(sk, 1);
if (inet_csk(sk)->icsk_ca_state != TCP_CA_CWR) {
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
-
/* Process an event, which can update packets-in-flight not trivially.
* Main goal of this function is to calculate new estimate for left_out,
* taking into account both packets sitting in receiver's buffer and
* It does _not_ decide what to send, it is made in function
* tcp_xmit_retransmit_queue().
*/
-static void
-tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
+static void tcp_fastretrans_alert(struct sock *sk, int pkts_acked, int flag)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- int is_dupack = !(flag&(FLAG_SND_UNA_ADVANCED|FLAG_NOT_DUP));
- int do_lost = is_dupack || ((flag&FLAG_DATA_SACKED) &&
- (tp->fackets_out > tp->reordering));
+ int is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
+ int do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) &&
+ (tcp_fackets_out(tp) > tp->reordering));
+ int fast_rexmit = 0;
- /* Some technical things:
- * 1. Reno does not count dupacks (sacked_out) automatically. */
- if (!tp->packets_out)
+ if (WARN_ON(!tp->packets_out && tp->sacked_out))
tp->sacked_out = 0;
-
if (WARN_ON(!tp->sacked_out && tp->fackets_out))
tp->fackets_out = 0;
/* Now state machine starts.
* A. ECE, hence prohibit cwnd undoing, the reduction is required. */
- if (flag&FLAG_ECE)
+ if (flag & FLAG_ECE)
tp->prior_ssthresh = 0;
/* B. In all the states check for reneging SACKs. */
- if (tp->sacked_out && tcp_check_sack_reneging(sk))
+ if (tcp_check_sack_reneging(sk, flag))
return;
/* C. Process data loss notification, provided it is valid. */
- if ((flag&FLAG_DATA_LOST) &&
+ if (tcp_is_fack(tp) && (flag & FLAG_DATA_LOST) &&
before(tp->snd_una, tp->high_seq) &&
icsk->icsk_ca_state != TCP_CA_Open &&
tp->fackets_out > tp->reordering) {
do_lost = tcp_try_undo_partial(sk, pkts_acked);
break;
case TCP_CA_Loss:
- if (flag&FLAG_DATA_ACKED)
+ if (flag & FLAG_DATA_ACKED)
icsk->icsk_retransmits = 0;
+ if (tcp_is_reno(tp) && flag & FLAG_SND_UNA_ADVANCED)
+ tcp_reset_reno_sack(tp);
if (!tcp_try_undo_loss(sk)) {
tcp_moderate_cwnd(tp);
tcp_xmit_retransmit_queue(sk);
tp->undo_retrans = tp->retrans_out;
if (icsk->icsk_ca_state < TCP_CA_CWR) {
- if (!(flag&FLAG_ECE))
+ if (!(flag & FLAG_ECE))
tp->prior_ssthresh = tcp_current_ssthresh(sk);
tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk);
TCP_ECN_queue_cwr(tp);
tp->bytes_acked = 0;
tp->snd_cwnd_cnt = 0;
tcp_set_ca_state(sk, TCP_CA_Recovery);
+ fast_rexmit = 1;
}
- if (do_lost || tcp_head_timedout(sk))
- tcp_update_scoreboard(sk);
+ if (do_lost || (tcp_is_fack(tp) && tcp_head_timedout(sk)))
+ tcp_update_scoreboard(sk, fast_rexmit);
tcp_cwnd_down(sk, flag);
tcp_xmit_retransmit_queue(sk);
}
tcp_ack_no_tstamp(sk, seq_rtt, flag);
}
-static void tcp_cong_avoid(struct sock *sk, u32 ack,
- u32 in_flight, int good)
+static void tcp_cong_avoid(struct sock *sk, u32 ack, u32 in_flight)
{
const struct inet_connection_sock *icsk = inet_csk(sk);
- icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight, good);
+ icsk->icsk_ca_ops->cong_avoid(sk, ack, in_flight);
tcp_sk(sk)->snd_cwnd_stamp = tcp_time_stamp;
}
if (!tp->packets_out) {
inet_csk_clear_xmit_timer(sk, ICSK_TIME_RETRANS);
} else {
- inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
+ inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+ inet_csk(sk)->icsk_rto, TCP_RTO_MAX);
}
}
* is before the ack sequence we can discard it as it's confirmed to have
* arrived at the other end.
*/
-static int tcp_clean_rtx_queue(struct sock *sk, s32 *seq_rtt_p,
- int prior_fackets)
+static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets)
{
struct tcp_sock *tp = tcp_sk(sk);
const struct inet_connection_sock *icsk = inet_csk(sk);
u32 now = tcp_time_stamp;
int fully_acked = 1;
int flag = 0;
- int prior_packets = tp->packets_out;
- u32 cnt = 0;
+ u32 pkts_acked = 0;
u32 reord = tp->packets_out;
s32 seq_rtt = -1;
s32 ca_seq_rtt = -1;
while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) {
struct tcp_skb_cb *scb = TCP_SKB_CB(skb);
u32 end_seq;
- u32 packets_acked;
+ u32 acked_pcount;
u8 sacked = scb->sacked;
/* Determine how many packets and what bytes were acked, tso and else */
!after(tp->snd_una, scb->seq))
break;
- packets_acked = tcp_tso_acked(sk, skb);
- if (!packets_acked)
+ acked_pcount = tcp_tso_acked(sk, skb);
+ if (!acked_pcount)
break;
fully_acked = 0;
end_seq = tp->snd_una;
} else {
- packets_acked = tcp_skb_pcount(skb);
+ acked_pcount = tcp_skb_pcount(skb);
end_seq = scb->end_seq;
}
tcp_mtup_probe_success(sk, skb);
}
- if (sacked) {
- if (sacked & TCPCB_RETRANS) {
- if (sacked & TCPCB_SACKED_RETRANS)
- tp->retrans_out -= packets_acked;
- flag |= FLAG_RETRANS_DATA_ACKED;
- ca_seq_rtt = -1;
- seq_rtt = -1;
- if ((flag & FLAG_DATA_ACKED) ||
- (packets_acked > 1))
- flag |= FLAG_NONHEAD_RETRANS_ACKED;
- } else {
- ca_seq_rtt = now - scb->when;
- last_ackt = skb->tstamp;
- if (seq_rtt < 0) {
- seq_rtt = ca_seq_rtt;
- }
- if (!(sacked & TCPCB_SACKED_ACKED))
- reord = min(cnt, reord);
- }
-
- if (sacked & TCPCB_SACKED_ACKED)
- tp->sacked_out -= packets_acked;
- if (sacked & TCPCB_LOST)
- tp->lost_out -= packets_acked;
-
- if ((sacked & TCPCB_URG) && tp->urg_mode &&
- !before(end_seq, tp->snd_up))
- tp->urg_mode = 0;
+ if (sacked & TCPCB_RETRANS) {
+ if (sacked & TCPCB_SACKED_RETRANS)
+ tp->retrans_out -= acked_pcount;
+ flag |= FLAG_RETRANS_DATA_ACKED;
+ ca_seq_rtt = -1;
+ seq_rtt = -1;
+ if ((flag & FLAG_DATA_ACKED) || (acked_pcount > 1))
+ flag |= FLAG_NONHEAD_RETRANS_ACKED;
} else {
ca_seq_rtt = now - scb->when;
last_ackt = skb->tstamp;
if (seq_rtt < 0) {
seq_rtt = ca_seq_rtt;
}
- reord = min(cnt, reord);
+ if (!(sacked & TCPCB_SACKED_ACKED))
+ reord = min(pkts_acked, reord);
}
- tp->packets_out -= packets_acked;
- cnt += packets_acked;
+
+ if (sacked & TCPCB_SACKED_ACKED)
+ tp->sacked_out -= acked_pcount;
+ if (sacked & TCPCB_LOST)
+ tp->lost_out -= acked_pcount;
+
+ if (unlikely(tp->urg_mode && !before(end_seq, tp->snd_up)))
+ tp->urg_mode = 0;
+
+ tp->packets_out -= acked_pcount;
+ pkts_acked += acked_pcount;
/* Initial outgoing SYN's get put onto the write_queue
* just like anything else we transmit. It is not
break;
tcp_unlink_write_queue(skb, sk);
- sk_stream_free_skb(sk, skb);
+ sk_wmem_free_skb(sk, skb);
tcp_clear_all_retrans_hints(tp);
}
+ if (skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+ flag |= FLAG_SACK_RENEGING;
+
if (flag & FLAG_ACKED) {
- u32 pkts_acked = prior_packets - tp->packets_out;
const struct tcp_congestion_ops *ca_ops
= inet_csk(sk)->icsk_ca_ops;
}
tp->fackets_out -= min(pkts_acked, tp->fackets_out);
- /* hint's skb might be NULL but we don't need to care */
- tp->fastpath_cnt_hint -= min_t(u32, pkts_acked,
- tp->fastpath_cnt_hint);
+
if (ca_ops->pkts_acked) {
s32 rtt_us = -1;
}
}
#endif
- *seq_rtt_p = seq_rtt;
return flag;
}
/* Was it a usable window open? */
- if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq,
- tp->snd_una + tp->snd_wnd)) {
+ if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) {
icsk->icsk_backoff = 0;
inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0);
/* Socket must be waked up by subsequent tcp_data_snd_check().
/* Check that window update is acceptable.
* The function assumes that snd_una<=ack<=snd_next.
*/
-static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack,
- const u32 ack_seq, const u32 nwin)
+static inline int tcp_may_update_window(const struct tcp_sock *tp,
+ const u32 ack, const u32 ack_seq,
+ const u32 nwin)
{
return (after(ack, tp->snd_una) ||
after(ack_seq, tp->snd_wl1) ||
static void tcp_undo_spur_to_response(struct sock *sk, int flag)
{
- if (flag&FLAG_ECE)
+ if (flag & FLAG_ECE)
tcp_ratehalving_spur_to_response(sk);
else
tcp_undo_cwr(sk, 1);
tcp_verify_left_out(tp);
/* Duplicate the behavior from Loss state (fastretrans_alert) */
- if (flag&FLAG_DATA_ACKED)
+ if (flag & FLAG_DATA_ACKED)
inet_csk(sk)->icsk_retransmits = 0;
if ((flag & FLAG_NONHEAD_RETRANS_ACKED) ||
* ACK isn't duplicate nor advances window, e.g., opposite dir
* data, winupdate
*/
- if (!(flag&FLAG_ANY_PROGRESS) && (flag&FLAG_NOT_DUP))
+ if (!(flag & FLAG_ANY_PROGRESS) && (flag & FLAG_NOT_DUP))
return 1;
- if (!(flag&FLAG_DATA_ACKED)) {
+ if (!(flag & FLAG_DATA_ACKED)) {
tcp_enter_frto_loss(sk, (tp->frto_counter == 1 ? 0 : 3),
flag);
return 1;
}
} else {
- if (!(flag&FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
+ if (!(flag & FLAG_DATA_ACKED) && (tp->frto_counter == 1)) {
/* Prevent sending of new data. */
tp->snd_cwnd = min(tp->snd_cwnd,
tcp_packets_in_flight(tp));
}
if ((tp->frto_counter >= 2) &&
- (!(flag&FLAG_FORWARD_PROGRESS) ||
- ((flag&FLAG_DATA_SACKED) && !(flag&FLAG_ONLY_ORIG_SACKED)))) {
+ (!(flag & FLAG_FORWARD_PROGRESS) ||
+ ((flag & FLAG_DATA_SACKED) &&
+ !(flag & FLAG_ONLY_ORIG_SACKED)))) {
/* RFC4138 shortcoming (see comment above) */
- if (!(flag&FLAG_FORWARD_PROGRESS) && (flag&FLAG_NOT_DUP))
+ if (!(flag & FLAG_FORWARD_PROGRESS) &&
+ (flag & FLAG_NOT_DUP))
return 1;
tcp_enter_frto_loss(sk, 3, flag);
u32 ack = TCP_SKB_CB(skb)->ack_seq;
u32 prior_in_flight;
u32 prior_fackets;
- s32 seq_rtt;
int prior_packets;
int frto_cwnd = 0;
tp->bytes_acked += ack - prior_snd_una;
else if (icsk->icsk_ca_state == TCP_CA_Loss)
/* we assume just one segment left network */
- tp->bytes_acked += min(ack - prior_snd_una, tp->mss_cache);
+ tp->bytes_acked += min(ack - prior_snd_una,
+ tp->mss_cache);
}
prior_fackets = tp->fackets_out;
prior_in_flight = tcp_packets_in_flight(tp);
- if (!(flag&FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
+ if (!(flag & FLAG_SLOWPATH) && after(ack, prior_snd_una)) {
/* Window is constant, pure forward advance.
* No more checks are required.
* Note, we use the fact that SND.UNA>=SND.WL2.
goto no_queue;
/* See if we can take anything off of the retransmit queue. */
- flag |= tcp_clean_rtx_queue(sk, &seq_rtt, prior_fackets);
+ flag |= tcp_clean_rtx_queue(sk, prior_fackets);
if (tp->frto_counter)
frto_cwnd = tcp_process_frto(sk, flag);
/* Advance CWND, if state allows this. */
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd &&
tcp_may_raise_cwnd(sk, flag))
- tcp_cong_avoid(sk, ack, prior_in_flight, 0);
- tcp_fastretrans_alert(sk, prior_packets - tp->packets_out, flag);
+ tcp_cong_avoid(sk, ack, prior_in_flight);
+ tcp_fastretrans_alert(sk, prior_packets - tp->packets_out,
+ flag);
} else {
if ((flag & FLAG_DATA_ACKED) && !frto_cwnd)
- tcp_cong_avoid(sk, ack, prior_in_flight, 1);
+ tcp_cong_avoid(sk, ack, prior_in_flight);
}
- if ((flag & FLAG_FORWARD_PROGRESS) || !(flag&FLAG_NOT_DUP))
+ if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
dst_confirm(sk->sk_dst_cache);
return 1;
return 0;
}
-
/* Look for tcp options. Normally only called on SYN and SYNACK packets.
* But, this can also be called on packets in the established flow when
* the fast version below fails.
*/
-void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx, int estab)
+void tcp_parse_options(struct sk_buff *skb, struct tcp_options_received *opt_rx,
+ int estab)
{
unsigned char *ptr;
struct tcphdr *th = tcp_hdr(skb);
- int length=(th->doff*4)-sizeof(struct tcphdr);
+ int length = (th->doff * 4) - sizeof(struct tcphdr);
ptr = (unsigned char *)(th + 1);
opt_rx->saw_tstamp = 0;
while (length > 0) {
- int opcode=*ptr++;
+ int opcode = *ptr++;
int opsize;
switch (opcode) {
- case TCPOPT_EOL:
+ case TCPOPT_EOL:
+ return;
+ case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
+ length--;
+ continue;
+ default:
+ opsize = *ptr++;
+ if (opsize < 2) /* "silly options" */
return;
- case TCPOPT_NOP: /* Ref: RFC 793 section 3.1 */
- length--;
- continue;
- default:
- opsize=*ptr++;
- if (opsize < 2) /* "silly options" */
- return;
- if (opsize > length)
- return; /* don't parse partial options */
- switch (opcode) {
- case TCPOPT_MSS:
- if (opsize==TCPOLEN_MSS && th->syn && !estab) {
- u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
- if (in_mss) {
- if (opt_rx->user_mss && opt_rx->user_mss < in_mss)
- in_mss = opt_rx->user_mss;
- opt_rx->mss_clamp = in_mss;
- }
+ if (opsize > length)
+ return; /* don't parse partial options */
+ switch (opcode) {
+ case TCPOPT_MSS:
+ if (opsize == TCPOLEN_MSS && th->syn && !estab) {
+ u16 in_mss = ntohs(get_unaligned((__be16 *)ptr));
+ if (in_mss) {
+ if (opt_rx->user_mss &&
+ opt_rx->user_mss < in_mss)
+ in_mss = opt_rx->user_mss;
+ opt_rx->mss_clamp = in_mss;
}
- break;
- case TCPOPT_WINDOW:
- if (opsize==TCPOLEN_WINDOW && th->syn && !estab)
- if (sysctl_tcp_window_scaling) {
- __u8 snd_wscale = *(__u8 *) ptr;
- opt_rx->wscale_ok = 1;
- if (snd_wscale > 14) {
- if (net_ratelimit())
- printk(KERN_INFO "tcp_parse_options: Illegal window "
- "scaling value %d >14 received.\n",
- snd_wscale);
- snd_wscale = 14;
- }
- opt_rx->snd_wscale = snd_wscale;
- }
- break;
- case TCPOPT_TIMESTAMP:
- if (opsize==TCPOLEN_TIMESTAMP) {
- if ((estab && opt_rx->tstamp_ok) ||
- (!estab && sysctl_tcp_timestamps)) {
- opt_rx->saw_tstamp = 1;
- opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
- opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
- }
- }
- break;
- case TCPOPT_SACK_PERM:
- if (opsize==TCPOLEN_SACK_PERM && th->syn && !estab) {
- if (sysctl_tcp_sack) {
- opt_rx->sack_ok = 1;
- tcp_sack_reset(opt_rx);
- }
+ }
+ break;
+ case TCPOPT_WINDOW:
+ if (opsize == TCPOLEN_WINDOW && th->syn &&
+ !estab && sysctl_tcp_window_scaling) {
+ __u8 snd_wscale = *(__u8 *)ptr;
+ opt_rx->wscale_ok = 1;
+ if (snd_wscale > 14) {
+ if (net_ratelimit())
+ printk(KERN_INFO "tcp_parse_options: Illegal window "
+ "scaling value %d >14 received.\n",
+ snd_wscale);
+ snd_wscale = 14;
}
- break;
+ opt_rx->snd_wscale = snd_wscale;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ if ((opsize == TCPOLEN_TIMESTAMP) &&
+ ((estab && opt_rx->tstamp_ok) ||
+ (!estab && sysctl_tcp_timestamps))) {
+ opt_rx->saw_tstamp = 1;
+ opt_rx->rcv_tsval = ntohl(get_unaligned((__be32 *)ptr));
+ opt_rx->rcv_tsecr = ntohl(get_unaligned((__be32 *)(ptr+4)));
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (opsize == TCPOLEN_SACK_PERM && th->syn &&
+ !estab && sysctl_tcp_sack) {
+ opt_rx->sack_ok = 1;
+ tcp_sack_reset(opt_rx);
+ }
+ break;
- case TCPOPT_SACK:
- if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
- !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
- opt_rx->sack_ok) {
- TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
- }
- break;
+ case TCPOPT_SACK:
+ if ((opsize >= (TCPOLEN_SACK_BASE + TCPOLEN_SACK_PERBLOCK)) &&
+ !((opsize - TCPOLEN_SACK_BASE) % TCPOLEN_SACK_PERBLOCK) &&
+ opt_rx->sack_ok) {
+ TCP_SKB_CB(skb)->sacked = (ptr - 2) - (unsigned char *)th;
+ }
+ break;
#ifdef CONFIG_TCP_MD5SIG
- case TCPOPT_MD5SIG:
- /*
- * The MD5 Hash has already been
- * checked (see tcp_v{4,6}_do_rcv()).
- */
- break;
+ case TCPOPT_MD5SIG:
+ /*
+ * The MD5 Hash has already been
+ * checked (see tcp_v{4,6}_do_rcv()).
+ */
+ break;
#endif
- }
+ }
- ptr+=opsize-2;
- length-=opsize;
+ ptr += opsize-2;
+ length -= opsize;
}
}
}
static int tcp_fast_parse_options(struct sk_buff *skb, struct tcphdr *th,
struct tcp_sock *tp)
{
- if (th->doff == sizeof(struct tcphdr)>>2) {
+ if (th->doff == sizeof(struct tcphdr) >> 2) {
tp->rx_opt.saw_tstamp = 0;
return 0;
} else if (tp->rx_opt.tstamp_ok &&
(s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) <= (inet_csk(sk)->icsk_rto * 1024) / HZ);
}
-static inline int tcp_paws_discard(const struct sock *sk, const struct sk_buff *skb)
+static inline int tcp_paws_discard(const struct sock *sk,
+ const struct sk_buff *skb)
{
const struct tcp_sock *tp = tcp_sk(sk);
return ((s32)(tp->rx_opt.ts_recent - tp->rx_opt.rcv_tsval) > TCP_PAWS_WINDOW &&
{
/* We want the right error as BSD sees it (and indeed as we do). */
switch (sk->sk_state) {
- case TCP_SYN_SENT:
- sk->sk_err = ECONNREFUSED;
- break;
- case TCP_CLOSE_WAIT:
- sk->sk_err = EPIPE;
- break;
- case TCP_CLOSE:
- return;
- default:
- sk->sk_err = ECONNRESET;
+ case TCP_SYN_SENT:
+ sk->sk_err = ECONNREFUSED;
+ break;
+ case TCP_CLOSE_WAIT:
+ sk->sk_err = EPIPE;
+ break;
+ case TCP_CLOSE:
+ return;
+ default:
+ sk->sk_err = ECONNRESET;
}
if (!sock_flag(sk, SOCK_DEAD))
sock_set_flag(sk, SOCK_DONE);
switch (sk->sk_state) {
- case TCP_SYN_RECV:
- case TCP_ESTABLISHED:
- /* Move to CLOSE_WAIT */
- tcp_set_state(sk, TCP_CLOSE_WAIT);
- inet_csk(sk)->icsk_ack.pingpong = 1;
- break;
+ case TCP_SYN_RECV:
+ case TCP_ESTABLISHED:
+ /* Move to CLOSE_WAIT */
+ tcp_set_state(sk, TCP_CLOSE_WAIT);
+ inet_csk(sk)->icsk_ack.pingpong = 1;
+ break;
- case TCP_CLOSE_WAIT:
- case TCP_CLOSING:
- /* Received a retransmission of the FIN, do
- * nothing.
- */
- break;
- case TCP_LAST_ACK:
- /* RFC793: Remain in the LAST-ACK state. */
- break;
+ case TCP_CLOSE_WAIT:
+ case TCP_CLOSING:
+ /* Received a retransmission of the FIN, do
+ * nothing.
+ */
+ break;
+ case TCP_LAST_ACK:
+ /* RFC793: Remain in the LAST-ACK state. */
+ break;
- case TCP_FIN_WAIT1:
- /* This case occurs when a simultaneous close
- * happens, we must ack the received FIN and
- * enter the CLOSING state.
- */
- tcp_send_ack(sk);
- tcp_set_state(sk, TCP_CLOSING);
- break;
- case TCP_FIN_WAIT2:
- /* Received a FIN -- send ACK and enter TIME_WAIT. */
- tcp_send_ack(sk);
- tcp_time_wait(sk, TCP_TIME_WAIT, 0);
- break;
- default:
- /* Only TCP_LISTEN and TCP_CLOSE are left, in these
- * cases we should never reach this piece of code.
- */
- printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
- __FUNCTION__, sk->sk_state);
- break;
+ case TCP_FIN_WAIT1:
+ /* This case occurs when a simultaneous close
+ * happens, we must ack the received FIN and
+ * enter the CLOSING state.
+ */
+ tcp_send_ack(sk);
+ tcp_set_state(sk, TCP_CLOSING);
+ break;
+ case TCP_FIN_WAIT2:
+ /* Received a FIN -- send ACK and enter TIME_WAIT. */
+ tcp_send_ack(sk);
+ tcp_time_wait(sk, TCP_TIME_WAIT, 0);
+ break;
+ default:
+ /* Only TCP_LISTEN and TCP_CLOSE are left, in these
+ * cases we should never reach this piece of code.
+ */
+ printk(KERN_ERR "%s: Impossible, sk->sk_state=%d\n",
+ __func__, sk->sk_state);
+ break;
}
/* It _is_ possible, that we have something out-of-order _after_ FIN.
__skb_queue_purge(&tp->out_of_order_queue);
if (tcp_is_sack(tp))
tcp_sack_reset(&tp->rx_opt);
- sk_stream_mem_reclaim(sk);
+ sk_mem_reclaim(sk);
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
/* Do not send POLL_HUP for half duplex close. */
if (sk->sk_shutdown == SHUTDOWN_MASK ||
sk->sk_state == TCP_CLOSE)
- sk_wake_async(sk, 1, POLL_HUP);
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_HUP);
else
- sk_wake_async(sk, 1, POLL_IN);
+ sk_wake_async(sk, SOCK_WAKE_WAITD, POLL_IN);
}
}
-static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq, u32 end_seq)
+static inline int tcp_sack_extend(struct tcp_sack_block *sp, u32 seq,
+ u32 end_seq)
{
if (!after(seq, sp->end_seq) && !after(sp->start_seq, end_seq)) {
if (before(seq, sp->start_seq))
tp->rx_opt.dsack = 1;
tp->duplicate_sack[0].start_seq = seq;
tp->duplicate_sack[0].end_seq = end_seq;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1, 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + 1,
+ 4 - tp->rx_opt.tstamp_ok);
}
}
{
int this_sack;
struct tcp_sack_block *sp = &tp->selective_acks[0];
- struct tcp_sack_block *swalk = sp+1;
+ struct tcp_sack_block *swalk = sp + 1;
/* See if the recent change to the first SACK eats into
* or hits the sequence space of other SACK blocks, if so coalesce.
*/
- for (this_sack = 1; this_sack < tp->rx_opt.num_sacks; ) {
+ for (this_sack = 1; this_sack < tp->rx_opt.num_sacks;) {
if (tcp_sack_extend(sp, swalk->start_seq, swalk->end_seq)) {
int i;
* Decrease num_sacks.
*/
tp->rx_opt.num_sacks--;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
- for (i=this_sack; i < tp->rx_opt.num_sacks; i++)
- sp[i] = sp[i+1];
+ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
+ tp->rx_opt.dsack,
+ 4 - tp->rx_opt.tstamp_ok);
+ for (i = this_sack; i < tp->rx_opt.num_sacks; i++)
+ sp[i] = sp[i + 1];
continue;
}
this_sack++, swalk++;
}
}
-static inline void tcp_sack_swap(struct tcp_sack_block *sack1, struct tcp_sack_block *sack2)
+static inline void tcp_sack_swap(struct tcp_sack_block *sack1,
+ struct tcp_sack_block *sack2)
{
__u32 tmp;
if (!cur_sacks)
goto new_sack;
- for (this_sack=0; this_sack<cur_sacks; this_sack++, sp++) {
+ for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
if (tcp_sack_extend(sp, seq, end_seq)) {
/* Rotate this_sack to the first one. */
- for (; this_sack>0; this_sack--, sp--)
- tcp_sack_swap(sp, sp-1);
+ for (; this_sack > 0; this_sack--, sp--)
+ tcp_sack_swap(sp, sp - 1);
if (cur_sacks > 1)
tcp_sack_maybe_coalesce(tp);
return;
sp--;
}
for (; this_sack > 0; this_sack--, sp--)
- *sp = *(sp-1);
+ *sp = *(sp - 1);
new_sack:
/* Build the new head SACK, and we're done. */
sp->start_seq = seq;
sp->end_seq = end_seq;
tp->rx_opt.num_sacks++;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack,
+ 4 - tp->rx_opt.tstamp_ok);
}
/* RCV.NXT advances, some SACKs should be eaten. */
return;
}
- for (this_sack = 0; this_sack < num_sacks; ) {
+ for (this_sack = 0; this_sack < num_sacks;) {
/* Check if the start of the sack is covered by RCV.NXT. */
if (!before(tp->rcv_nxt, sp->start_seq)) {
int i;
}
if (num_sacks != tp->rx_opt.num_sacks) {
tp->rx_opt.num_sacks = num_sacks;
- tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks + tp->rx_opt.dsack, 4 - tp->rx_opt.tstamp_ok);
+ tp->rx_opt.eff_sacks = min(tp->rx_opt.num_sacks +
+ tp->rx_opt.dsack,
+ 4 - tp->rx_opt.tstamp_ok);
}
}
}
}
+static int tcp_prune_ofo_queue(struct sock *sk);
static int tcp_prune_queue(struct sock *sk);
+static inline int tcp_try_rmem_schedule(struct sock *sk, unsigned int size)
+{
+ if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
+ !sk_rmem_schedule(sk, size)) {
+
+ if (tcp_prune_queue(sk) < 0)
+ return -1;
+
+ if (!sk_rmem_schedule(sk, size)) {
+ if (!tcp_prune_ofo_queue(sk))
+ return -1;
+
+ if (!sk_rmem_schedule(sk, size))
+ return -1;
+ }
+ }
+ return 0;
+}
+
static void tcp_data_queue(struct sock *sk, struct sk_buff *skb)
{
struct tcphdr *th = tcp_hdr(skb);
if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq)
goto drop;
- __skb_pull(skb, th->doff*4);
+ __skb_pull(skb, th->doff * 4);
TCP_ECN_accept_cwr(tp, skb);
if (tp->rx_opt.dsack) {
tp->rx_opt.dsack = 0;
tp->rx_opt.eff_sacks = min_t(unsigned int, tp->rx_opt.num_sacks,
- 4 - tp->rx_opt.tstamp_ok);
+ 4 - tp->rx_opt.tstamp_ok);
}
/* Queue data for delivery to the user.
tp->copied_seq == tp->rcv_nxt && tp->ucopy.len &&
sock_owned_by_user(sk) && !tp->urg_data) {
int chunk = min_t(unsigned int, skb->len,
- tp->ucopy.len);
+ tp->ucopy.len);
__set_current_state(TASK_RUNNING);
if (eaten <= 0) {
queue_and_out:
if (eaten < 0 &&
- (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- !sk_stream_rmem_schedule(sk, skb))) {
- if (tcp_prune_queue(sk) < 0 ||
- !sk_stream_rmem_schedule(sk, skb))
- goto drop;
- }
- sk_stream_set_owner_r(skb, sk);
+ tcp_try_rmem_schedule(sk, skb->truesize))
+ goto drop;
+
+ skb_set_owner_r(skb, sk);
__skb_queue_tail(&sk->sk_receive_queue, skb);
}
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
TCP_ECN_check_ce(tp, skb);
- if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- !sk_stream_rmem_schedule(sk, skb)) {
- if (tcp_prune_queue(sk) < 0 ||
- !sk_stream_rmem_schedule(sk, skb))
- goto drop;
- }
+ if (tcp_try_rmem_schedule(sk, skb->truesize))
+ goto drop;
/* Disable header prediction. */
tp->pred_flags = 0;
SOCK_DEBUG(sk, "out of order segment: rcv_next %X seq %X - %X\n",
tp->rcv_nxt, TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq);
- sk_stream_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
if (!skb_peek(&tp->out_of_order_queue)) {
/* Initial out of order segment, build 1 SACK. */
tp->selective_acks[0].end_seq =
TCP_SKB_CB(skb)->end_seq;
}
- __skb_queue_head(&tp->out_of_order_queue,skb);
+ __skb_queue_head(&tp->out_of_order_queue, skb);
} else {
struct sk_buff *skb1 = tp->out_of_order_queue.prev;
u32 seq = TCP_SKB_CB(skb)->seq;
u32 end_seq = TCP_SKB_CB(skb)->end_seq;
if (seq == TCP_SKB_CB(skb1)->end_seq) {
- __skb_append(skb1, skb, &tp->out_of_order_queue);
+ __skb_queue_after(&tp->out_of_order_queue, skb1, skb);
if (!tp->rx_opt.num_sacks ||
tp->selective_acks[0].end_seq != seq)
if (!after(TCP_SKB_CB(skb1)->seq, seq))
break;
} while ((skb1 = skb1->prev) !=
- (struct sk_buff*)&tp->out_of_order_queue);
+ (struct sk_buff *)&tp->out_of_order_queue);
/* Do skb overlap to previous one? */
- if (skb1 != (struct sk_buff*)&tp->out_of_order_queue &&
+ if (skb1 != (struct sk_buff *)&tp->out_of_order_queue &&
before(seq, TCP_SKB_CB(skb1)->end_seq)) {
if (!after(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
/* All the bits are present. Drop. */
}
if (after(seq, TCP_SKB_CB(skb1)->seq)) {
/* Partial overlap. */
- tcp_dsack_set(tp, seq, TCP_SKB_CB(skb1)->end_seq);
+ tcp_dsack_set(tp, seq,
+ TCP_SKB_CB(skb1)->end_seq);
} else {
skb1 = skb1->prev;
}
/* And clean segments covered by new one as whole. */
while ((skb1 = skb->next) !=
- (struct sk_buff*)&tp->out_of_order_queue &&
+ (struct sk_buff *)&tp->out_of_order_queue &&
after(end_seq, TCP_SKB_CB(skb1)->seq)) {
- if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
- tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, end_seq);
- break;
- }
- __skb_unlink(skb1, &tp->out_of_order_queue);
- tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq, TCP_SKB_CB(skb1)->end_seq);
- __kfree_skb(skb1);
+ if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) {
+ tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+ end_seq);
+ break;
+ }
+ __skb_unlink(skb1, &tp->out_of_order_queue);
+ tcp_dsack_extend(tp, TCP_SKB_CB(skb1)->seq,
+ TCP_SKB_CB(skb1)->end_seq);
+ __kfree_skb(skb1);
}
add_sack:
/* First, check that queue is collapsible and find
* the point where collapsing can be useful. */
- for (skb = head; skb != tail; ) {
+ for (skb = head; skb != tail;) {
/* No new bits? It is possible on ofo queue. */
if (!before(start, TCP_SKB_CB(skb)->end_seq)) {
struct sk_buff *next = skb->next;
/* Too big header? This can happen with IPv6. */
if (copy < 0)
return;
- if (end-start < copy)
- copy = end-start;
- nskb = alloc_skb(copy+header, GFP_ATOMIC);
+ if (end - start < copy)
+ copy = end - start;
+ nskb = alloc_skb(copy + header, GFP_ATOMIC);
if (!nskb)
return;
memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(nskb)->end_seq = start;
__skb_insert(nskb, skb->prev, skb, list);
- sk_stream_set_owner_r(nskb, sk);
+ skb_set_owner_r(nskb, sk);
/* Copy data, releasing collapsed skbs. */
while (copy > 0) {
}
}
+/*
+ * Purge the out-of-order queue.
+ * Return true if queue was pruned.
+ */
+static int tcp_prune_ofo_queue(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ int res = 0;
+
+ if (!skb_queue_empty(&tp->out_of_order_queue)) {
+ NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
+ __skb_queue_purge(&tp->out_of_order_queue);
+
+ /* Reset SACK state. A conforming SACK implementation will
+ * do the same at a timeout based retransmit. When a connection
+ * is in a sad state like this, we care only about integrity
+ * of the connection not performance.
+ */
+ if (tp->rx_opt.sack_ok)
+ tcp_sack_reset(&tp->rx_opt);
+ sk_mem_reclaim(sk);
+ res = 1;
+ }
+ return res;
+}
+
/* Reduce allocated memory if we can, trying to get
* the socket within its memory limits again.
*
tcp_collapse_ofo_queue(sk);
tcp_collapse(sk, &sk->sk_receive_queue,
sk->sk_receive_queue.next,
- (struct sk_buff*)&sk->sk_receive_queue,
+ (struct sk_buff *)&sk->sk_receive_queue,
tp->copied_seq, tp->rcv_nxt);
- sk_stream_mem_reclaim(sk);
+ sk_mem_reclaim(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
/* Collapsing did not help, destructive actions follow.
* This must not ever occur. */
- /* First, purge the out_of_order queue. */
- if (!skb_queue_empty(&tp->out_of_order_queue)) {
- NET_INC_STATS_BH(LINUX_MIB_OFOPRUNED);
- __skb_queue_purge(&tp->out_of_order_queue);
-
- /* Reset SACK state. A conforming SACK implementation will
- * do the same at a timeout based retransmit. When a connection
- * is in a sad state like this, we care only about integrity
- * of the connection not performance.
- */
- if (tcp_is_sack(tp))
- tcp_sack_reset(&tp->rx_opt);
- sk_stream_mem_reclaim(sk);
- }
+ tcp_prune_ofo_queue(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf)
return 0;
return -1;
}
-
/* RFC2861, slow part. Adjust cwnd, after it was not full during one rto.
* As additional protections, we do not touch cwnd in retransmission phases,
* and if application hit its sndbuf limit recently.
int sndmem = max_t(u32, tp->rx_opt.mss_clamp, tp->mss_cache) +
MAX_TCP_HEADER + 16 + sizeof(struct sk_buff),
demanded = max_t(unsigned int, tp->snd_cwnd,
- tp->reordering + 1);
- sndmem *= 2*demanded;
+ tp->reordering + 1);
+ sndmem *= 2 * demanded;
if (sndmem > sk->sk_sndbuf)
sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]);
tp->snd_cwnd_stamp = tcp_time_stamp;
/* We ACK each frame or... */
tcp_in_quickack_mode(sk) ||
/* We have out of order data. */
- (ofo_possible &&
- skb_peek(&tp->out_of_order_queue))) {
+ (ofo_possible && skb_peek(&tp->out_of_order_queue))) {
/* Then ack it now */
tcp_send_ack(sk);
} else {
* either form (or just set the sysctl tcp_stdurg).
*/
-static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
+static void tcp_check_urg(struct sock *sk, struct tcphdr *th)
{
struct tcp_sock *tp = tcp_sk(sk);
u32 ptr = ntohs(th->urg_ptr);
* buggy users.
*/
if (tp->urg_seq == tp->copied_seq && tp->urg_data &&
- !sock_flag(sk, SOCK_URGINLINE) &&
- tp->copied_seq != tp->rcv_nxt) {
+ !sock_flag(sk, SOCK_URGINLINE) && tp->copied_seq != tp->rcv_nxt) {
struct sk_buff *skb = skb_peek(&sk->sk_receive_queue);
tp->copied_seq++;
if (skb && !before(tp->copied_seq, TCP_SKB_CB(skb)->end_seq)) {
}
}
- tp->urg_data = TCP_URG_NOTYET;
- tp->urg_seq = ptr;
+ tp->urg_data = TCP_URG_NOTYET;
+ tp->urg_seq = ptr;
/* Disable header prediction. */
tp->pred_flags = 0;
/* Check if we get a new urgent pointer - normally not. */
if (th->urg)
- tcp_check_urg(sk,th);
+ tcp_check_urg(sk, th);
/* Do we wait for any urgent data? - normally not... */
if (tp->urg_data == TCP_URG_NOTYET) {
}
}
+static int tcp_defer_accept_check(struct sock *sk)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+
+ if (tp->defer_tcp_accept.request) {
+ int queued_data = tp->rcv_nxt - tp->copied_seq;
+ int hasfin = !skb_queue_empty(&sk->sk_receive_queue) ?
+ tcp_hdr((struct sk_buff *)
+ sk->sk_receive_queue.prev)->fin : 0;
+
+ if (queued_data && hasfin)
+ queued_data--;
+
+ if (queued_data &&
+ tp->defer_tcp_accept.listen_sk->sk_state == TCP_LISTEN) {
+ if (sock_flag(sk, SOCK_KEEPOPEN)) {
+ inet_csk_reset_keepalive_timer(sk,
+ keepalive_time_when(tp));
+ } else {
+ inet_csk_delete_keepalive_timer(sk);
+ }
+
+ inet_csk_reqsk_queue_add(
+ tp->defer_tcp_accept.listen_sk,
+ tp->defer_tcp_accept.request,
+ sk);
+
+ tp->defer_tcp_accept.listen_sk->sk_data_ready(
+ tp->defer_tcp_accept.listen_sk, 0);
+
+ sock_put(tp->defer_tcp_accept.listen_sk);
+ sock_put(sk);
+ tp->defer_tcp_accept.listen_sk = NULL;
+ tp->defer_tcp_accept.request = NULL;
+ } else if (hasfin ||
+ tp->defer_tcp_accept.listen_sk->sk_state != TCP_LISTEN) {
+ tcp_reset(sk);
+ return -1;
+ }
+ }
+ return 0;
+}
+
static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen)
{
struct tcp_sock *tp = tcp_sk(sk);
return err;
}
-static __sum16 __tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static __sum16 __tcp_checksum_complete_user(struct sock *sk,
+ struct sk_buff *skb)
{
__sum16 result;
return result;
}
-static inline int tcp_checksum_complete_user(struct sock *sk, struct sk_buff *skb)
+static inline int tcp_checksum_complete_user(struct sock *sk,
+ struct sk_buff *skb)
{
return !skb_csum_unnecessary(skb) &&
- __tcp_checksum_complete_user(sk, skb);
+ __tcp_checksum_complete_user(sk, skb);
}
#ifdef CONFIG_NET_DMA
-static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb, int hlen)
+static int tcp_dma_try_early_copy(struct sock *sk, struct sk_buff *skb,
+ int hlen)
{
struct tcp_sock *tp = tcp_sk(sk);
int chunk = skb->len - hlen;
if (tp->ucopy.dma_chan && skb_csum_unnecessary(skb)) {
dma_cookie = dma_skb_copy_datagram_iovec(tp->ucopy.dma_chan,
- skb, hlen, tp->ucopy.iov, chunk, tp->ucopy.pinned_list);
+ skb, hlen,
+ tp->ucopy.iov, chunk,
+ tp->ucopy.pinned_list);
if (dma_cookie < 0)
goto out;
*/
if ((tcp_flag_word(th) & TCP_HP_BITS) == tp->pred_flags &&
- TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
+ TCP_SKB_CB(skb)->seq == tp->rcv_nxt) {
int tcp_header_len = tp->tcp_header_len;
/* Timestamp header prediction: tcp_header_len
eaten = 1;
}
#endif
- if (tp->ucopy.task == current && sock_owned_by_user(sk) && !copied_early) {
+ if (tp->ucopy.task == current &&
+ sock_owned_by_user(sk) && !copied_early) {
__set_current_state(TASK_RUNNING);
if (!tcp_copy_to_iovec(sk, skb, tcp_header_len))
NET_INC_STATS_BH(LINUX_MIB_TCPHPHITS);
/* Bulk data transfer: receiver */
- __skb_pull(skb,tcp_header_len);
+ __skb_pull(skb, tcp_header_len);
__skb_queue_tail(&sk->sk_receive_queue, skb);
- sk_stream_set_owner_r(skb, sk);
+ skb_set_owner_r(skb, sk);
tp->rcv_nxt = TCP_SKB_CB(skb)->end_seq;
}
}
slow_path:
- if (len < (th->doff<<2) || tcp_checksum_complete_user(sk, skb))
+ if (len < (th->doff << 2) || tcp_checksum_complete_user(sk, skb))
goto csum_error;
/*
tcp_data_snd_check(sk);
tcp_ack_snd_check(sk);
+
+ tcp_defer_accept_check(sk);
return 0;
csum_error:
if (!sock_flag(sk, SOCK_DEAD)) {
sk->sk_state_change(sk);
- sk_wake_async(sk, 0, POLL_OUT);
+ sk_wake_async(sk, SOCK_WAKE_IO, POLL_OUT);
}
if (sk->sk_write_pending ||
}
/* PAWS check. */
- if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp && tcp_paws_check(&tp->rx_opt, 0))
+ if (tp->rx_opt.ts_recent_stamp && tp->rx_opt.saw_tstamp &&
+ tcp_paws_check(&tp->rx_opt, 0))
goto discard_and_undo;
if (th->syn) {
tcp_sync_mss(sk, icsk->icsk_pmtu_cookie);
tcp_initialize_rcv_mss(sk);
-
tcp_send_synack(sk);
#if 0
/* Note, we could accept data and URG from this segment.
return 1;
}
-
/*
* This function implements the receiving procedure of RFC 793 for
* all states except ESTABLISHED and TIME_WAIT.
* are not waked up, because sk->sk_sleep ==
* NULL and sk->sk_socket == NULL.
*/
- if (sk->sk_socket) {
- sk_wake_async(sk,0,POLL_OUT);
- }
+ if (sk->sk_socket)
+ sk_wake_async(sk,
+ SOCK_WAKE_IO, POLL_OUT);
tp->snd_una = TCP_SKB_CB(skb)->ack_seq;
tp->snd_wnd = ntohs(th->window) <<
* and does not calculate rtt.
* Fix it at least with timestamps.
*/
- if (tp->rx_opt.saw_tstamp && tp->rx_opt.rcv_tsecr &&
- !tp->srtt)
+ if (tp->rx_opt.saw_tstamp &&
+ tp->rx_opt.rcv_tsecr && !tp->srtt)
tcp_ack_saw_tstamp(sk, 0);
if (tp->rx_opt.tstamp_ok)
EXPORT_SYMBOL(sysctl_tcp_ecn);
EXPORT_SYMBOL(sysctl_tcp_reordering);
+EXPORT_SYMBOL(sysctl_tcp_adv_win_scale);
EXPORT_SYMBOL(tcp_parse_options);
EXPORT_SYMBOL(tcp_rcv_established);
EXPORT_SYMBOL(tcp_rcv_state_process);