net/tcp: fix selective acknowledge

The current code assumes that all packets (except the last one) are of the
same size. This is definitely wrong. Replace the SACK code with a new one
that does not rely on this assumption. The new code also uses less memory.

Signed-off-by: Mikhail Kshevetskiy <mikhail.kshevetskiy@iopsys.eu>
Reviewed-by: Simon Glass <sjg@chromium.org>
This commit is contained in:
Mikhail Kshevetskiy
2024-12-28 13:46:28 +03:00
committed by Tom Rini
parent 77da29b868
commit 40d3d6557d

206
net/tcp.c
View File

@@ -38,21 +38,6 @@ static u32 tcp_ack_edge;
static int tcp_activity_count; static int tcp_activity_count;
/*
* Search for TCP_SACK and review the comments before the code section
* TCP_SACK is the number of packets at the front of the stream
*/
enum pkt_state {PKT, NOPKT};
struct sack_r {
struct sack_edges se;
enum pkt_state st;
};
static struct sack_r edge_a[TCP_SACK];
static unsigned int sack_idx;
static unsigned int prev_len;
/* /*
* TCP lengths are stored as a rounded up number of 32 bit words. * TCP lengths are stored as a rounded up number of 32 bit words.
* Add 3 to length round up, rounded, then divided into the * Add 3 to length round up, rounded, then divided into the
@@ -69,6 +54,11 @@ static enum tcp_state current_tcp_state;
/* Current TCP RX packet handler */ /* Current TCP RX packet handler */
static rxhand_tcp *tcp_packet_handler; static rxhand_tcp *tcp_packet_handler;
/*
 * tcp_seq_cmp() - compare TCP sequence numbers modulo 2^32
 * @a: first sequence number
 * @b: second sequence number
 *
 * Return: negative if @a is before @b, 0 if equal, positive if @a is
 * after @b. The unsigned subtraction wraps, then the cast to s32 makes
 * the result's sign reflect the (wraparound-aware) ordering, so the
 * comparison stays correct across the 2^32 sequence-space wrap.
 */
static inline s32 tcp_seq_cmp(u32 a, u32 b)
{
	return (s32)(a - b);
}
/** /**
* tcp_get_tcp_state() - get current TCP state * tcp_get_tcp_state() - get current TCP state
* *
@@ -267,6 +257,7 @@ int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len,
action = TCP_FIN; action = TCP_FIN;
current_tcp_state = TCP_FIN_WAIT_1; current_tcp_state = TCP_FIN_WAIT_1;
} else { } else {
tcp_lost.len = TCP_OPT_LEN_2;
current_tcp_state = TCP_SYN_SENT; current_tcp_state = TCP_SYN_SENT;
} }
break; break;
@@ -353,6 +344,20 @@ int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len,
return pkt_hdr_len; return pkt_hdr_len;
} }
/*
 * tcp_update_ack_edge() - advance the cumulative ACK edge over hill 0
 *
 * If the contiguous-stream edge has reached (or passed) the left edge of
 * the first SACK hill, that hill is now part of the contiguous stream:
 * move the ACK edge to its right edge and drop it from the list by
 * shifting the remaining hills down one slot.
 *
 * NOTE(review): only one collapse is attempted per call; this presumably
 * relies on tcp_hole() keeping hills disjoint and non-adjacent, so at
 * most one hill can touch the ACK edge at a time — confirm against
 * tcp_hole()'s merge logic.
 */
static void tcp_update_ack_edge(void)
{
	if (tcp_seq_cmp(tcp_ack_edge, tcp_lost.hill[0].l) >= 0) {
		/* hill 0 is contiguous with the stream; absorb it */
		tcp_ack_edge = tcp_lost.hill[0].r;

		memmove(&tcp_lost.hill[0], &tcp_lost.hill[1],
			(TCP_SACK_HILLS - 1) * sizeof(struct sack_edges));

		/* one SACK block fewer in the option we will send */
		tcp_lost.len -= TCP_OPT_LEN_8;

		/* mark the vacated last slot as unused */
		tcp_lost.hill[TCP_SACK_HILLS - 1].l = TCP_O_NOP;
		tcp_lost.hill[TCP_SACK_HILLS - 1].r = TCP_O_NOP;
	}
}
/** /**
* tcp_hole() - Selective Acknowledgment (Essential for fast stream transfer) * tcp_hole() - Selective Acknowledgment (Essential for fast stream transfer)
* @tcp_seq_num: TCP sequence start number * @tcp_seq_num: TCP sequence start number
@@ -360,106 +365,79 @@ int tcp_set_tcp_header(uchar *pkt, int dport, int sport, int payload_len,
*/ */
void tcp_hole(u32 tcp_seq_num, u32 len) void tcp_hole(u32 tcp_seq_num, u32 len)
{ {
u32 idx_sack, sack_in; int i, j, cnt, cnt_move;
u32 sack_end = TCP_SACK - 1;
u32 hill = 0;
enum pkt_state expect = PKT;
u32 seq = tcp_seq_num - tcp_seq_init;
u32 hol_l = tcp_ack_edge - tcp_seq_init;
u32 hol_r = 0;
/* Place new seq number in correct place in receive array */ cnt = (tcp_lost.len - TCP_OPT_LEN_2) / TCP_OPT_LEN_8;
if (prev_len == 0) for (i = 0; i < cnt; i++) {
prev_len = len; if (tcp_seq_cmp(tcp_lost.hill[i].r, tcp_seq_num) < 0)
continue;
idx_sack = sack_idx + ((tcp_seq_num - tcp_ack_edge) / prev_len); if (tcp_seq_cmp(tcp_lost.hill[i].l, tcp_seq_num + len) > 0)
if (idx_sack < TCP_SACK) {
edge_a[idx_sack].se.l = tcp_seq_num;
edge_a[idx_sack].se.r = tcp_seq_num + len;
edge_a[idx_sack].st = PKT;
/*
* The fin (last) packet is not the same length as data
* packets, and if it's length is recorded and used for
* array index calculation, calculation breaks.
*/
if (prev_len < len)
prev_len = len;
}
debug_cond(DEBUG_DEV_PKT,
"TCP 1 seq %d, edg %d, len %d, sack_idx %d, sack_end %d\n",
seq, hol_l, len, sack_idx, sack_end);
/* Right edge of contiguous stream, is the left edge of first hill */
hol_l = tcp_seq_num - tcp_seq_init;
hol_r = hol_l + len;
if (IS_ENABLED(CONFIG_PROT_TCP_SACK))
tcp_lost.len = TCP_OPT_LEN_2;
debug_cond(DEBUG_DEV_PKT,
"TCP 1 in %d, seq %d, pkt_l %d, pkt_r %d, sack_idx %d, sack_end %d\n",
idx_sack, seq, hol_l, hol_r, sack_idx, sack_end);
for (sack_in = sack_idx; sack_in < sack_end && hill < TCP_SACK_HILLS;
sack_in++) {
switch (expect) {
case NOPKT:
switch (edge_a[sack_in].st) {
case NOPKT:
debug_cond(DEBUG_INT_STATE, "N");
break;
case PKT:
debug_cond(DEBUG_INT_STATE, "n");
if (IS_ENABLED(CONFIG_PROT_TCP_SACK)) {
tcp_lost.hill[hill].l =
edge_a[sack_in].se.l;
tcp_lost.hill[hill].r =
edge_a[sack_in].se.r;
}
expect = PKT;
break;
}
break; break;
case PKT:
switch (edge_a[sack_in].st) {
case NOPKT:
debug_cond(DEBUG_INT_STATE, "p");
if (sack_in > sack_idx &&
hill < TCP_SACK_HILLS) {
hill++;
if (IS_ENABLED(CONFIG_PROT_TCP_SACK))
tcp_lost.len += TCP_OPT_LEN_8;
}
expect = NOPKT;
break;
case PKT:
debug_cond(DEBUG_INT_STATE, "P");
if (tcp_ack_edge == edge_a[sack_in].se.l) { if (tcp_seq_cmp(tcp_lost.hill[i].l, tcp_seq_num) > 0)
tcp_ack_edge = edge_a[sack_in].se.r; tcp_lost.hill[i].l = tcp_seq_num;
edge_a[sack_in].st = NOPKT; if (tcp_seq_cmp(tcp_lost.hill[i].l, tcp_seq_num) < 0) {
sack_idx++; len += tcp_seq_num - tcp_lost.hill[i].l;
} else { tcp_seq_num = tcp_lost.hill[i].l;
if (IS_ENABLED(CONFIG_PROT_TCP_SACK) &&
hill < TCP_SACK_HILLS)
tcp_lost.hill[hill].r =
edge_a[sack_in].se.r;
if (IS_ENABLED(CONFIG_PROT_TCP_SACK) &&
sack_in == sack_end - 1)
tcp_lost.hill[hill].r =
edge_a[sack_in].se.r;
}
break;
}
break;
} }
if (tcp_seq_cmp(tcp_lost.hill[i].r, tcp_seq_num + len) >= 0) {
tcp_update_ack_edge();
return;
}
/* check overlapping with next hills */
cnt_move = 0;
tcp_lost.hill[i].r = tcp_seq_num + len;
for (j = i + 1; j < cnt; j++) {
if (tcp_seq_cmp(tcp_lost.hill[j].l, tcp_lost.hill[i].r) > 0)
break;
tcp_lost.hill[i].r = tcp_lost.hill[j].r;
cnt_move++;
}
if (cnt_move > 0) {
if (cnt > i + cnt_move + 1)
memmove(&tcp_lost.hill[i + 1],
&tcp_lost.hill[i + cnt_move + 1],
cnt_move * sizeof(struct sack_edges));
cnt -= cnt_move;
tcp_lost.len = TCP_OPT_LEN_2 + cnt * TCP_OPT_LEN_8;
for (j = cnt; j < TCP_SACK_HILLS; j++) {
tcp_lost.hill[j].l = TCP_O_NOP;
tcp_lost.hill[j].r = TCP_O_NOP;
}
}
tcp_update_ack_edge();
return;
} }
debug_cond(DEBUG_INT_STATE, "\n");
if (!IS_ENABLED(CONFIG_PROT_TCP_SACK) || tcp_lost.len <= TCP_OPT_LEN_2) if (i == TCP_SACK_HILLS) {
sack_idx = 0; tcp_update_ack_edge();
} return;
}
if (cnt < TCP_SACK_HILLS) {
cnt_move = cnt - i;
cnt++;
} else {
cnt = TCP_SACK_HILLS;
cnt_move = TCP_SACK_HILLS - i;
}
if (cnt_move > 0)
memmove(&tcp_lost.hill[i + 1],
&tcp_lost.hill[i],
cnt_move * sizeof(struct sack_edges));
tcp_lost.hill[i].l = tcp_seq_num;
tcp_lost.hill[i].r = tcp_seq_num + len;
tcp_lost.len = TCP_OPT_LEN_2 + cnt * TCP_OPT_LEN_8;
tcp_update_ack_edge();
};
/** /**
* tcp_parse_options() - parsing TCP options * tcp_parse_options() - parsing TCP options
@@ -509,7 +487,6 @@ static u8 tcp_state_machine(u8 tcp_flags, u32 tcp_seq_num, int payload_len)
u8 tcp_push = tcp_flags & TCP_PUSH; u8 tcp_push = tcp_flags & TCP_PUSH;
u8 tcp_ack = tcp_flags & TCP_ACK; u8 tcp_ack = tcp_flags & TCP_ACK;
u8 action = TCP_DATA; u8 action = TCP_DATA;
int i;
/* /*
* tcp_flags are examined to determine TX action in a given state * tcp_flags are examined to determine TX action in a given state
@@ -536,6 +513,7 @@ static u8 tcp_state_machine(u8 tcp_flags, u32 tcp_seq_num, int payload_len)
action = TCP_SYN | TCP_ACK; action = TCP_SYN | TCP_ACK;
tcp_seq_init = tcp_seq_num; tcp_seq_init = tcp_seq_num;
tcp_ack_edge = tcp_seq_num + 1; tcp_ack_edge = tcp_seq_num + 1;
tcp_lost.len = TCP_OPT_LEN_2;
current_tcp_state = TCP_SYN_RECEIVED; current_tcp_state = TCP_SYN_RECEIVED;
} else if (tcp_ack || tcp_fin) { } else if (tcp_ack || tcp_fin) {
action = TCP_DATA; action = TCP_DATA;
@@ -552,13 +530,7 @@ static u8 tcp_state_machine(u8 tcp_flags, u32 tcp_seq_num, int payload_len)
action |= TCP_ACK; action |= TCP_ACK;
tcp_seq_init = tcp_seq_num; tcp_seq_init = tcp_seq_num;
tcp_ack_edge = tcp_seq_num + 1; tcp_ack_edge = tcp_seq_num + 1;
sack_idx = 0;
edge_a[sack_idx].se.l = tcp_ack_edge;
edge_a[sack_idx].se.r = tcp_ack_edge;
prev_len = 0;
current_tcp_state = TCP_ESTABLISHED; current_tcp_state = TCP_ESTABLISHED;
for (i = 0; i < TCP_SACK; i++)
edge_a[i].st = NOPKT;
if (tcp_syn && tcp_ack) if (tcp_syn && tcp_ack)
action |= TCP_PUSH; action |= TCP_PUSH;