Skip to content

Commit

Permalink
Merge pull request #26 from minuscat/AccECN-2024
Browse files Browse the repository at this point in the history
Merge AccECN change back into L4STeam/linux:AccECN-2023
  • Loading branch information
minuscat authored Feb 7, 2024
2 parents b06ab7c + 623cd47 commit 64441b4
Show file tree
Hide file tree
Showing 9 changed files with 117 additions and 80 deletions.
4 changes: 2 additions & 2 deletions Documentation/networking/ip-sysctl.rst
Original file line number Diff line number Diff line change
Expand Up @@ -433,9 +433,9 @@ tcp_ecn_option - INTEGER

tcp_ecn_option_beacon - INTEGER
Control Accurate ECN (AccECN) option sending frequency per RTT and it
take effect only when tcp_ecn_option is set to 2.
takes effect only when tcp_ecn_option is set to 2.

Default: 3 (AccECN will be send at least 3 times per RTT)
Default: 1 (AccECN will be sent at least 1 time per RTT)

tcp_ecn_fallback - BOOLEAN
If the kernel detects that ECN connection misbehaves, enable fall
Expand Down
6 changes: 4 additions & 2 deletions include/linux/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,7 @@ struct tcp_request_sock {
#if IS_ENABLED(CONFIG_MPTCP)
bool drop_req;
#endif
u8 noect : 1,
accecn_ok : 1,
u8 accecn_ok : 1,
saw_accecn_opt : 2,
syn_ect_snt: 2,
syn_ect_rcv: 2;
Expand Down Expand Up @@ -234,6 +233,9 @@ struct tcp_sock {
syn_ect_snt:2, /* AccECN ECT memory, only */
syn_ect_rcv:2, /* ... needed during 3WHS + first seqno */
ecn_fail:1; /* ECN reflector detected path mangling */
u8 accecn_no_respond:1, /* AccECN no response on feedback */
accecn_no_options:1, /* AccECN no options send out */
first_data_ack:1; /* Check for first data ack */
u8 saw_accecn_opt:2, /* An AccECN option was seen */
fast_ack_mode:2, /* which fast ack mode ? */
unused:4;
Expand Down
4 changes: 3 additions & 1 deletion include/net/request_sock.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ struct request_sock {
u16 mss;
u8 num_retrans; /* number of retransmits */
u8 syncookie:1; /* syncookie: encode tcpopts in timestamp */
u8 num_timeout:7; /* number of timeouts */
u8 num_timeout:7, /* number of timeouts */
is_rtx:1; /* set once the SYN/ACK is retransmitted (see inet_rtx_syn_ack) */
u32 ts_recent;
struct timer_list rsk_timer;
const struct request_sock_ops *rsk_ops;
Expand Down Expand Up @@ -105,6 +106,7 @@ reqsk_alloc(const struct request_sock_ops *ops, struct sock *sk_listener,
sk_tx_queue_clear(req_to_sk(req));
req->saved_syn = NULL;
req->num_timeout = 0;
req->is_rtx = 0;
req->num_retrans = 0;
req->sk = NULL;
refcount_set(&req->rsk_refcnt, 0);
Expand Down
4 changes: 2 additions & 2 deletions include/net/tcp.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,8 +432,8 @@ static inline int tcp_accecn_extract_syn_ect(u8 ace)
}

bool tcp_accecn_validate_syn_feedback(struct sock *sk, u8 ace, u8 sent_ect);
bool tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
struct request_sock *req, u8 syn_ect_snt);
void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
u8 syn_ect_snt);
u8 tcp_accecn_option_init(const struct sk_buff *skb, u8 opt_offset);
void tcp_ecn_received_counters(struct sock *sk, const struct sk_buff *skb,
u32 payload_len);
Expand Down
1 change: 1 addition & 0 deletions net/ipv4/inet_connection_sock.c
Original file line number Diff line number Diff line change
Expand Up @@ -688,6 +688,7 @@ static void syn_ack_recalc(struct request_sock *req,

int inet_rtx_syn_ack(const struct sock *parent, struct request_sock *req)
{
req->is_rtx = 1;
int err = req->rsk_ops->rtx_syn_ack(parent, req);

if (!err)
Expand Down
3 changes: 3 additions & 0 deletions net/ipv4/tcp.c
Original file line number Diff line number Diff line change
Expand Up @@ -3033,6 +3033,9 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->delivered_ce = 0;
tp->saw_accecn_opt = 0;
tp->ecn_fail = 0;
tp->accecn_no_respond = 0;
tp->accecn_no_options = 0;
tp->first_data_ack = 0;
tcp_accecn_init_counters(tp);
tp->prev_ecnfield = 0;
tp->accecn_opt_tstamp = 0;
Expand Down
48 changes: 39 additions & 9 deletions net/ipv4/tcp_input.c
Original file line number Diff line number Diff line change
Expand Up @@ -449,11 +449,11 @@ static void tcp_ecn_rcv_synack(struct sock *sk, const struct sk_buff *skb,
else
tcp_ecn_mode_set(tp, TCP_ECN_MODE_RFC3168);
break;
// [CY] 3.1.2. Backward Compatibility - If a TCP Client has sent a SYN requesting AccECN feedback with
// (AE,CWR,ECE) = (1,1,1) then receives a SYN/ACK with the currently reserved combination (AE,CWR,ECE)
// = (1,0,1) but it does not have logic specific to such a combination, the Client MUST enable AccECN
// mode as if the SYN/ACK confirmed that the Server supported AccECN and as if it fed back that the
// IP-ECN field on the SYN had arrived unchanged.
/* [CY] 3.1.2. Backward Compatibility - If a TCP Client has sent a SYN requesting AccECN feedback with (AE,CWR,ECE) =
* (1,1,1) then receives a SYN/ACK with the currently reserved combination (AE,CWR,ECE) = (1,0,1) but it does not
* have logic specific to such a combination, the Client MUST enable AccECN mode as if the SYN/ACK confirmed that the
* Server supported AccECN and as if it fed back that the IP-ECN field on the SYN had arrived unchanged.
*/
case 0x5:
if (tcp_ecn_mode_pending(tp)) {
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
Expand Down Expand Up @@ -595,7 +595,7 @@ static bool tcp_accecn_process_option(struct tcp_sock *tp,
bool order1, res;
unsigned int i;

if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL)
if (tp->saw_accecn_opt == TCP_ACCECN_OPT_FAIL || tp->accecn_no_respond)
return false;

if (!(flag & FLAG_SLOWPATH) || !tp->rx_opt.accecn) {
Expand Down Expand Up @@ -703,6 +703,22 @@ static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
if (flag & FLAG_SYN_ACKED)
return 0;

/* [CY] 3.2.2.4. Testing for Zeroing of the ACE Field - If AccECN has been successfully negotiated, the Data Sender
* MAY check the value of the ACE counter in the first feedback packet (with or without data) that arrives after the
* 3-way handshake. If the value of this ACE field is found to be zero (0b000), for the remainder of the half-
* connection the Data Sender ought to send non-ECN-capable packets and it is advised not to respond to any feedback
* of CE markings.
*/
if (!tp->first_data_ack) {
tp->first_data_ack = 1;
if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) {
tp->ecn_fail = 1;
INET_ECN_dontxmit(sk);
tp->accecn_no_respond = 1;
return 0;
}
}

if (tp->received_ce_pending >= TCP_ACCECN_ACE_MAX_DELTA)
inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_NOW;

Expand Down Expand Up @@ -4893,8 +4909,18 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff *skb)
* DSACK state and change the txhash to re-route speculatively.
*/
if (TCP_SKB_CB(skb)->seq == tcp_sk(sk)->duplicate_sack[0].start_seq &&
sk_rethink_txhash(sk))
sk_rethink_txhash(sk)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
/* [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - If a middlebox is dropping
* packets with options it does not recognize, a host that is sending little or no data but mostly pure
* ACKs will not inherently detect such losses. Such a host MAY detect loss of ACKs carrying the AccECN
* Option by detecting whether the acknowledged data always reappears as a retransmission. In such cases,
* the host SHOULD disable the sending of the AccECN Option for this half-connection.
*/
if (tcp_ecn_mode_accecn(tcp_sk(sk)))
tcp_sk(sk)->accecn_no_options = 1;

}
}

static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
Expand Down Expand Up @@ -6235,6 +6261,11 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb,
if (th->syn) {
if (tcp_ecn_mode_accecn(tp)) {
send_accecn_reflector = true;
/* [CY] 3.1.5. Implications of AccECN Mode - A host in AccECN mode that is feeding back the IP-ECN
* field on a SYN or SYN/ACK: MUST feed back the IP-ECN field on the latest valid SYN or acceptable
* SYN/ACK to arrive.
*/
tp->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
if (tp->rx_opt.accecn &&
tp->saw_accecn_opt < TCP_ACCECN_OPT_COUNTER_SEEN) {
tp->saw_accecn_opt = tcp_accecn_option_init(skb,
Expand Down Expand Up @@ -7017,7 +7048,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb)

tcp_initialize_rcv_mss(sk);
if (tcp_ecn_mode_accecn(tp))
tcp_accecn_third_ack(sk, skb, req, tp->syn_ect_snt);
tcp_accecn_third_ack(sk, skb, tp->syn_ect_snt);
tcp_fast_path_on(tp);
break;

Expand Down Expand Up @@ -7218,7 +7249,6 @@ static void tcp_openreq_init(struct request_sock *req,
tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1;
tcp_rsk(req)->snt_synack = 0;
tcp_rsk(req)->last_oow_ack_time = 0;
tcp_rsk(req)->noect = 0;
tcp_rsk(req)->accecn_ok = 0;
tcp_rsk(req)->saw_accecn_opt = 0;
tcp_rsk(req)->syn_ect_rcv = 0;
Expand Down
78 changes: 34 additions & 44 deletions net/ipv4/tcp_minisocks.c
Original file line number Diff line number Diff line change
Expand Up @@ -397,26 +397,21 @@ void tcp_openreq_init_rwin(struct request_sock *req,
}
EXPORT_SYMBOL(tcp_openreq_init_rwin);

bool tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
struct request_sock *req, u8 syn_ect_snt)
void tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
u8 syn_ect_snt)
{
u8 ace = tcp_accecn_ace(tcp_hdr(skb));
struct tcp_sock *tp = tcp_sk(sk);
bool verify_ace = true;

switch (ace) {
case 0x0:
/* [CY] 3.2.2.1. ACE Field on the ACK of the SYN/ACK - If the Server is in AccECN mode and in SYN-RCVD
* state, and if it receives a value of zero on a pure ACK with SYN=0 and no SACK blocks, for the rest
* of the connection the Server MUST NOT set ECT on outgoing packets and MUST NOT respond to AccECN
* feedback. Nonetheless, as a Data Receiver it MUST NOT disable AccECN feedback.
*/
tp->ecn_fail = 1;
// [CY] 3.2.2.1. ACE Field on the ACK of the SYN/ACK - If the Server is in AccECN mode and in SYN-RCVD
// state, and if it receives a value of zero on a pure ACK with SYN=0 and no SACK blocks, for the rest
// of the connection the Server MUST NOT set ECT on outgoing packets and MUST NOT respond to AccECN
// feedback. Nonetheless, as a Data Receiver it MUST NOT disable AccECN feedback.
if (!TCP_SKB_CB(skb)->sacked) {
inet_rsk(req)->ecn_ok = 0;
tcp_rsk(req)->accecn_ok = 0;
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
verify_ace = false;
}
tp->accecn_no_respond = 1;
break;
case 0x7:
case 0x5:
Expand All @@ -434,37 +429,32 @@ bool tcp_accecn_third_ack(struct sock *sk, const struct sk_buff *skb,
}
break;
}
return verify_ace;
}

static void tcp_ecn_openreq_child(struct sock *sk,
struct request_sock *req,
const struct request_sock *req,
const struct sk_buff *skb)
{
struct tcp_request_sock *treq = tcp_rsk(req);
const struct tcp_request_sock *treq = tcp_rsk(req);
struct tcp_sock *tp = tcp_sk(sk);

// [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
// any packet for the rest of the connection, if it has received or sent at least one valid
// SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake.
if (treq->noect) {
tcp_ecn_mode_set(tp, TCP_ECN_DISABLED);
} else {
if (treq->accecn_ok) {
/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
* any packet for the rest of the connection, if it has received or sent at least one valid
* SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake.
*/
if (treq->accecn_ok) {
const struct tcphdr *th = (const struct tcphdr *)skb->data;
if (tcp_accecn_third_ack(sk, skb, req, treq->syn_ect_snt)) {
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_snt = treq->syn_ect_snt;
tp->saw_accecn_opt = treq->saw_accecn_opt;
tp->prev_ecnfield = treq->syn_ect_rcv;
tp->accecn_opt_demand = 1;
tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
}
} else {
tcp_ecn_mode_set(tp, TCP_ECN_MODE_ACCECN);
tp->syn_ect_snt = treq->syn_ect_snt;
tcp_accecn_third_ack(sk, skb, treq->syn_ect_snt);
tp->saw_accecn_opt = treq->saw_accecn_opt;
tp->prev_ecnfield = treq->syn_ect_rcv;
tp->accecn_opt_demand = 1;
tcp_ecn_received_counters(sk, skb, skb->len - th->doff * 4);
} else {
tcp_ecn_mode_set(tp, inet_rsk(req)->ecn_ok && !tcp_ca_no_fallback_rfc3168(sk) ?
TCP_ECN_MODE_RFC3168 :
TCP_ECN_DISABLED);
}
TCP_ECN_DISABLED);
}
}

Expand Down Expand Up @@ -717,17 +707,17 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
&tcp_rsk(req)->last_oow_ack_time)) {

if (tcp_rsk(req)->accecn_ok) {
/* [CY] 3.1.5 Implications of AccECN Mode - A host in AccECN mode that is feeding back the IP-ECN
* field on a SYN or SYN/ACK: MUST feed back the IP-ECN field on the latest valid SYN or acceptable
* SYN/ACK to arrive.
*/
tcp_rsk(req)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;
if (tcp_accecn_ace(tcp_hdr(skb)) == 0x0) {
// [CY] 3.1.5. Implications of AccECN Mode - A TCP Server already in AccECN mode: SHOULD
// acknowledge a valid SYN arriving with (AE,CWR,ECE) =(0,0,0) by emitting an AccECN SYN/ACK (with
// the appropriate combination of TCP-ECN flags to feed back the IP-ECN field of this latest SYN)
tcp_sk(sk)->syn_ect_rcv = TCP_SKB_CB(skb)->ip_dsfield & INET_ECN_MASK;

// [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
// any packet for the rest of the connection, if it has received or sent at least one valid
// SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake.
tcp_rsk(req)->noect = 1;
INET_ECN_dontxmit(sk);
/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
* any packet for the rest of the connection, if it has received or sent at least one valid
* SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake
*/
tcp_sk(sk)->ecn_fail = 1;
}
}

Expand Down
49 changes: 29 additions & 20 deletions net/ipv4/tcp_output.c
Original file line number Diff line number Diff line change
Expand Up @@ -378,26 +378,26 @@ static void tcp_accecn_echo_syn_ect(struct tcphdr *th, u8 ect)
}

static void
tcp_ecn_make_synack(struct sock *sk, struct request_sock *req, struct tcphdr *th)
tcp_ecn_make_synack(struct sock *sk, const struct request_sock *req, struct tcphdr *th)
{
if (req->num_timeout < 2) {
if (!req->is_rtx || req->num_timeout < 1) {
if (tcp_rsk(req)->accecn_ok)
tcp_accecn_echo_syn_ect(th, tcp_rsk(req)->syn_ect_rcv);
else if (inet_rsk(req)->ecn_ok)
th->ece = 1;
} else if (tcp_rsk(req)->accecn_ok) {
// [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - If this retransmission times out,
// to expedite connection setup, the TCP Server SHOULD retransmit the SYN/ACK with (AE,CWR,ECE) = (0,0,0) and
// no AccECN Option, but it remains in AccECN feedback mode
/* [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - If this retransmission times out,
* to expedite connection setup, the TCP Server SHOULD retransmit the SYN/ACK with (AE,CWR,ECE) = (0,0,0) and
* no AccECN Option, but it remains in AccECN feedback mode
*/
th->ae = 0;
th->cwr = 0;
th->ece = 0;

// [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on
// any packet for the rest of the connection, if it has received or sent at least one valid
// SYN or Acceptable SYN/ACK with (AE,CWR,ECE) = (0,0,0) during the handshake.
tcp_rsk(req)->noect = 1;
INET_ECN_dontxmit(sk);
/* [CY] 3.1.5. Implications of AccECN Mode - A TCP Server in AccECN mode: MUST NOT set ECT on any packet for
* the rest of the connection, if it has received or sent at least one valid SYN or Acceptable SYN/ACK with
* (AE,CWR,ECE) = (0,0,0) during the handshake.
*/
tcp_sk(sk)->ecn_fail = 1;
}
}

Expand Down Expand Up @@ -1105,10 +1105,11 @@ static unsigned int tcp_synack_options(const struct sock *sk,

smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining);

// [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - TCP Server SHOULD retransmit the
// SYN/ACK, but with no AccECN Option
/* [CY] 3.2.3.2.2. Testing for Loss of Packets Carrying the AccECN Option - TCP Server SHOULD retransmit the
* SYN/ACK, but with no AccECN Option
*/
if (treq->accecn_ok && sock_net(sk)->ipv4.sysctl_tcp_ecn_option &&
req->num_timeout < 1 && (remaining >= TCPOLEN_ACCECN_BASE)) {
!req->is_rtx && (remaining >= TCPOLEN_ACCECN_BASE)) {
opts->ecn_bytes = synack_ecn_bytes;
remaining -= tcp_options_fit_accecn(opts, 0, remaining,
tcp_synack_options_combine_saving(opts));
Expand Down Expand Up @@ -1188,7 +1189,7 @@ static unsigned int tcp_established_options(struct sock *sk, struct sk_buff *skb

if (tcp_ecn_mode_accecn(tp) &&
sock_net(sk)->ipv4.sysctl_tcp_ecn_option &&
(tp->saw_accecn_opt && tp->saw_accecn_opt != TCP_ACCECN_OPT_FAIL)) {
(tp->saw_accecn_opt && tp->saw_accecn_opt != TCP_ACCECN_OPT_FAIL && !tp->accecn_no_options)) {
if (sock_net(sk)->ipv4.sysctl_tcp_ecn_option >= 2 ||
tp->accecn_opt_demand ||
tcp_accecn_option_beacon_check(sk)) {
Expand Down Expand Up @@ -3452,12 +3453,20 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs)
tcp_retrans_try_collapse(sk, skb, avail_wnd);
}

/* RFC3168, section 6.1.1.1. ECN fallback
* As AccECN uses the same SYN flags (+ AE), this check covers both
* cases.
/* [CY] 3.1.4.1. Retransmitted SYNs - If the sender of an AccECN SYN (the TCP Client) times out before receiving the SYN/ACK,
* it SHOULD attempt to negotiate the use of AccECN at least one more time by continuing to set all three TCP ECN flags
* (AE,CWR,ECE) = (1,1,1) on the first retransmitted SYN (using the usual retransmission time-outs). If this first
* retransmission also fails to be acknowledged, in deployment scenarios where AccECN path traversal might be problematic, the
* TCP Client SHOULD send subsequent retransmissions of the SYN with the three TCP-ECN flags cleared (AE,CWR,ECE) = (0,0,0).
*/
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
tcp_ecn_clear_syn(sk, skb);
if (!tcp_ecn_mode_pending(tp) || icsk->icsk_retransmits > 1) {
/* RFC3168, section 6.1.1.1. ECN fallback
* As AccECN uses the same SYN flags (+ AE), this check covers both
* cases.
*/
if ((TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN_ECN) == TCPHDR_SYN_ECN)
tcp_ecn_clear_syn(sk, skb);
}

/* Update global and local TCP statistics. */
segs = tcp_skb_pcount(skb);
Expand Down

0 comments on commit 64441b4

Please sign in to comment.