forked from openwrt/openwrt
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BBR v3 is an enhancement to the BBR v1 algorithm. It's designed to aim for lower queues, lower loss, and better Reno/CUBIC coexistence than BBR v1. More information: https://github.com/google/bbr/blob/bbr-v3-upstream-prep-2024-02-19-01/README.md https://github.com/google/bbr/tree/001a430b3ddbbc6d9173061a74d8b580c757b91b Signed-off-by: Tan Zien <[email protected]> iproute2: ss: output TCP BBRv3 diag information Add logic for printing diag information for TCP BBRv3 congestion control. Signed-off-by: Tan Zien <[email protected]> ath79: refresh patch refresh: target/linux/ath79/patches-6.6/900-unaligned_access_hacks.patch Signed-off-by: Tan Zien <[email protected]>
- Loading branch information
Showing
21 changed files
with
4,198 additions
and
4 deletions.
There are no files selected for viewing
165 changes: 165 additions & 0 deletions
165
package/network/utils/iproute2/patches/500-ss-output-TCP-BBRv3-diag-information.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,165 @@ | ||
From ca7f11ebc4d4a99ccfd44be8555d505b26996c12 Mon Sep 17 00:00:00 2001 | ||
From: Arjun Roy <[email protected]> | ||
Date: Mon, 25 Jul 2022 12:49:35 -0400 | ||
Subject: [PATCH 2/2] ss: output TCP BBRv3 diag information | ||
|
||
Add logic for printing diag information for TCP BBRv3 congestion | ||
control. This commit leaves in place the support for printing the | ||
earlier TCP BBRv1 congestion control information. | ||
|
||
Both BBRv1 and BBRv3 are using the same enum value. The BBRv3 struct | ||
starts with the same data as BBRv1, so it is backward-compatible | ||
with BBRv1, to allow older ss binaries to print basic information for | ||
BBRv3. We use the size of the returned data and the version field to | ||
check the version of the data. | ||
|
||
Signed-off-by: Arjun Roy <[email protected]> | ||
Signed-off-by: Neal Cardwell <[email protected]> | ||
Signed-off-by: David Morley <[email protected]> | ||
--- | ||
include/uapi/linux/inet_diag.h | 23 ++++++++++++ | ||
misc/ss.c | 66 +++++++++++++++++++++++++++++++++- | ||
2 files changed, 88 insertions(+), 1 deletion(-) | ||
|
||
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h | ||
index d81cb69a26a9..dca833ecb783 100644 | ||
--- a/include/uapi/linux/inet_diag.h | ||
+++ b/include/uapi/linux/inet_diag.h | ||
@@ -229,6 +229,29 @@ struct tcp_bbr_info { | ||
__u32 bbr_min_rtt; /* min-filtered RTT in uSec */ | ||
__u32 bbr_pacing_gain; /* pacing gain shifted left 8 bits */ | ||
__u32 bbr_cwnd_gain; /* cwnd gain shifted left 8 bits */ | ||
+ __u32 bbr_bw_hi_lsb; /* lower 32 bits of bw_hi */ | ||
+ __u32 bbr_bw_hi_msb; /* upper 32 bits of bw_hi */ | ||
+ __u32 bbr_bw_lo_lsb; /* lower 32 bits of bw_lo */ | ||
+ __u32 bbr_bw_lo_msb; /* upper 32 bits of bw_lo */ | ||
+ __u8 bbr_mode; /* current bbr_mode in state machine */ | ||
+ __u8 bbr_phase; /* current state machine phase */ | ||
+ __u8 unused1; /* alignment padding; not used yet */ | ||
+ __u8 bbr_version; /* BBR algorithm version */ | ||
+ __u32 bbr_inflight_lo; /* lower short-term data volume bound */ | ||
+ __u32 bbr_inflight_hi; /* higher long-term data volume bound */ | ||
+ __u32 bbr_extra_acked; /* max excess packets ACKed in epoch */ | ||
+}; | ||
+ | ||
+/* TCP BBR congestion control bbr_phase as reported in netlink/ss stats. */ | ||
+enum tcp_bbr_phase { | ||
+ BBR_PHASE_INVALID = 0, | ||
+ BBR_PHASE_STARTUP = 1, | ||
+ BBR_PHASE_DRAIN = 2, | ||
+ BBR_PHASE_PROBE_RTT = 3, | ||
+ BBR_PHASE_PROBE_BW_UP = 4, | ||
+ BBR_PHASE_PROBE_BW_DOWN = 5, | ||
+ BBR_PHASE_PROBE_BW_CRUISE = 6, | ||
+ BBR_PHASE_PROBE_BW_REFILL = 7, | ||
}; | ||
|
||
union tcp_cc_info { | ||
diff --git a/misc/ss.c b/misc/ss.c | ||
index e9d813596b91..5f413118f0dd 100644 | ||
--- a/misc/ss.c | ||
+++ b/misc/ss.c | ||
@@ -912,6 +912,7 @@ struct tcpstat { | ||
bool app_limited; | ||
struct dctcpstat *dctcp; | ||
struct tcp_bbr_info *bbr_info; | ||
+ unsigned int bbr_info_len; | ||
}; | ||
|
||
/* SCTP assocs share the same inode number with their parent endpoint. So if we | ||
@@ -2585,6 +2586,29 @@ static void sctp_stats_print(struct sctp_info *s) | ||
out(" fraginl:%d", s->sctpi_s_frag_interleave); | ||
} | ||
|
||
+static const char* bbr_phase_to_str(enum tcp_bbr_phase phase) | ||
+{ | ||
+ switch (phase) { | ||
+ case BBR_PHASE_STARTUP: | ||
+ return "STARTUP"; | ||
+ case BBR_PHASE_DRAIN: | ||
+ return "DRAIN"; | ||
+ case BBR_PHASE_PROBE_RTT: | ||
+ return "PROBE_RTT"; | ||
+ case BBR_PHASE_PROBE_BW_UP: | ||
+ return "PROBE_BW_UP"; | ||
+ case BBR_PHASE_PROBE_BW_DOWN: | ||
+ return "PROBE_BW_DOWN"; | ||
+ case BBR_PHASE_PROBE_BW_CRUISE: | ||
+ return "PROBE_BW_CRUISE"; | ||
+ case BBR_PHASE_PROBE_BW_REFILL: | ||
+ return "PROBE_BW_REFILL"; | ||
+ case BBR_PHASE_INVALID: | ||
+ default: | ||
+ return "INVALID"; | ||
+ } | ||
+} | ||
+ | ||
static void tcp_stats_print(struct tcpstat *s) | ||
{ | ||
char b1[64]; | ||
@@ -2658,7 +2682,14 @@ static void tcp_stats_print(struct tcpstat *s) | ||
} | ||
|
||
if (s->bbr_info) { | ||
- __u64 bw; | ||
+ /* All versions of the BBR algorithm use the INET_DIAG_BBRINFO | ||
+ * enum value. Later versions of the tcp_bbr_info struct are | ||
+ * backward-compatible with earlier versions, to allow older ss | ||
+ * binaries to print basic information for newer versions of | ||
+ * the algorithm. We use the size of the returned tcp_bbr_info | ||
+ * struct to decide how much to print. | ||
+ */ | ||
+ __u64 bw, bw_hi, bw_lo; | ||
|
||
bw = s->bbr_info->bbr_bw_hi; | ||
bw <<= 32; | ||
@@ -2673,6 +2704,38 @@ static void tcp_stats_print(struct tcpstat *s) | ||
if (s->bbr_info->bbr_cwnd_gain) | ||
out(",cwnd_gain:%g", | ||
(double)s->bbr_info->bbr_cwnd_gain / 256.0); | ||
+ | ||
+ if (s->bbr_info_len >= | ||
+ (offsetof(struct tcp_bbr_info, bbr_extra_acked) + | ||
+ sizeof(__u32))) { | ||
+ | ||
+ bw_hi = s->bbr_info->bbr_bw_hi_msb; | ||
+ bw_hi <<= 32; | ||
+ bw_hi |= s->bbr_info->bbr_bw_hi_lsb; | ||
+ | ||
+ bw_lo = s->bbr_info->bbr_bw_lo_msb; | ||
+ bw_lo <<= 32; | ||
+ bw_lo |= s->bbr_info->bbr_bw_lo_lsb; | ||
+ | ||
+ out(",version:%u", s->bbr_info->bbr_version); | ||
+ if (bw_hi != ~0UL) | ||
+ out(",bw_hi:%sbps", sprint_bw(b1, bw_hi * 8.0)); | ||
+ if (bw_lo != ~0UL) | ||
+ out(",bw_lo:%sbps", sprint_bw(b1, bw_lo * 8.0)); | ||
+ if (s->bbr_info->bbr_inflight_hi != ~0U) | ||
+ out(",inflight_hi:%u", | ||
+ s->bbr_info->bbr_inflight_hi); | ||
+ if (s->bbr_info->bbr_inflight_lo != ~0U) | ||
+ out(",inflight_lo:%u", | ||
+ s->bbr_info->bbr_inflight_lo); | ||
+ out(",extra_acked:%u", s->bbr_info->bbr_extra_acked); | ||
+ out(",mode:%d", (int)s->bbr_info->bbr_mode); | ||
+ out(",phase:%s", | ||
+ bbr_phase_to_str( | ||
+ (enum tcp_bbr_phase) | ||
+ s->bbr_info->bbr_phase)); | ||
+ } | ||
+ | ||
out(")"); | ||
} | ||
|
||
@@ -3147,6 +3210,7 @@ static void tcp_show_info(const struct nlmsghdr *nlh, struct inet_diag_msg *r, | ||
s.bbr_info = calloc(1, sizeof(*s.bbr_info)); | ||
if (s.bbr_info && bbr_info) | ||
memcpy(s.bbr_info, bbr_info, len); | ||
+ s.bbr_info_len = len; | ||
} | ||
|
||
if (rtt > 0 && info->tcpi_snd_mss && info->tcpi_snd_cwnd) { | ||
-- | ||
2.41.0.487.g6d72f3e995-goog | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
51 changes: 51 additions & 0 deletions
51
...v3-upstream-prep-2024-02-19-01-net-tcp_bbr-broaden-app-limited-rate-sample-detectio.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
From 304c6ff637e53c8f3530a0bb50ba95e532d681b8 Mon Sep 17 00:00:00 2001 | ||
From: Neal Cardwell <[email protected]> | ||
Date: Tue, 11 Jun 2019 12:26:55 -0400 | ||
Subject: [PATCH 01/19] net-tcp_bbr: broaden app-limited rate sample detection | ||
|
||
This commit is a bug fix for the Linux TCP app-limited | ||
(application-limited) logic that is used for collecting rate | ||
(bandwidth) samples. | ||
|
||
Previously the app-limited logic only looked for "bubbles" of | ||
silence in between application writes, by checking at the start | ||
of each sendmsg. But "bubbles" of silence can also happen before | ||
retransmits: e.g. bubbles can happen between an application write | ||
and a retransmit, or between two retransmits. | ||
|
||
Retransmits are triggered by ACKs or timers. So this commit checks | ||
for bubbles of app-limited silence upon ACKs or timers. | ||
|
||
Why does this commit check for app-limited state at the start of | ||
ACKs and timer handling? Because at that point we know whether | ||
inflight was fully using the cwnd. During processing the ACK or | ||
timer event we often change the cwnd; after changing the cwnd we | ||
can't know whether inflight was fully using the old cwnd. | ||
|
||
Origin-9xx-SHA1: 3fe9b53291e018407780fb8c356adb5666722cbc | ||
Change-Id: I37221506f5166877c2b110753d39bb0757985e68 | ||
--- | ||
net/ipv4/tcp_input.c | 1 + | ||
net/ipv4/tcp_timer.c | 1 + | ||
2 files changed, 2 insertions(+) | ||
|
||
--- a/net/ipv4/tcp_input.c | ||
+++ b/net/ipv4/tcp_input.c | ||
@@ -3915,6 +3915,7 @@ static int tcp_ack(struct sock *sk, cons | ||
|
||
prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; | ||
rs.prior_in_flight = tcp_packets_in_flight(tp); | ||
+ tcp_rate_check_app_limited(sk); | ||
|
||
/* ts_recent update must be made after we are sure that the packet | ||
* is in window. | ||
--- a/net/ipv4/tcp_timer.c | ||
+++ b/net/ipv4/tcp_timer.c | ||
@@ -677,6 +677,7 @@ void tcp_write_timer_handler(struct sock | ||
return; | ||
} | ||
|
||
+ tcp_rate_check_app_limited(sk); | ||
tcp_mstamp_refresh(tcp_sk(sk)); | ||
event = icsk->icsk_pending; | ||
|
73 changes: 73 additions & 0 deletions
73
...v3-upstream-prep-2024-02-19-01-net-tcp_bbr-v2-shrink-delivered_mstamp-first_tx_msta.patch
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
From 5a47eb49ad406b439a00b90f5285359cd1e876f4 Mon Sep 17 00:00:00 2001 | ||
From: Neal Cardwell <[email protected]> | ||
Date: Sun, 24 Jun 2018 21:55:59 -0400 | ||
Subject: [PATCH 02/19] net-tcp_bbr: v2: shrink delivered_mstamp, | ||
first_tx_mstamp to u32 to free up 8 bytes | ||
|
||
Free up some space for tracking inflight and losses for each | ||
bw sample, in upcoming commits. | ||
|
||
These timestamps are in microseconds, and are now stored in 32 | ||
bits. So they can only hold time intervals up to roughly 2^12 = 4096 | ||
seconds. But Linux TCP RTT and RTO tracking has the same 32-bit | ||
microsecond implementation approach and resulting deployment | ||
limitations. So this is not introducing a new limit. And these should | ||
not be a limitation for the foreseeable future. | ||
|
||
Effort: net-tcp_bbr | ||
Origin-9xx-SHA1: 238a7e6b5d51625fef1ce7769826a7b21b02ae55 | ||
Change-Id: I3b779603797263b52a61ad57c565eb91fe42680c | ||
--- | ||
include/net/tcp.h | 9 +++++++-- | ||
net/ipv4/tcp_rate.c | 7 ++++--- | ||
2 files changed, 11 insertions(+), 5 deletions(-) | ||
|
||
--- a/include/net/tcp.h | ||
+++ b/include/net/tcp.h | ||
@@ -822,6 +822,11 @@ static inline u32 tcp_stamp_us_delta(u64 | ||
return max_t(s64, t1 - t0, 0); | ||
} | ||
|
||
+static inline u32 tcp_stamp32_us_delta(u32 t1, u32 t0) | ||
+{ | ||
+ return max_t(s32, t1 - t0, 0); | ||
+} | ||
+ | ||
static inline u32 tcp_skb_timestamp(const struct sk_buff *skb) | ||
{ | ||
return tcp_ns_to_ts(skb->skb_mstamp_ns); | ||
@@ -897,9 +902,9 @@ struct tcp_skb_cb { | ||
/* pkts S/ACKed so far upon tx of skb, incl retrans: */ | ||
__u32 delivered; | ||
/* start of send pipeline phase */ | ||
- u64 first_tx_mstamp; | ||
+ u32 first_tx_mstamp; | ||
/* when we reached the "delivered" count */ | ||
- u64 delivered_mstamp; | ||
+ u32 delivered_mstamp; | ||
} tx; /* only used for outgoing skbs */ | ||
union { | ||
struct inet_skb_parm h4; | ||
--- a/net/ipv4/tcp_rate.c | ||
+++ b/net/ipv4/tcp_rate.c | ||
@@ -101,8 +101,9 @@ void tcp_rate_skb_delivered(struct sock | ||
/* Record send time of most recently ACKed packet: */ | ||
tp->first_tx_mstamp = tx_tstamp; | ||
/* Find the duration of the "send phase" of this window: */ | ||
- rs->interval_us = tcp_stamp_us_delta(tp->first_tx_mstamp, | ||
- scb->tx.first_tx_mstamp); | ||
+ rs->interval_us = tcp_stamp32_us_delta( | ||
+ tp->first_tx_mstamp, | ||
+ scb->tx.first_tx_mstamp); | ||
|
||
} | ||
/* Mark off the skb delivered once it's sacked to avoid being | ||
@@ -155,7 +156,7 @@ void tcp_rate_gen(struct sock *sk, u32 d | ||
* longer phase. | ||
*/ | ||
snd_us = rs->interval_us; /* send phase */ | ||
- ack_us = tcp_stamp_us_delta(tp->tcp_mstamp, | ||
+ ack_us = tcp_stamp32_us_delta(tp->tcp_mstamp, | ||
rs->prior_mstamp); /* ack phase */ | ||
rs->interval_us = max(snd_us, ack_us); | ||
|
Oops, something went wrong.