From 5c908a12edb53b65930a7acd23ac5c8c82259cc6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Damian=20Kr=C3=B3lik?= <66667989+Damian-Nordic@users.noreply.github.com> Date: Tue, 7 May 2024 11:44:16 +0200 Subject: [PATCH] [mrp] Increase default retry interval for Thread (#33314) * [mrp] Increase default retry interval for Thread The current 800ms is not enough in real setups, where Thread routers must serve as intermediate hops for many parallel conversations. Bump this to 2s. Signed-off-by: Damian Krolik * [mrp] Make additional MRP backoff time dynamic for all Make the additional MRP backoff time (aka CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST) dynamic for all build configurations to remove the need for adjusting timeouts in unit tests whenever this parameter changes. In messaging tests, by default, set this parameter to 0, except for tests that explicitly verify its meaning. By the way, fix tests increasing the MRP backoff time by the slow-polling instead of fast-polling interval. --------- Signed-off-by: Damian Krolik (cherry picked from commit bc0f5ee7f7174eab056c8e75343b37dc0a9c1467) --- .../CHIP/MTRDeviceControllerFactory.mm | 2 +- src/messaging/ReliableMessageMgr.cpp | 15 ++-- src/messaging/ReliableMessageMgr.h | 11 +-- src/messaging/ReliableMessageProtocolConfig.h | 6 +- src/messaging/tests/MessagingContext.cpp | 7 ++ .../tests/TestAbortExchangesForFabric.cpp | 8 +-- .../tests/TestReliableMessageProtocol.cpp | 69 +++++++++++-------- src/platform/nrfconnect/CHIPPlatformConfig.h | 13 ++-- .../secure_channel/tests/TestPASESession.cpp | 11 +-- 9 files changed, 72 insertions(+), 70 deletions(-) diff --git a/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm b/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm index 3d84a8fea22571..5f5dc86f209303 100644 --- a/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm +++ b/src/darwin/Framework/CHIP/MTRDeviceControllerFactory.mm @@ -1489,7 +1489,7 @@ void MTRSetMessageReliabilityParameters(NSNumber * _Nullable idleRetransmitMs, resetAdvertising = ReliableMessageProtocolConfig::SetLocalMRPConfig(NullOptional); } else { if (additionalRetransmitDelayMs != nil) { - System::Clock::Milliseconds64 additionalBackoff(additionalRetransmitDelayMs.unsignedLongLongValue); + System::Clock::Timeout additionalBackoff(additionalRetransmitDelayMs.unsignedLongValue); Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(additionalBackoff)); } diff --git a/src/messaging/ReliableMessageMgr.cpp b/src/messaging/ReliableMessageMgr.cpp index 53e292c9b32b00..ad267941919b7f 100644 --- a/src/messaging/ReliableMessageMgr.cpp +++ b/src/messaging/ReliableMessageMgr.cpp @@ -47,9 +47,7 @@ using namespace chip::System::Clock::Literals; namespace chip { namespace Messaging { -#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG -Optional ReliableMessageMgr::sAdditionalMRPBackoffTime; -#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG +System::Clock::Timeout ReliableMessageMgr::sAdditionalMRPBackoffTime = CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST; ReliableMessageMgr::RetransTableEntry::RetransTableEntry(ReliableMessageContext * rc) : ec(*rc->GetExchangeContext()), nextRetransTime(0), sendCount(0) @@ -267,11 +265,7 @@ System::Clock::Timestamp ReliableMessageMgr::GetBackoff(System::Clock::Timestamp mrpBackoffTime += ICDConfigurationData::GetInstance().GetFastPollingInterval(); #endif -#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG - mrpBackoffTime += sAdditionalMRPBackoffTime.ValueOr(CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST); -#else - mrpBackoffTime += CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST; -#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG + mrpBackoffTime += sAdditionalMRPBackoffTime; return mrpBackoffTime; } @@ -461,6 +455,11 @@ CHIP_ERROR ReliableMessageMgr::MapSendError(CHIP_ERROR error, uint16_t exchangeI return error; } +void ReliableMessageMgr::SetAdditionalMRPBackoffTime(const Optional & additionalTime) +{ + sAdditionalMRPBackoffTime = additionalTime.ValueOr(CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST); +} + void ReliableMessageMgr::CalculateNextRetransTime(RetransTableEntry & entry) { System::Clock::Timestamp baseTimeout = System::Clock::Milliseconds64(0); diff --git a/src/messaging/ReliableMessageMgr.h b/src/messaging/ReliableMessageMgr.h index ae953cbddd5ad5..6e16c3e55242df 100644 --- a/src/messaging/ReliableMessageMgr.h +++ b/src/messaging/ReliableMessageMgr.h @@ -206,7 +206,6 @@ class ReliableMessageMgr } #endif // CHIP_CONFIG_TEST -#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG /** * Set the value to add to the MRP backoff time we compute. This is meant to * account for high network latency on the sending side (us) that can't be @@ -220,11 +219,7 @@ class ReliableMessageMgr * set this before actually bringing up the stack and having access to a * ReliableMessageMgr. */ - static void SetAdditionalMRPBackoffTime(const Optional & additionalTime) - { - sAdditionalMRPBackoffTime = additionalTime; - } -#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG + static void SetAdditionalMRPBackoffTime(const Optional & additionalTime); private: /** @@ -255,9 +250,7 @@ class ReliableMessageMgr SessionUpdateDelegate * mSessionUpdateDelegate = nullptr; -#if CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG - static Optional sAdditionalMRPBackoffTime; -#endif // CHIP_DEVICE_CONFIG_ENABLE_DYNAMIC_MRP_CONFIG + static System::Clock::Timeout sAdditionalMRPBackoffTime; }; } // namespace Messaging diff --git a/src/messaging/ReliableMessageProtocolConfig.h b/src/messaging/ReliableMessageProtocolConfig.h index 2ab1c139657fc6..44480a44eb4600 100644 --- a/src/messaging/ReliableMessageProtocolConfig.h +++ b/src/messaging/ReliableMessageProtocolConfig.h @@ -56,7 +56,7 @@ namespace chip { */ #ifndef CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL #if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX -#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (800_ms32) +#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (2000_ms32) #else #define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (300_ms32) #endif @@ -82,7 +82,7 @@ namespace chip { */ #ifndef CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL #if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX -#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (800_ms32) +#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (2000_ms32) #else #define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (500_ms32) #endif @@ -174,7 +174,7 @@ namespace chip { */ #ifndef CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST #if CHIP_ENABLE_OPENTHREAD && !CHIP_DEVICE_LAYER_TARGET_LINUX -#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (500_ms) +#define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (1500_ms) #else #define CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST (0_ms) #endif diff --git a/src/messaging/tests/MessagingContext.cpp b/src/messaging/tests/MessagingContext.cpp index 441492e80d5694..4ce4f26d5098b2 100644 --- a/src/messaging/tests/MessagingContext.cpp +++ b/src/messaging/tests/MessagingContext.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include namespace chip { @@ -70,6 +71,9 @@ CHIP_ERROR MessagingContext::Init(TransportMgrBase * transport, IOContext * ioCo ReturnErrorOnFailure(CreatePASESessionDavidToCharlie()); } + // Set the additional MRP backoff to zero so that it does not affect the test execution time. + Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(System::Clock::kZero)); + return CHIP_NO_ERROR; } @@ -85,6 +89,9 @@ void MessagingContext::Shutdown() mFabricTable.Shutdown(); mOpCertStore.Finish(); mOpKeyStore.Finish(); + + // Reset the default additional MRP backoff. + Messaging::ReliableMessageMgr::SetAdditionalMRPBackoffTime(NullOptional); } CHIP_ERROR MessagingContext::InitFromExisting(const MessagingContext & existing) diff --git a/src/messaging/tests/TestAbortExchangesForFabric.cpp b/src/messaging/tests/TestAbortExchangesForFabric.cpp index a9999eb6c2f9e0..d5ab600164805e 100644 --- a/src/messaging/tests/TestAbortExchangesForFabric.cpp +++ b/src/messaging/tests/TestAbortExchangesForFabric.cpp @@ -212,15 +212,11 @@ void CommonCheckAbortAllButOneExchange(nlTestSuite * inSuite, TestContext & ctx, // auto waitTimeout = System::Clock::Milliseconds32(1000); -#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1 +#if CHIP_CONFIG_ENABLE_ICD_SERVER // If running as an ICD, increase waitTimeout to account for the polling interval - waitTimeout += ICDConfigurationData::GetInstance().GetSlowPollingInterval(); + waitTimeout += ICDConfigurationData::GetInstance().GetFastPollingInterval(); #endif - // Account for the retry delay booster, so that we do not timeout our IO processing before the - // retransmission failure is triggered. - waitTimeout += CHIP_CONFIG_RMP_DEFAULT_MAX_RETRANS * CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST; - ctx.GetIOContext().DriveIOUntil(waitTimeout, [&]() { return false; }); } else diff --git a/src/messaging/tests/TestReliableMessageProtocol.cpp b/src/messaging/tests/TestReliableMessageProtocol.cpp index 04b8a1aadb149f..c4a82279ea0893 100644 --- a/src/messaging/tests/TestReliableMessageProtocol.cpp +++ b/src/messaging/tests/TestReliableMessageProtocol.cpp @@ -61,14 +61,6 @@ using namespace chip::System::Clock::Literals; const char PAYLOAD[] = "Hello!"; -// The CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST can be set to non-zero value -// to boost the retransmission timeout for a high latency network like Thread to -// avoid spurious retransmits. -// -// This adds extra I/O time to account for this. See the documentation for -// CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST for more details. -constexpr auto retryBoosterTimeout = CHIP_CONFIG_RMP_DEFAULT_MAX_RETRANS * CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST; - class TestContext : public chip::Test::LoopbackMessagingContext { public: @@ -312,6 +304,34 @@ struct BackoffComplianceTestVector theBackoffComplianceTestVector[] = { }, }; +void CheckGetBackoffImpl(nlTestSuite * inSuite, System::Clock::Timeout additionalMRPBackoffTime) +{ + ReliableMessageMgr::SetAdditionalMRPBackoffTime(MakeOptional(additionalMRPBackoffTime)); + + // Run 3x iterations to thoroughly test random jitter always results in backoff within bounds. + for (uint32_t j = 0; j < 3; j++) + { + for (const auto & test : theBackoffComplianceTestVector) + { + System::Clock::Timeout backoff = ReliableMessageMgr::GetBackoff(test.backoffBase, test.sendCount); + System::Clock::Timeout extraBackoff = additionalMRPBackoffTime; + +#if CHIP_CONFIG_ENABLE_ICD_SERVER + // If running as an ICD, increase maxBackoff to account for the polling interval + extraBackoff += ICDConfigurationData::GetInstance().GetFastPollingInterval(); +#endif + + ChipLogProgress(Test, "Backoff base %" PRIu32 " extra %" PRIu32 " # %d: %" PRIu32, test.backoffBase.count(), + extraBackoff.count(), test.sendCount, backoff.count()); + + NL_TEST_ASSERT(inSuite, backoff >= test.backoffMin + extraBackoff); + NL_TEST_ASSERT(inSuite, backoff <= test.backoffMax + extraBackoff); + } + } + + ReliableMessageMgr::SetAdditionalMRPBackoffTime(NullOptional); +} + } // namespace class TestReliableMessageProtocol @@ -336,6 +356,7 @@ class TestReliableMessageProtocol static void CheckLostStandaloneAck(nlTestSuite * inSuite, void * inContext); static void CheckIsPeerActiveNotInitiator(nlTestSuite * inSuite, void * inContext); static void CheckGetBackoff(nlTestSuite * inSuite, void * inContext); + static void CheckGetBackoffAdditionalTime(nlTestSuite * inSuite, void * inContext); static void CheckApplicationResponseDelayed(nlTestSuite * inSuite, void * inContext); static void CheckApplicationResponseNeverComes(nlTestSuite * inSuite, void * inContext); }; @@ -437,7 +458,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); // Wait for the initial message to fail (should take 330-413ms) - ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 2; }); + ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 2; }); now = System::SystemClock().GetMonotonicTimestamp(); timeoutTime = now - startTime; ChipLogProgress(Test, "Attempt #1 Timeout : %" PRIu32 "ms", timeoutTime.count()); @@ -454,7 +475,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); // Wait for the 1st retry to fail (should take 330-413ms) - ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 3; }); + ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 3; }); now = System::SystemClock().GetMonotonicTimestamp(); timeoutTime = now - startTime; ChipLogProgress(Test, "Attempt #2 Timeout : %" PRIu32 "ms", timeoutTime.count()); @@ -471,7 +492,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); // Wait for the 2nd retry to fail (should take 528-660ms) - ctx.GetIOContext().DriveIOUntil(1000_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 4; }); + ctx.GetIOContext().DriveIOUntil(1000_ms32, [&] { return loopback.mSentMessageCount >= 4; }); now = System::SystemClock().GetMonotonicTimestamp(); timeoutTime = now - startTime; ChipLogProgress(Test, "Attempt #3 Timeout : %" PRIu32 "ms", timeoutTime.count()); @@ -488,7 +509,7 @@ void TestReliableMessageProtocol::CheckResendApplicationMessage(nlTestSuite * in NL_TEST_ASSERT(inSuite, rm->TestGetCountRetransTable() == 1); // Wait for the 3rd retry to fail (should take 845-1056ms) - ctx.GetIOContext().DriveIOUntil(1500_ms32 + retryBoosterTimeout, [&] { return loopback.mSentMessageCount >= 5; }); + ctx.GetIOContext().DriveIOUntil(1500_ms32, [&] { return loopback.mSentMessageCount >= 5; }); now = System::SystemClock().GetMonotonicTimestamp(); timeoutTime = now - startTime; ChipLogProgress(Test, "Attempt #4 Timeout : %" PRIu32 "ms", timeoutTime.count()); @@ -1833,25 +1854,12 @@ void TestReliableMessageProtocol::CheckLostStandaloneAck(nlTestSuite * inSuite, void TestReliableMessageProtocol::CheckGetBackoff(nlTestSuite * inSuite, void * inContext) { - // Run 3x iterations to thoroughly test random jitter always results in backoff within bounds. - for (uint32_t j = 0; j < 3; j++) - { - for (const auto & test : theBackoffComplianceTestVector) - { - System::Clock::Timeout backoff = ReliableMessageMgr::GetBackoff(test.backoffBase, test.sendCount); - ChipLogProgress(Test, "Backoff base %" PRIu32 " # %d: %" PRIu32, test.backoffBase.count(), test.sendCount, - backoff.count()); - - NL_TEST_ASSERT(inSuite, backoff >= test.backoffMin); + CheckGetBackoffImpl(inSuite, System::Clock::kZero); +} - auto maxBackoff = test.backoffMax + retryBoosterTimeout; -#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1 - // If running as an ICD, increase maxBackoff to account for the polling interval - maxBackoff += ICDConfigurationData::GetInstance().GetSlowPollingInterval(); -#endif - NL_TEST_ASSERT(inSuite, backoff <= maxBackoff); - } - } +void TestReliableMessageProtocol::CheckGetBackoffAdditionalTime(nlTestSuite * inSuite, void * inContext) +{ + CheckGetBackoffImpl(inSuite, System::Clock::Seconds32(1)); } void TestReliableMessageProtocol::CheckApplicationResponseDelayed(nlTestSuite * inSuite, void * inContext) @@ -2195,6 +2203,7 @@ const nlTest sTests[] = { TestReliableMessageProtocol::CheckLostStandaloneAck), NL_TEST_DEF("Test Is Peer Active Retry logic", TestReliableMessageProtocol::CheckIsPeerActiveNotInitiator), NL_TEST_DEF("Test MRP backoff algorithm", TestReliableMessageProtocol::CheckGetBackoff), + NL_TEST_DEF("Test MRP backoff algorithm with additional time", TestReliableMessageProtocol::CheckGetBackoffAdditionalTime), // TODO: Re-enable this test, after changing test to use Mock clock / DriveIO rather than DriveIOUntil. // Issue: https://github.com/project-chip/connectedhomeip/issues/32440 // NL_TEST_DEF("Test an application response that comes after MRP retransmits run out", diff --git a/src/platform/nrfconnect/CHIPPlatformConfig.h b/src/platform/nrfconnect/CHIPPlatformConfig.h index 3ece933d377996..ee4609e05c5e5c 100644 --- a/src/platform/nrfconnect/CHIPPlatformConfig.h +++ b/src/platform/nrfconnect/CHIPPlatformConfig.h @@ -106,21 +106,16 @@ #define CHIP_CONFIG_LOG_MODULE_Support_PROGRESS 0 #endif -// Set MRP retry intervals for Thread and Wi-Fi to test-proven values. #ifndef CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL -#if CHIP_ENABLE_OPENTHREAD -#define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (800_ms32) -#else +#ifndef CONFIG_NET_L2_OPENTHREAD #define CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL (1000_ms32) -#endif // CHIP_ENABLE_OPENTHREAD +#endif // CONFIG_NET_L2_OPENTHREAD #endif // CHIP_CONFIG_MRP_LOCAL_ACTIVE_RETRY_INTERVAL #ifndef CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL -#if CHIP_ENABLE_OPENTHREAD -#define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (800_ms32) -#else +#ifndef CONFIG_NET_L2_OPENTHREAD #define CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL (1000_ms32) -#endif // CHIP_ENABLE_OPENTHREAD +#endif // CONFIG_NET_L2_OPENTHREAD #endif // CHIP_CONFIG_MRP_LOCAL_IDLE_RETRY_INTERVAL #ifndef CHIP_CONFIG_ICD_IDLE_MODE_DURATION_SEC diff --git a/src/protocols/secure_channel/tests/TestPASESession.cpp b/src/protocols/secure_channel/tests/TestPASESession.cpp index a68b69f417838b..1ae3fad024afed 100644 --- a/src/protocols/secure_channel/tests/TestPASESession.cpp +++ b/src/protocols/secure_channel/tests/TestPASESession.cpp @@ -304,11 +304,14 @@ void SecurePairingHandshakeTestCommon(nlTestSuite * inSuite, void * inContext, S while (delegate.mMessageDropped) { - auto waitTimeout = 100_ms + CHIP_CONFIG_MRP_RETRY_INTERVAL_SENDER_BOOST; + auto waitTimeout = 100_ms; -#if CHIP_CONFIG_ENABLE_ICD_SERVER == 1 - // If running as an ICD, increase waitTimeout to account for the polling interval - waitTimeout += ICDConfigurationData::GetInstance().GetSlowPollingInterval(); +#if CHIP_CONFIG_ENABLE_ICD_SERVER + // If running as an ICD, increase waitTimeout to account for: + // - longer MRP intervals, configured above to 1s/1s, + // - the fast-polling interval that is added to the MRP backoff time. + waitTimeout += 2000_ms32; + waitTimeout += ICDConfigurationData::GetInstance().GetFastPollingInterval(); #endif // Wait some time so the dropped message will be retransmitted when we drain the IO.