From 0a537d231c0e6dea1771422e9e2116883a07544b Mon Sep 17 00:00:00 2001 From: Chris Lalancette Date: Wed, 4 Oct 2023 17:51:01 -0400 Subject: [PATCH] Improve the reliability of test_get_type_description_service. (#1107) We were seeing occasional failures of test_get_type_description_service on all platforms. It turns out that the removal of a service from the graph cache is an asynchronous operation. In particular, all of our current RMW implementations publish a graph update to the graph topic, and only remove things from the graph once that data has been delivered. This can potentially happen in another thread. That means that immediately after service_fini(), a call to get_service_names_and_types() may actually still have the old service name in it. Since our get_type_description tests were relying on this to go away, this was causing it to be flakey. We fix this here by adding in a new helper function, service_not_exists(). This helper is not just the inverse of service_exists() (which returns immediately when it finds the service in the graph cache). Instead, service_not_exists() waits until the service has *left* the cache before returning (or times out). In my testing, this fixes the flakiness locally. Signed-off-by: Chris Lalancette --- .../rcl/test_get_type_description_service.cpp | 59 +++++++++++++++++-- 1 file changed, 54 insertions(+), 5 deletions(-) diff --git a/rcl/test/rcl/test_get_type_description_service.cpp b/rcl/test/rcl/test_get_type_description_service.cpp index 39a2fc19b..baac08013 100644 --- a/rcl/test/rcl/test_get_type_description_service.cpp +++ b/rcl/test/rcl/test_get_type_description_service.cpp @@ -57,7 +57,7 @@ static bool service_exists( { rcl_allocator_t allocator = rcl_get_default_allocator(); - // Wait for a maximum of timeout seconds for the service to show up + // Wait for a maximum of timeout milliseconds for the service to show up auto start_time = std::chrono::system_clock::now(); while (std::chrono::system_clock::now() - start_time < timeout) { rcl_names_and_types_t srv_names_and_types = rcl_get_zero_initialized_names_and_types(); @@ -85,7 +85,56 @@ static bool service_exists( } } - std::this_thread::sleep_for(std::chrono::milliseconds(100)); + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + return false; +} + +static bool service_not_exists( + const rcl_node_t * node_ptr, const char * service_name, + const char * service_type, std::chrono::milliseconds timeout) +{ + rcl_allocator_t allocator = rcl_get_default_allocator(); + + // Wait for a maximum of timeout milliseconds for the service to go away + // Note that this is not just the negation of service_exists; we actually + // want to wait until the service goes away + auto start_time = std::chrono::system_clock::now(); + while (std::chrono::system_clock::now() - start_time < timeout) { + rcl_names_and_types_t srv_names_and_types = rcl_get_zero_initialized_names_and_types(); + + if ( + RCL_RET_OK != rcl_get_service_names_and_types( + node_ptr, + &allocator, &srv_names_and_types)) + { + return false; + } + + OSRF_TESTING_TOOLS_CPP_SCOPE_EXIT( + { + EXPECT_EQ(RCL_RET_OK, rcl_names_and_types_fini(&srv_names_and_types)); + }); + + if (srv_names_and_types.names.size == 0) { + return true; + } + + if (!string_in_array(&(srv_names_and_types.names), service_name)) { + return true; + } + + // If we make it here, the service name was in the array. If the *type* isn't, then there is + // another service with this name, which we can ignore. + for (size_t i = 0; i < srv_names_and_types.names.size; ++i) { + if (string_in_array(&(srv_names_and_types.types[i]), service_type)) { + // Both the name and type were here, so the service currently exists. Continue waiting + break; + } + } + + std::this_thread::sleep_for(std::chrono::milliseconds(10)); } return false; @@ -153,10 +202,10 @@ TEST_F( this->node_ptr, this->get_type_description_service_name, GET_TYPE_DESCRIPTION_SRV_TYPE_NAME, std::chrono::seconds(5))); EXPECT_EQ(RCL_RET_OK, rcl_node_type_description_service_fini(this->node_ptr)); - EXPECT_FALSE( - service_exists( + EXPECT_TRUE( + service_not_exists( this->node_ptr, this->get_type_description_service_name, - GET_TYPE_DESCRIPTION_SRV_TYPE_NAME, std::chrono::milliseconds(100))); + GET_TYPE_DESCRIPTION_SRV_TYPE_NAME, std::chrono::seconds(5))); EXPECT_EQ(RCL_RET_NOT_INIT, rcl_node_type_description_service_fini(this->node_ptr)); EXPECT_EQ(RCL_RET_OK, rcl_node_type_description_service_init(this->node_ptr));