From 3fb14427fd3311967a24b5450bdc0562fe3cda95 Mon Sep 17 00:00:00 2001 From: Jerome Soumagne Date: Thu, 5 Sep 2024 14:49:53 -0500 Subject: [PATCH] HG Test: add hg_first perf test to measure cost of initial RPC Add some missing error checks in mercury_perf --- Testing/perf/hg/CMakeLists.txt | 2 +- Testing/perf/hg/hg_first.c | 115 +++++++++++++++++++++++++++++++++ Testing/perf/hg/mercury_perf.c | 71 ++++++++++++++++++++ Testing/perf/hg/mercury_perf.h | 9 +++ 4 files changed, 196 insertions(+), 1 deletion(-) create mode 100644 Testing/perf/hg/hg_first.c diff --git a/Testing/perf/hg/CMakeLists.txt b/Testing/perf/hg/CMakeLists.txt index ed3f256b..38cc627b 100644 --- a/Testing/perf/hg/CMakeLists.txt +++ b/Testing/perf/hg/CMakeLists.txt @@ -12,7 +12,7 @@ if(${CMAKE_VERSION} VERSION_GREATER 3.12) endif() endif() -set(HG_PERF_TARGETS hg_rate hg_bw_read hg_bw_write hg_perf_server) +set(HG_PERF_TARGETS hg_rate hg_first hg_bw_read hg_bw_write hg_perf_server) foreach(perf ${HG_PERF_TARGETS}) if(${CMAKE_VERSION} VERSION_GREATER 3.12) add_executable(${perf} ${perf}.c) diff --git a/Testing/perf/hg/hg_first.c b/Testing/perf/hg/hg_first.c new file mode 100644 index 00000000..ec361eba --- /dev/null +++ b/Testing/perf/hg/hg_first.c @@ -0,0 +1,115 @@ +/** + * Copyright (c) 2013-2022 UChicago Argonne, LLC and The HDF Group. + * Copyright (c) 2022-2023 Intel Corporation. + * + * SPDX-License-Identifier: BSD-3-Clause + */ + +#include "mercury_perf.h" + +/****************/ +/* Local Macros */ +/****************/ +#define BENCHMARK_NAME "Time of first RPC" + +/************************************/ +/* Local Type and Struct Definition */ +/************************************/ + +/********************/ +/* Local Prototypes */ +/********************/ + +static hg_return_t +hg_perf_run( + const struct hg_test_info *hg_test_info, struct hg_perf_class_info *info); + +/*******************/ +/* Local Variables */ +/*******************/ + +/*---------------------------------------------------------------------------*/ +static hg_return_t +hg_perf_run( + const struct hg_test_info *hg_test_info, struct hg_perf_class_info *info) +{ + struct hg_perf_request request = { + .expected_count = (int32_t) info->handle_max, + .complete_count = 0, + .completed = HG_ATOMIC_VAR_INIT(0)}; + unsigned int i; + hg_time_t t1, t2; + hg_return_t ret; + + if (hg_test_info->na_test_info.mpi_info.size > 1) + NA_Test_barrier(&hg_test_info->na_test_info); + hg_time_get_current(&t1); + + for (i = 0; i < info->handle_max; i++) { + ret = HG_Forward( + info->handles[i], hg_perf_request_complete, &request, NULL); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Forward() failed (%s)", HG_Error_to_string(ret)); + } + + ret = hg_perf_request_wait(info, &request, HG_MAX_IDLE_TIME, NULL); + HG_TEST_CHECK_HG_ERROR(error, ret, "hg_perf_request_wait() failed (%s)", + HG_Error_to_string(ret)); + + if (hg_test_info->na_test_info.mpi_info.size > 1) + NA_Test_barrier(&hg_test_info->na_test_info); + + hg_time_get_current(&t2); + + if (hg_test_info->na_test_info.mpi_info.rank == 0) + hg_perf_print_time(hg_test_info, info, 0, hg_time_subtract(t2, t1)); + + return HG_SUCCESS; + +error: + return ret; +} + +/*---------------------------------------------------------------------------*/ +int +main(int argc, char *argv[]) +{ + struct hg_perf_info perf_info; + struct hg_test_info *hg_test_info; + struct hg_perf_class_info *info; + hg_return_t hg_ret; + + /* Initialize the interface */ + hg_ret = hg_perf_init(argc, argv, false, &perf_info); + HG_TEST_CHECK_HG_ERROR(error, hg_ret, "hg_perf_init() failed (%s)", + HG_Error_to_string(hg_ret)); + hg_test_info = &perf_info.hg_test_info; + info = &perf_info.class_info[0]; + + /* Set HG handles */ + hg_ret = hg_perf_set_handles(hg_test_info, info, HG_PERF_FIRST); + HG_TEST_CHECK_HG_ERROR(error, hg_ret, "hg_perf_set_handles() failed (%s)", + HG_Error_to_string(hg_ret)); + + /* Header info */ + if (hg_test_info->na_test_info.mpi_info.rank == 0) + hg_perf_print_header_time(hg_test_info, info, BENCHMARK_NAME); + + /* Always a NULL RPC */ + hg_ret = hg_perf_run(hg_test_info, info); + HG_TEST_CHECK_HG_ERROR( + error, hg_ret, "hg_perf_run() failed (%s)", HG_Error_to_string(hg_ret)); + + /* Finalize interface */ + if (hg_test_info->na_test_info.mpi_info.rank == 0) + hg_perf_send_done(info); + + hg_perf_cleanup(&perf_info); + + return EXIT_SUCCESS; + +error: + hg_perf_cleanup(&perf_info); + + return EXIT_FAILURE; +} diff --git a/Testing/perf/hg/mercury_perf.c b/Testing/perf/hg/mercury_perf.c index 1d715abe..22d8a74c 100644 --- a/Testing/perf/hg/mercury_perf.c +++ b/Testing/perf/hg/mercury_perf.c @@ -87,6 +87,9 @@ hg_perf_rpc_rate_init_cb(hg_handle_t handle); static hg_return_t hg_perf_rpc_rate_cb(hg_handle_t handle); +static hg_return_t +hg_perf_first_cb(hg_handle_t handle); + static hg_return_t hg_perf_bulk_init_cb(hg_handle_t handle); @@ -285,19 +288,34 @@ hg_perf_class_init(const struct hg_test_info *hg_test_info, int class_id, /* Register RPCs */ ret = HG_Register(info->hg_class, HG_PERF_RATE_INIT, NULL, NULL, hg_perf_rpc_rate_init_cb); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Register() failed (%s)", HG_Error_to_string(ret)); ret = HG_Register(info->hg_class, HG_PERF_RATE, hg_perf_proc_iovec, (hg_test_info->bidirectional) ? hg_perf_proc_iovec : NULL, hg_perf_rpc_rate_cb); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Register() failed (%s)", HG_Error_to_string(ret)); + + ret = HG_Register( + info->hg_class, HG_PERF_FIRST, NULL, NULL, hg_perf_first_cb); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Register() failed (%s)", HG_Error_to_string(ret)); ret = HG_Register(info->hg_class, HG_PERF_BW_INIT, hg_perf_proc_bulk_init_info, NULL, hg_perf_bulk_init_cb); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Register() failed (%s)", HG_Error_to_string(ret)); ret = HG_Register(info->hg_class, HG_PERF_BW_READ, hg_perf_proc_bulk_info, NULL, hg_perf_bulk_push_cb); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Register() failed (%s)", HG_Error_to_string(ret)); ret = HG_Register(info->hg_class, HG_PERF_BW_WRITE, hg_perf_proc_bulk_info, NULL, hg_perf_bulk_pull_cb); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Register() failed (%s)", HG_Error_to_string(ret)); ret = HG_Register(info->hg_class, HG_PERF_DONE, NULL, NULL, hg_perf_done_cb); @@ -777,6 +795,38 @@ hg_perf_print_lat(const struct hg_test_info *hg_test_info, (long unsigned int) (1e6 / rpc_time)); } +/*---------------------------------------------------------------------------*/ +void +hg_perf_print_header_time(const struct hg_test_info *hg_test_info, + const struct hg_perf_class_info *info, const char *benchmark) +{ + printf("# %s v%s\n", benchmark, VERSION_NAME); + printf( + "# %d client process(es)\n", hg_test_info->na_test_info.mpi_info.size); + printf("# NULL RPC with %zu handle(s) in-flight\n", info->handle_max); + if (info->handle_max * (size_t) hg_test_info->na_test_info.mpi_info.size < + info->target_addr_max) + printf("# WARNING number of handles in flight less than number of " + "targets\n"); + printf("%-*s%*s\n", 10, "# Size", NWIDTH, "Avg time (us)"); + fflush(stdout); +} + +/*---------------------------------------------------------------------------*/ +void +hg_perf_print_time(const struct hg_test_info *hg_test_info, + const struct hg_perf_class_info *info, size_t buf_size, hg_time_t t) +{ + double rpc_time; + size_t handle_max = (size_t) info->handle_max, + mpi_comm_size = (size_t) hg_test_info->na_test_info.mpi_info.size; + + rpc_time = + hg_time_to_double(t) * 1e6 / (double) (handle_max * mpi_comm_size); + + printf("%-*zu%*.*f\n", 10, buf_size, NWIDTH, NDIGITS, rpc_time); +} + /*---------------------------------------------------------------------------*/ void hg_perf_print_header_bw(const struct hg_test_info *hg_test_info, @@ -1024,6 +1074,27 @@ hg_perf_rpc_rate_cb(hg_handle_t handle) return ret; } +/*---------------------------------------------------------------------------*/ +static hg_return_t +hg_perf_first_cb(hg_handle_t handle) +{ + hg_return_t ret; + + /* Send response back */ + ret = HG_Respond(handle, NULL, NULL, NULL); + HG_TEST_CHECK_HG_ERROR( + error, ret, "HG_Respond() failed (%s)", HG_Error_to_string(ret)); + + (void) HG_Destroy(handle); + + return HG_SUCCESS; + +error: + (void) HG_Destroy(handle); + + return ret; +} + /*---------------------------------------------------------------------------*/ static hg_return_t hg_perf_bulk_init_cb(hg_handle_t handle) diff --git a/Testing/perf/hg/mercury_perf.h b/Testing/perf/hg/mercury_perf.h index 8285a946..2048feb6 100644 --- a/Testing/perf/hg/mercury_perf.h +++ b/Testing/perf/hg/mercury_perf.h @@ -25,6 +25,7 @@ enum hg_perf_rpc_id { HG_PERF_RATE_INIT = 1, HG_PERF_RATE, + HG_PERF_FIRST, HG_PERF_BW_INIT, HG_PERF_BW_READ, HG_PERF_BW_WRITE, @@ -138,6 +139,14 @@ void hg_perf_print_lat(const struct hg_test_info *hg_test_info, const struct hg_perf_class_info *info, size_t buf_size, hg_time_t t); +void +hg_perf_print_header_time(const struct hg_test_info *hg_test_info, + const struct hg_perf_class_info *info, const char *benchmark); + +void +hg_perf_print_time(const struct hg_test_info *hg_test_info, + const struct hg_perf_class_info *info, size_t buf_size, hg_time_t t); + void hg_perf_print_header_bw(const struct hg_test_info *hg_test_info, const struct hg_perf_class_info *info, const char *benchmark);