From dc23a85325c1348334bff7ef2c86876a724061ea Mon Sep 17 00:00:00 2001 From: Di Nguyen Date: Tue, 23 Jul 2024 16:49:03 -0600 Subject: [PATCH] Updated benchmarks output name field that was not in the format key:value (#571) * Fixed FileNotFound error on windows * updated changelog * updated copyright in rtest.py * fixed copyright to 2021-2024 * Fixed FileNotFound error on windows * updated changelog * updated copyright in rtest.py * fixed copyright to 2021-2024 * init; * updated benchmark_block_run_length_decode * added name_format option * updated benchmark_config_dispatch * updated some typos * Update benchmark/benchmark_config_dispatch.cpp Co-authored-by: Nara * tuned up block run length decode and config dispatch * updated changelog Co-authored-by: Nara * removed debug comments --------- Co-authored-by: Nara --- CHANGELOG.md | 5 +- .../benchmark_block_run_length_decode.cpp | 36 +++++----- benchmark/benchmark_config_dispatch.cpp | 70 ++++++++++++++++--- benchmark/benchmark_device_scan_by_key.cpp | 1 + 4 files changed, 86 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 347dbb71f..73e6c18b5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,8 +6,9 @@ Documentation for rocPRIM is available at ## Unreleased rocPRIM-3.3.0 for ROCm 6.3.0 ### Fixes -* Fixed an issue where while running rtest.py on windows and passing in an absolute path to --install_dir - causes FileNotFound error + + +* Fixed an issue where running rtest.py on Windows and passing an absolute path to `--install_dir` caused a `FileNotFound` error. 
## Unreleased rocPRIM-3.2.0 for ROCm 6.2.0 diff --git a/benchmark/benchmark_block_run_length_decode.cpp b/benchmark/benchmark_block_run_length_decode.cpp index 7d10630fd..7dd9faf61 100644 --- a/benchmark/benchmark_block_run_length_decode.cpp +++ b/benchmark/benchmark_block_run_length_decode.cpp @@ -171,36 +171,40 @@ void run_benchmark(benchmark::State& state, hipStream_t stream, size_t N) HIP_CHECK(hipFree(d_output)); } -#define CREATE_BENCHMARK(IT, OT, MINRL, MAXRL, BS, RPT, DIPT) \ - benchmark::RegisterBenchmark("block_run_length_decode", \ - &run_benchmark, \ - stream, \ - size) +#define CREATE_BENCHMARK(IT, OT, MINRL, MAXRL, BS, RPT, DIPT) \ + benchmark::RegisterBenchmark(bench_naming::format_name("{lvl:block,algo:run_length_decode" \ + ",item_type:" #IT \ + ",offset_type:" #OT \ + ",min_run_length:" #MINRL \ + ",max_run_length:" #MAXRL \ + ",cfg:{block_size:" #BS \ + ",run_per_thread:" #RPT \ + ",decoded_items_per_thread:" #DIPT \ + "}}" \ + ).c_str(), \ + &run_benchmark, \ + stream, \ + size) int main(int argc, char* argv[]) { cli::Parser parser(argc, argv); parser.set_optional("size", "size", DEFAULT_N, "number of values"); parser.set_optional("trials", "trials", -1, "number of iterations"); + parser.set_optional("name_format", + "name_format", + "human", + "either: json,human,txt"); parser.run_and_exit_if_error(); // Parse argv benchmark::Initialize(&argc, argv); const size_t size = parser.get("size"); const int trials = parser.get("trials"); + bench_naming::set_format(parser.get("name_format")); - std::cout << "benchmark_block_run_length_decode" << std::endl; - - // HIP + // // HIP hipStream_t stream = 0; // default - hipDeviceProp_t devProp; - int device_id = 0; - HIP_CHECK(hipGetDevice(&device_id)); - HIP_CHECK(hipGetDeviceProperties(&devProp, device_id)); - std::cout << "[HIP] Device name: " << devProp.name << std::endl; // Add benchmarks std::vector benchmarks{ diff --git a/benchmark/benchmark_config_dispatch.cpp 
b/benchmark/benchmark_config_dispatch.cpp index 2b9f917e6..298e5e55c 100644 --- a/benchmark/benchmark_config_dispatch.cpp +++ b/benchmark/benchmark_config_dispatch.cpp @@ -1,6 +1,6 @@ #include "benchmark_utils.hpp" - +#include "cmdparser.hpp" #include #include @@ -9,6 +9,11 @@ #include +#ifndef DEFAULT_N +const size_t DEFAULT_N = 1024 * 1024 * 32; +#endif + + enum class stream_kind { default_stream, @@ -61,15 +66,64 @@ static void BM_kernel_launch(benchmark::State& state) hipStreamSynchronize(stream); } -BENCHMARK_CAPTURE(BM_host_target_arch, default_stream, stream_kind::default_stream); -BENCHMARK_CAPTURE(BM_host_target_arch, per_thread_stream, stream_kind::per_thread_stream); -BENCHMARK_CAPTURE(BM_host_target_arch, explicit_stream, stream_kind::explicit_stream); -BENCHMARK_CAPTURE(BM_host_target_arch, async_stream, stream_kind::async_stream); -BENCHMARK(BM_kernel_launch); +#define CREATE_BENCHMARK(ST, SK) \ + benchmark::RegisterBenchmark( \ + bench_naming::format_name( \ + "{lvl:na" \ + ",algo:" #ST \ + ",cfg:default_config}" \ + ).c_str(), \ + &BM_host_target_arch, \ + SK \ + ) \ + int main(int argc, char** argv) { + cli::Parser parser(argc, argv); + parser.set_optional("size", "size", DEFAULT_N, "number of values"); + parser.set_optional("trials", "trials", -1, "number of iterations"); + parser.set_optional("name_format", + "name_format", + "human", + "either: json,human,txt"); + parser.run_and_exit_if_error(); + + // Parse argv benchmark::Initialize(&argc, argv); - add_common_benchmark_info(); + const size_t size = parser.get("size"); + const int trials = parser.get("trials"); + bench_naming::set_format(parser.get("name_format")); + + + // HIP + + std::vector benchmarks{ + CREATE_BENCHMARK(default_stream, stream_kind::default_stream), + CREATE_BENCHMARK(per_thread_stream, stream_kind::per_thread_stream), + CREATE_BENCHMARK(explicit_stream, stream_kind::explicit_stream), + CREATE_BENCHMARK(async_stream, stream_kind::async_stream) + }; + + + // Use manual timing 
+ for(auto& b : benchmarks) + { + b->UseManualTime(); + b->Unit(benchmark::kMillisecond); + } + + // Force number of iterations + if(trials > 0) + { + for(auto& b : benchmarks) + { + b->Iterations(trials); + } + } + + // Run benchmarks benchmark::RunSpecifiedBenchmarks(); -} \ No newline at end of file + return 0; + +} diff --git a/benchmark/benchmark_device_scan_by_key.cpp b/benchmark/benchmark_device_scan_by_key.cpp index 7e8cfface..956c0af38 100644 --- a/benchmark/benchmark_device_scan_by_key.cpp +++ b/benchmark/benchmark_device_scan_by_key.cpp @@ -71,6 +71,7 @@ int main(int argc, char* argv[]) "name_format", "human", "either: json,human,txt"); + #ifdef BENCHMARK_CONFIG_TUNING // optionally run an evenly split subset of benchmarks, when making multiple program invocations parser.set_optional("parallel_instance",