From 50eb22f3adad7890e732c9216c6a1eb78a832e9b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Berke=20Kocao=C4=9Flu?=
Date: Sun, 8 Sep 2024 02:04:43 +0300
Subject: [PATCH] gpu-bandwidth: take average of many iterations, also benchmark writes, clean up

---
 src/cpp/util/core/gpu-bandwidth.cpp | 58 +++++++++++++++++++----------
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/src/cpp/util/core/gpu-bandwidth.cpp b/src/cpp/util/core/gpu-bandwidth.cpp
index a4649800..c0d522b8 100644
--- a/src/cpp/util/core/gpu-bandwidth.cpp
+++ b/src/cpp/util/core/gpu-bandwidth.cpp
@@ -15,50 +15,68 @@
 
 DEFINE_EXEC_INFO();
 
-const constexpr std::size_t data_length = 1000000uz;
-using data_type = float;
-const constexpr std::size_t data_size = data_length * sizeof(data_type);
+using data_t = float;
+const constexpr std::size_t data_length = 1'000'000'000;
+const constexpr std::size_t data_size = data_length * sizeof(data_t);
+const constexpr std::size_t iteration_count = 100;
 
-int main(void)
+int main()
 {
     std::vector<cl::Platform> platforms;
     cl::Platform::get(&platforms);
 
-    xph::die_if(platforms.empty(), "no OpenCL platforms found");
+    xph::die_if(platforms.empty(), "No OpenCL platforms found.");
 
-    const cl::Platform platform = platforms.front();
+    const cl::Platform& platform = platforms.front();
 
     std::vector<cl::Device> devices;
     platform.getDevices(CL_DEVICE_TYPE_GPU, &devices);
 
-    xph::die_if(devices.empty(), "no GPUs found");
+    xph::die_if(devices.empty(), "No GPUs found.");
 
-    const cl::Device device = devices.front();
+    const cl::Device& device = devices.front();
 
     const cl::Context context(device);
     const cl::CommandQueue queue(context, device);
 
-    std::vector<data_type> data(data_length, 0.0f);
+    std::vector<data_t> data(data_length);
     const cl::Buffer buffer(context, CL_MEM_READ_WRITE, data_size);
 
+    std::cerr << "Initialising data..." << std::flush;
+
     for (std::size_t i = 0; i < data_length; ++i)
-        data[i] = i * 1.0f;
+        data[i] = static_cast<data_t>(i);
+
+    std::cerr << " OK\nPerforming write test..." << std::flush;
+
+    const auto write_start_time = std::chrono::high_resolution_clock::now();
+    for (std::size_t i = 0; i < iteration_count; ++i)
+        queue.enqueueWriteBuffer(buffer, CL_TRUE, 0, data_size, data.data());
+    const auto write_end_time = std::chrono::high_resolution_clock::now();
+
+    std::cerr << " OK\nPerforming read test..." << std::flush;
 
-    std::cerr << "Initializing data..." << std::endl;
+    const auto read_start_time = std::chrono::high_resolution_clock::now();
+    for (std::size_t i = 0; i < iteration_count; ++i)
+        queue.enqueueReadBuffer(buffer, CL_TRUE, 0, data_size, data.data());
+    const auto read_end_time = std::chrono::high_resolution_clock::now();
 
-    queue.enqueueWriteBuffer(buffer, CL_TRUE, 0, data_size, data.data());
+    std::cerr << " OK" << std::endl;
 
-    const auto begin_time = std::chrono::high_resolution_clock::now();
-    queue.enqueueReadBuffer(buffer, CL_TRUE, 0, data_size, data.data());
-    const auto end_time = std::chrono::high_resolution_clock::now();
-    const std::chrono::duration<double> elapsed_time = end_time - begin_time;
+    const double total_write_time =
+        std::chrono::duration<double>(write_end_time - write_start_time).count();
+    const double total_read_time =
+        std::chrono::duration<double>(read_end_time - read_start_time).count();
 
-    std::cerr << "Data read back from device." << std::endl;
+    const double average_write_time = total_write_time / iteration_count;
+    const double average_read_time = total_read_time / iteration_count;
+    const double data_size_MB = static_cast<double>(data_size) / (1024.0 * 1024.0);
 
-    const double data_size_MB = data_size / (1024.0 * 1024.0);
-    const double bandwidth_MBps = data_size_MB / elapsed_time.count();
+    const double write_bandwidth_MBps = data_size_MB / average_write_time;
+    const double read_bandwidth_MBps = data_size_MB / average_read_time;
 
-    std::cout << bandwidth_MBps << " MB/s" << std::endl;
+    std::cout << "Average Write Bandwidth: " << write_bandwidth_MBps << " MB/s\n"
+              << "Average Read Bandwidth: " << read_bandwidth_MBps << " MB/s" << std::endl;
 
     return EXIT_SUCCESS;
 }