From a023e0e01322402d7fee1aa6b5e9ec40e3498e51 Mon Sep 17 00:00:00 2001
From: vatican1
Date: Sun, 8 Oct 2023 23:45:24 +0300
Subject: [PATCH] =?UTF-8?q?=D0=BB=D0=B0=D0=B1=D0=B0=205?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
Review notes (text between the "---" separator and the diffstat is not part of
the commit message):
 * merge.cl: fixed the calc_shif / rigth_start typos, made index types unsigned
   end to end, used an overflow-safe midpoint, and clamped run bounds with
   min() instead of relying on an inverted search range.
 * main_merge.cpp: the pass loop uses an unsigned counter and stops at
   merge_size < n; the former merge_size == n pass only copied the buffer.
 * Dropped debugging leftovers (benchmarkingIters 10 -> 1 and two commented-out
   lines), so the first main_merge.cpp hunk is gone.
 * Indentation of context lines was reconstructed from a whitespace-collapsed
   copy of this patch; verify it against the tree. The "From <sha>" and "index"
   hashes predate these edits; regenerate the patch after applying.
 * NOTE(review): the GPU "millions/s" line lies outside the hunks below; if it
   still divides integers, give it the same `1000.` fix (confirm in the tree).

 src/cl/merge.cl    | 63 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/main_merge.cpp | 17 +++++++++++++----
 2 files changed, 76 insertions(+), 4 deletions(-)

diff --git a/src/cl/merge.cl b/src/cl/merge.cl
index 8b137891..9e421042 100644
--- a/src/cl/merge.cl
+++ b/src/cl/merge.cl
@@ -1 +1,64 @@
 
+// Returns how many elements of the sorted range as[begin, end) precede `value`
+// in the merged output:
+//   strict == true  -> number of elements <  value (lower bound)
+//   strict == false -> number of elements <= value (upper bound)
+// An empty or inverted range (begin >= end) yields 0.
+unsigned int calc_shift(__global const float *as,
+                        const unsigned int begin,
+                        const unsigned int end,
+                        const float value,
+                        const bool strict)
+{
+    unsigned int left = begin;
+    unsigned int right = end;
+    while (left < right)
+    {
+        // Overflow-safe midpoint of [left, right).
+        const unsigned int middle = left + (right - left) / 2;
+        const float probe = as[middle];
+        if (probe > value || (strict && probe >= value))
+        {
+            right = middle;
+        }
+        else
+        {
+            left = middle + 1;
+        }
+    }
+    return left - begin;
+}
+
+// One pass of a bottom-up merge sort. `as` holds consecutive sorted runs of
+// length k (the last one may be shorter); each work item moves its element to
+// its final position inside the merged run of length 2 * k in `bs`.
+// Left-run elements use the strict bound and right-run elements the non-strict
+// one, so equal keys get distinct slots and the merge stays stable.
+__kernel void merge(__global const float *as,
+                    __global float *bs,
+                    const unsigned int k,
+                    const unsigned int n)
+{
+    const unsigned int id = get_global_id(0);
+    if (id >= n)
+        return;
+
+    const float value = as[id];
+
+    // Run boundaries are clamped to n so that a trailing partial pair is handled.
+    const unsigned int left_start = id - id % (2 * k);
+    const unsigned int right_start = min(left_start + k, n);
+    const unsigned int right_end = min(right_start + k, n);
+
+    unsigned int new_index;
+    if (id < right_start)
+    {
+        new_index = left_start + (id - left_start) + calc_shift(as, right_start, right_end, value, true);
+    }
+    else
+    {
+        new_index = left_start + (id - right_start) + calc_shift(as, left_start, right_start, value, false);
+    }
+
+    bs[new_index] = value;
+}
diff --git a/src/main_merge.cpp b/src/main_merge.cpp
index 207672bc..be9c9f6c 100644
--- a/src/main_merge.cpp
+++ b/src/main_merge.cpp
@@ -48,11 +48,12 @@ int main(int argc, char **argv) {
             t.nextLap();
         }
         std::cout << "CPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
-        std::cout << "CPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
+        std::cout << "CPU: " << (n / 1000. / 1000.) / t.lapAvg() << " millions/s" << std::endl;
     }
-    /*
     gpu::gpu_mem_32f as_gpu;
+    gpu::gpu_mem_32f bs_gpu;
     as_gpu.resizeN(n);
+    bs_gpu.resizeN(n);
     {
         ocl::Kernel merge(merge_kernel, merge_kernel_length, "merge");
         merge.compile();
@@ -62,7 +63,15 @@ int main(int argc, char **argv) {
             t.restart();// Запускаем секундомер после прогрузки данных, чтобы замерять время работы кернела, а не трансфера данных
             unsigned int workGroupSize = 128;
             unsigned int global_work_size = (n + workGroupSize - 1) / workGroupSize * workGroupSize;
-            merge.exec(gpu::WorkSize(workGroupSize, global_work_size), as_gpu, n);
+
+            // Bottom-up merge sort: each pass merges sorted runs of length merge_size
+            // from as_gpu into bs_gpu; the swap makes the freshest result as_gpu again.
+            // Passes stop at merge_size < n: a pass with merge_size >= n would only copy.
+            for (unsigned int merge_size = 1; merge_size < n; merge_size *= 2) {
+                merge.exec(gpu::WorkSize(workGroupSize, global_work_size), as_gpu, bs_gpu, merge_size, n);
+                std::swap(as_gpu, bs_gpu);
+            }
+
             t.nextLap();
         }
         std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
@@ -71,8 +80,8 @@ int main(int argc, char **argv) {
     }
     // Проверяем корректность результатов
     for (int i = 0; i < n; ++i) {
         EXPECT_THE_SAME(as[i], cpu_sorted[i], "GPU results should be equal to CPU results!");
     }
-*/
+
     return 0;
 }