Commit 4bba6cd: lab6

vatican1 committed Dec 9, 2023
1 parent fbba6ee commit 4bba6cd
Showing 4 changed files with 115 additions and 9 deletions.
28 changes: 26 additions & 2 deletions src/cl/bitonic.cl
@@ -1,3 +1,27 @@
__kernel void bitonic(__global float *as) {
// TODO
// Merge direction for the current bitonic block.
enum ORDER { Lower = 0, Upper = 1 };

__kernel void bitonic(__global float *as,
unsigned int small_block_size,
unsigned int step,
unsigned int n)
{
    unsigned int gid = get_global_id(0);

    // Each work-item handles one compare-exchange pair: element ind and the
    // element shift positions after it, inside a small block of size small_block_size.
    unsigned int shift = small_block_size / 2;
    unsigned int amount_prev_blocks = gid / shift;
    unsigned int ind = small_block_size * amount_prev_blocks + (gid % shift);
    unsigned int other_ind = ind + shift;

    // step is the size of the current bitonic block: even blocks are merged
    // in ascending order, odd blocks in descending order.
    unsigned int big_block_number = ind / step;
    enum ORDER order = (big_block_number & 1) ? Upper : Lower;
    if (other_ind < n)
    {
        if ((order == Lower && as[ind] > as[other_ind]) ||
            (order == Upper && as[ind] < as[other_ind]))
        {
            float tmp = as[ind];
            as[ind] = as[other_ind];
            as[other_ind] = tmp;
        }
    }
}
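The kernel performs one compare-exchange pass of a bitonic sort. For intuition, here is a minimal host-side mirror of the same indexing, useful for unit-testing the pass logic on the CPU; bitonic_pass_cpu is a hypothetical helper, not part of the commit:

#include <algorithm>
#include <vector>

// Host-side mirror of one bitonic compare-exchange pass. Arguments have the
// same meaning as the kernel parameters; gid ranges over n / 2 work-items,
// matching the dispatch in main_bitonic.cpp. Assumes small_block_size >= 2.
void bitonic_pass_cpu(std::vector<float> &as, unsigned int small_block_size,
                      unsigned int step, unsigned int n) {
    unsigned int shift = small_block_size / 2;
    for (unsigned int gid = 0; gid < n / 2; ++gid) {
        unsigned int ind = small_block_size * (gid / shift) + (gid % shift);
        unsigned int other_ind = ind + shift;
        if (other_ind >= n)
            continue;
        bool upper = (ind / step) & 1; // odd bitonic blocks merge descending
        if ((!upper && as[ind] > as[other_ind]) || (upper && as[ind] < as[other_ind]))
            std::swap(as[ind], as[other_ind]);
    }
}

Replaying the host loop's (blockSize, small_block_size) schedule through this function sorts the array ascending, which makes it easy to diff against std::sort before debugging on the GPU.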
17 changes: 16 additions & 1 deletion src/cl/prefix_sum.cl
@@ -1 +1,16 @@
// TODO

// Up-sweep pass: after it runs, the last element of every 2 * block_size
// block holds that block's total. For gid == 0 the index arithmetic relies
// on unsigned wrap-around: ind underflows to UINT_MAX, and the subsequent
// + block_size offsets wrap back to the intended positions.
__kernel void reduce(__global unsigned int *as, unsigned int block_size, unsigned int n)
{
    unsigned int gid = get_global_id(0);
    unsigned int ind = gid * 2 * block_size - 1;
    if (ind + 2 * block_size >= n)
        return;
    as[ind + 2 * block_size] += as[ind + block_size];
}

// Scatter pass: adds the partial sum ending at as[block_ind - 1] to
// block_size consecutive outputs starting at bs[block_ind - 1], so that
// after all passes bs holds the inclusive prefix sum (bs[n - 1] excepted;
// the host reads it from as[n - 1] after the final reduce).
__kernel void sum(__global unsigned int *as, __global unsigned int *bs, unsigned int block_size)
{
    unsigned int gid = get_global_id(0);
    unsigned int block_ind = ((gid / block_size) * 2 + 1) * block_size;
    bs[block_ind + gid % block_size - 1] += as[block_ind - 1];
}
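Taken together, reduce is the up-sweep (each pass doubles the span of the partial sums stored at block boundaries) and sum scatters those boundary sums into the output. A sequential sketch of the same pass structure, assuming n is a power of two; prefix_sum_cpu is a hypothetical helper, not part of the commit:

#include <vector>

// Sequential mirror of the reduce/sum kernel passes. `as` is taken by value
// because the up-sweep mutates it, exactly as the GPU version mutates as_gpu.
// `result` must be zero-initialized by the caller, as in main_prefix_sum.cpp.
void prefix_sum_cpu(std::vector<unsigned int> as, std::vector<unsigned int> &result) {
    unsigned int n = (unsigned int) as.size();
    for (unsigned int block_size = 1; block_size < n; block_size *= 2) {
        // sum: add the partial sum ending at as[block_ind - 1] to the
        // block_size outputs whose prefix includes that left block.
        for (unsigned int gid = 0; gid < n / 2; ++gid) {
            unsigned int block_ind = ((gid / block_size) * 2 + 1) * block_size;
            result[block_ind + gid % block_size - 1] += as[block_ind - 1];
        }
        // reduce: merge adjacent blocks so that the last element of each
        // 2 * block_size block holds the block total for the next scale.
        for (unsigned int gid = 0; gid < n / (2 * block_size); ++gid) {
            unsigned int ind = gid * 2 * block_size;
            as[ind + 2 * block_size - 1] += as[ind + block_size - 1];
        }
    }
    // The scatter never touches result[n - 1]; after the final reduce the
    // grand total sits in as[n - 1], mirroring the readback in main.
    result[n - 1] = as[n - 1];
}

For as = {1, 2, 3, 4} this produces {1, 3, 6, 10}, matching a plain inclusive scan.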
25 changes: 22 additions & 3 deletions src/main_bitonic.cpp
@@ -50,7 +50,6 @@ int main(int argc, char **argv) {
std::cout << "CPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "CPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
}
/*
gpu::gpu_mem_32f as_gpu;
as_gpu.resizeN(n);

@@ -64,7 +63,27 @@

        t.restart(); // Restart the timer after the data upload so we measure kernel time, not data transfer

// TODO
unsigned int workGroupSize = 256;
unsigned int globalWorkSize = n / 2;

            unsigned int blockSize = 2;
            while (blockSize <= n)
            {
                unsigned int small_block_size = blockSize;
                // Stop at 2: with small_block_size == 1 the kernel's shift
                // would be 0 and gid / shift would divide by zero.
                while (small_block_size >= 2)
                {
                    bitonic.exec(gpu::WorkSize(workGroupSize, globalWorkSize),
                                 as_gpu,
                                 small_block_size,
                                 blockSize,
                                 n);
                    small_block_size /= 2;
                }
                blockSize *= 2;
            }
t.nextLap();
}
std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "GPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
@@ -76,6 +95,6 @@ int main(int argc, char **argv) {
for (int i = 0; i < n; ++i) {
EXPECT_THE_SAME(as[i], cpu_sorted[i], "GPU results should be equal to CPU results!");
}
*/

return 0;
}
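The two nested loops generate the classic bitonic schedule: for each bitonic block size they sweep the compare-exchange stride from blockSize down to 2, giving log2(n) * (log2(n) + 1) / 2 kernel launches in total. A small sketch (not part of the commit) that prints the schedule for n = 8:

#include <cstdio>

// Prints the (blockSize, small_block_size) pairs the host loop dispatches.
// For n = 8 this yields: (2,2) (4,4) (4,2) (8,8) (8,4) (8,2)
int main() {
    unsigned int n = 8;
    for (unsigned int blockSize = 2; blockSize <= n; blockSize *= 2)
        for (unsigned int small = blockSize; small >= 2; small /= 2)
            std::printf("(%u,%u) ", blockSize, small);
    std::printf("\n");
    return 0;
}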
54 changes: 51 additions & 3 deletions src/main_prefix_sum.cpp
@@ -75,9 +75,57 @@ int main(int argc, char **argv)
std::cout << "CPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "CPU: " << (n / 1000.0 / 1000.0) / t.lapAvg() << " millions/s" << std::endl;
}
gpu::Device device = gpu::chooseGPUDevice(argc, argv);
gpu::Context context;
context.init(device.device_id_opencl);
context.activate();
{
std::vector<unsigned int> result(n, 0);
gpu::gpu_mem_32u as_gpu, result_gpu;
as_gpu.resizeN(n);
result_gpu.resizeN(n);

{
// TODO: implement on OpenCL
}
ocl::Kernel reduce(prefix_sum_kernel, prefix_sum_kernel_length, "reduce");
ocl::Kernel sum(prefix_sum_kernel, prefix_sum_kernel_length, "sum");
reduce.compile();
sum.compile();

timer t;
for (int iter = 0; iter < benchmarkingIters; ++iter)
{
as_gpu.writeN(as.data(), n);
result_gpu.writeN(result.data(), n);
unsigned int work_group_size = 128;
            unsigned int add = n / 2; // global size for the sum kernel: one work-item per element pair
gpu::WorkSize work_size_add = gpu::WorkSize(work_group_size, add);
t.restart();
            // At each scale, sum consumes the block sums produced by the previous
            // reduce pass, then reduce builds the block sums for the next scale.
            for (unsigned int block_size = 1; block_size < n; block_size *= 2)
{
sum.exec(work_size_add,
as_gpu,
result_gpu,
block_size);

reduce.exec(gpu::WorkSize(work_group_size, n / (2 * block_size)),
as_gpu,
block_size,
n);
}
t.nextLap();
}
std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "GPU: " << (n / 1000.0 / 1000.0) / t.lapAvg() << " millions/s" << std::endl;

        // The sum kernel never writes result[n - 1]; after the last reduce
        // pass the grand total is in as[n - 1], so read it from there.
        result_gpu.readN(result.data(), n - 1);
        unsigned int tmp;
        as_gpu.readN(&tmp, 1, n - 1);
        result[n - 1] = tmp;

        // Verify the results
for (int i = 0; i < n; ++i)
{
EXPECT_THE_SAME(result[i], reference_result[i], "GPU results should be equal to CPU results!");
}
}
}
}
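The check compares against reference_result, presumably the sequential inclusive scan computed in the CPU benchmark above. A minimal sketch of that baseline, with a hypothetical helper name:

#include <vector>

// Plain O(n) inclusive prefix sum, the baseline the GPU output must match.
std::vector<unsigned int> inclusive_scan_cpu(const std::vector<unsigned int> &as) {
    std::vector<unsigned int> res(as.size());
    unsigned int acc = 0;
    for (std::size_t i = 0; i < as.size(); ++i) {
        acc += as[i];
        res[i] = acc;
    }
    return res;
}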
