Commit 4bba6cd: lab6

vatican1 committed Dec 9, 2023
1 parent fbba6ee commit 4bba6cd
Showing 4 changed files with 115 additions and 9 deletions.
28 changes: 26 additions & 2 deletions src/cl/bitonic.cl
@@ -1,3 +1,27 @@
__kernel void bitonic(__global float *as) {
// TODO
// Merge direction for the current bitonic block.
enum ORDER { Lower = 0, Upper = 1 };

__kernel void bitonic(__global float *as,
unsigned int small_block_size,
unsigned int step,
unsigned int n)
{
    unsigned int gid = get_global_id(0);

    // Each work-item handles one compare-exchange pair: element ind and the
    // element shift positions after it, inside a small block of size small_block_size.
    unsigned int shift = small_block_size / 2;
    unsigned int amount_prev_blocks = gid / shift;
    unsigned int ind = small_block_size * amount_prev_blocks + (gid % shift);
    unsigned int other_ind = ind + shift;

    // step is the size of the current bitonic block: even blocks are merged
    // in ascending order, odd blocks in descending order.
    unsigned int big_block_number = ind / step;
    enum ORDER order = (big_block_number & 1) ? Upper : Lower;
    if (other_ind < n)
    {
        if ((order == Lower && as[ind] > as[other_ind]) ||
            (order == Upper && as[ind] < as[other_ind]))
        {
            float tmp = as[ind];
            as[ind] = as[other_ind];
            as[other_ind] = tmp;
        }
    }
}
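The kernel performs one compare-exchange pass of a bitonic sort. For intuition, here is a minimal host-side mirror of the same indexing, useful for unit-testing the pass logic on the CPU; bitonic_pass_cpu is a hypothetical helper, not part of the commit:

#include <algorithm>
#include <vector>

// Host-side mirror of one bitonic compare-exchange pass. Arguments have the
// same meaning as the kernel parameters; gid ranges over n / 2 work-items,
// matching the dispatch in main_bitonic.cpp. Assumes small_block_size >= 2.
void bitonic_pass_cpu(std::vector<float> &as, unsigned int small_block_size,
                      unsigned int step, unsigned int n) {
    unsigned int shift = small_block_size / 2;
    for (unsigned int gid = 0; gid < n / 2; ++gid) {
        unsigned int ind = small_block_size * (gid / shift) + (gid % shift);
        unsigned int other_ind = ind + shift;
        if (other_ind >= n)
            continue;
        bool upper = (ind / step) & 1; // odd bitonic blocks merge descending
        if ((!upper && as[ind] > as[other_ind]) || (upper && as[ind] < as[other_ind]))
            std::swap(as[ind], as[other_ind]);
    }
}

Replaying the host loop's (blockSize, small_block_size) schedule through this function sorts the array ascending, which makes it easy to diff against std::sort before debugging on the GPU.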
17 changes: 16 additions & 1 deletion src/cl/prefix_sum.cl
@@ -1 +1,16 @@
// TODO

// Up-sweep pass: after it runs, the last element of every 2 * block_size
// block holds that block's total. For gid == 0 the index arithmetic relies
// on unsigned wrap-around: ind underflows to UINT_MAX, and the subsequent
// + block_size offsets wrap back to the intended positions.
__kernel void reduce(__global unsigned int *as, unsigned int block_size, unsigned int n)
{
    unsigned int gid = get_global_id(0);
    unsigned int ind = gid * 2 * block_size - 1;
    if (ind + 2 * block_size >= n)
        return;
    as[ind + 2 * block_size] += as[ind + block_size];
}

// Scatter pass: adds the partial sum ending at as[block_ind - 1] to
// block_size consecutive outputs starting at bs[block_ind - 1], so that
// after all passes bs holds the inclusive prefix sum (bs[n - 1] excepted;
// the host reads it from as[n - 1] after the final reduce).
__kernel void sum(__global unsigned int *as, __global unsigned int *bs, unsigned int block_size)
{
    unsigned int gid = get_global_id(0);
    unsigned int block_ind = ((gid / block_size) * 2 + 1) * block_size;
    bs[block_ind + gid % block_size - 1] += as[block_ind - 1];
}
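Taken together, reduce is the up-sweep (each pass doubles the span of the partial sums stored at block boundaries) and sum scatters those boundary sums into the output. A sequential sketch of the same pass structure, assuming n is a power of two; prefix_sum_cpu is a hypothetical helper, not part of the commit:

#include <vector>

// Sequential mirror of the reduce/sum kernel passes. `as` is taken by value
// because the up-sweep mutates it, exactly as the GPU version mutates as_gpu.
// `result` must be zero-initialized by the caller, as in main_prefix_sum.cpp.
void prefix_sum_cpu(std::vector<unsigned int> as, std::vector<unsigned int> &result) {
    unsigned int n = (unsigned int) as.size();
    for (unsigned int block_size = 1; block_size < n; block_size *= 2) {
        // sum: add the partial sum ending at as[block_ind - 1] to the
        // block_size outputs whose prefix includes that left block.
        for (unsigned int gid = 0; gid < n / 2; ++gid) {
            unsigned int block_ind = ((gid / block_size) * 2 + 1) * block_size;
            result[block_ind + gid % block_size - 1] += as[block_ind - 1];
        }
        // reduce: merge adjacent blocks so that the last element of each
        // 2 * block_size block holds the block total for the next scale.
        for (unsigned int gid = 0; gid < n / (2 * block_size); ++gid) {
            unsigned int ind = gid * 2 * block_size;
            as[ind + 2 * block_size - 1] += as[ind + block_size - 1];
        }
    }
    // The scatter never touches result[n - 1]; after the final reduce the
    // grand total sits in as[n - 1], mirroring the readback in main.
    result[n - 1] = as[n - 1];
}

For as = {1, 2, 3, 4} this produces {1, 3, 6, 10}, matching a plain inclusive scan.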
25 changes: 22 additions & 3 deletions src/main_bitonic.cpp
@@ -50,7 +50,6 @@ int main(int argc, char **argv) {
std::cout << "CPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "CPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
}
/*
gpu::gpu_mem_32f as_gpu;
as_gpu.resizeN(n);

@@ -64,7 +63,27 @@

        t.restart(); // Restart the timer after the data upload so we measure kernel time, not data transfer

// TODO
unsigned int workGroupSize = 256;
unsigned int globalWorkSize = n / 2;

            unsigned int blockSize = 2;
            while (blockSize <= n)
            {
                unsigned int small_block_size = blockSize;
                // Stop at 2: with small_block_size == 1 the kernel's shift
                // would be 0 and gid / shift would divide by zero.
                while (small_block_size >= 2)
                {
                    bitonic.exec(gpu::WorkSize(workGroupSize, globalWorkSize),
                                 as_gpu,
                                 small_block_size,
                                 blockSize,
                                 n);
                    small_block_size /= 2;
                }
                blockSize *= 2;
            }
t.nextLap();
}
std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "GPU: " << (n / 1000 / 1000) / t.lapAvg() << " millions/s" << std::endl;
@@ -76,6 +95,6 @@ int main(int argc, char **argv) {
for (int i = 0; i < n; ++i) {
EXPECT_THE_SAME(as[i], cpu_sorted[i], "GPU results should be equal to CPU results!");
}
*/

return 0;
}
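The two nested loops generate the classic bitonic schedule: for each bitonic block size they sweep the compare-exchange stride from blockSize down to 2, giving log2(n) * (log2(n) + 1) / 2 kernel launches in total. A small sketch (not part of the commit) that prints the schedule for n = 8:

#include <cstdio>

// Prints the (blockSize, small_block_size) pairs the host loop dispatches.
// For n = 8 this yields: (2,2) (4,4) (4,2) (8,8) (8,4) (8,2)
int main() {
    unsigned int n = 8;
    for (unsigned int blockSize = 2; blockSize <= n; blockSize *= 2)
        for (unsigned int small = blockSize; small >= 2; small /= 2)
            std::printf("(%u,%u) ", blockSize, small);
    std::printf("\n");
    return 0;
}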
54 changes: 51 additions & 3 deletions src/main_prefix_sum.cpp
@@ -75,9 +75,57 @@ int main(int argc, char **argv)
std::cout << "CPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "CPU: " << (n / 1000.0 / 1000.0) / t.lapAvg() << " millions/s" << std::endl;
}
gpu::Device device = gpu::chooseGPUDevice(argc, argv);
gpu::Context context;
context.init(device.device_id_opencl);
context.activate();
{
std::vector<unsigned int> result(n, 0);
gpu::gpu_mem_32u as_gpu, result_gpu;
as_gpu.resizeN(n);
result_gpu.resizeN(n);

{
// TODO: implement on OpenCL
}
ocl::Kernel reduce(prefix_sum_kernel, prefix_sum_kernel_length, "reduce");
ocl::Kernel sum(prefix_sum_kernel, prefix_sum_kernel_length, "sum");
reduce.compile();
sum.compile();

timer t;
for (int iter = 0; iter < benchmarkingIters; ++iter)
{
as_gpu.writeN(as.data(), n);
result_gpu.writeN(result.data(), n);
unsigned int work_group_size = 128;
            unsigned int add = n / 2; // global size for the sum kernel: one work-item per element pair
gpu::WorkSize work_size_add = gpu::WorkSize(work_group_size, add);
t.restart();
            // At each scale, sum consumes the block sums produced by the previous
            // reduce pass, then reduce builds the block sums for the next scale.
            for (unsigned int block_size = 1; block_size < n; block_size *= 2)
{
sum.exec(work_size_add,
as_gpu,
result_gpu,
block_size);

reduce.exec(gpu::WorkSize(work_group_size, n / (2 * block_size)),
as_gpu,
block_size,
n);
}
t.nextLap();
}
std::cout << "GPU: " << t.lapAvg() << "+-" << t.lapStd() << " s" << std::endl;
std::cout << "GPU: " << (n / 1000.0 / 1000.0) / t.lapAvg() << " millions/s" << std::endl;

        // The sum kernel never writes result[n - 1]; after the last reduce
        // pass the grand total is in as[n - 1], so read it from there.
        result_gpu.readN(result.data(), n - 1);
        unsigned int tmp;
        as_gpu.readN(&tmp, 1, n - 1);
        result[n - 1] = tmp;

        // Verify the results
for (int i = 0; i < n; ++i)
{
EXPECT_THE_SAME(result[i], reference_result[i], "GPU results should be equal to CPU results!");
}
}
}
}
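The check compares against reference_result, presumably the sequential inclusive scan computed in the CPU benchmark above. A minimal sketch of that baseline, with a hypothetical helper name:

#include <vector>

// Plain O(n) inclusive prefix sum, the baseline the GPU output must match.
std::vector<unsigned int> inclusive_scan_cpu(const std::vector<unsigned int> &as) {
    std::vector<unsigned int> res(as.size());
    unsigned int acc = 0;
    for (std::size_t i = 0; i < as.size(); ++i) {
        acc += as[i];
        res[i] = acc;
    }
    return res;
}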
