-
Notifications
You must be signed in to change notification settings - Fork 53
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
254 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,13 +1,41 @@ | ||
#ifdef __CLION_IDE__ | ||
#include <libgpu/opencl/cl/clion_defines.cl> | ||
#endif | ||
// Renders an escape-time image of the Mandelbrot set.
//
// One work-item handles one pixel: global id 0 is the column, global id 1 is
// the row. Work-items that fall outside width x height exit immediately, so
// the global size may be rounded up past the image size.
//
// results      - output image, row-major, at least width * height floats
// width/height - image size in pixels
// fromX/fromY  - coordinates of the region's origin corner in the complex plane
// sizeX/sizeY  - extent of the region along each axis
// iters        - maximum number of iterations per pixel
//
// Each pixel receives iter / iters, where iter is the number of completed
// iterations before |z|^2 exceeded threshold^2 (iters if it never did).
__kernel void mandelbrot(__global float *results,
                         unsigned int width, unsigned int height,
                         float fromX, float fromY,
                         float sizeX, float sizeY,
                         unsigned int iters)
{
    const float threshold = 256.0f;
    const float threshold2 = threshold * threshold;

    // Unsigned, like the width/height they are compared against.
    const unsigned int i = get_global_id(0);
    const unsigned int j = get_global_id(1);

    if (i >= width || j >= height) {
        return;
    }

    // Sample the centre of the pixel, hence the +0.5 offset.
    const float x0 = fromX + (i + 0.5f) * sizeX / width;
    const float y0 = fromY + (j + 0.5f) * sizeY / height;

    // The orbit starts at z = c rather than z = 0.
    float x = x0;
    float y = y0;

    unsigned int iter = 0;
    for (; iter < iters; ++iter) {
        const float xPrev = x;
        x = x * x - y * y + x0;
        y = 2.0f * xPrev * y + y0;
        if ((x * x + y * y) > threshold2) {
            break;
        }
    }

    // Normalise the iteration count by the iteration budget.
    results[j * width + i] = (float) iter / iters;

    // TODO: to get rid of graininess and jitter during interactive zooming, add anti-aliasing:
    // roughly speaking, with anti-aliasing level N you compute not a single value at the pixel centre but N*N values
    // at the nodes of a regular grid inside the pixel, and then use the mean of those results as the value of the whole pixel.
    // This multiplies the number of operations by N*N, so anti-aliasing must be switched off when measuring gigaflops.
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,106 @@ | ||
// TODO | ||
// Number of input elements each work-item accumulates in the loop-based kernels.
#define VALUES_PER_WORK_ITEM 32
// Capacity, in elements, of the __local staging buffers used by the local-memory kernels.
#define WORKGROUP_SIZE 128
|
||
// Baseline reduction: every in-range work-item adds its own element of arr
// directly to *sum with a single atomic_add.
//
// arr - input values
// sum - accumulator updated atomically
// n   - number of elements in arr; work-items with a global id >= n do nothing
__kernel void atomic_sum(__global const int *arr,
                         __global unsigned int *sum,
                         unsigned int n)
{
    const unsigned int gid = get_global_id(0);

    // Work-items past the end of the input have nothing to contribute.
    if (gid >= n) {
        return;
    }

    atomic_add(sum, arr[gid]);
}
|
||
// Reduction in which each work-item sums a contiguous run of
// VALUES_PER_WORK_ITEM elements privately and then publishes the partial sum
// with one atomic_add.
//
// arr - input values
// sum - accumulator updated atomically
// n   - number of elements in arr; the run is clipped to this length
__kernel void loop_sum(__global const int *arr,
                       __global unsigned int *sum,
                       unsigned int n)
{
    const unsigned int idx = get_global_id(0);

    // Indices stay unsigned: a signed index would go negative above INT_MAX
    // and address memory in front of arr.
    const unsigned int begin = idx * VALUES_PER_WORK_ITEM;
    unsigned int end = begin + VALUES_PER_WORK_ITEM;
    if (end > n) {
        // Clip once here instead of testing the bound on every iteration.
        end = n;
    }

    unsigned int res = 0;
    // When begin >= end (work-item entirely past the input) the loop is skipped.
    for (unsigned int i = begin; i < end; ++i) {
        res += arr[i];
    }

    atomic_add(sum, res);
}
|
||
// Reduction in which each work-item sums VALUES_PER_WORK_ITEM elements spaced
// one work-group size apart, so that on every iteration neighbouring
// work-items read neighbouring elements. The partial sum is then published
// with one atomic_add.
//
// arr - input values
// sum - accumulator updated atomically
// n   - number of elements in arr; reads past this length are skipped
__kernel void loop_coalesced_sum(__global const int *arr,
                                 __global unsigned int *sum,
                                 unsigned int n)
{
    const unsigned int lid = get_local_id(0);
    const unsigned int wid = get_group_id(0);
    const unsigned int grs = get_local_size(0);

    // First element handled by this work-item inside its work-group's tile of
    // grs * VALUES_PER_WORK_ITEM elements.
    const unsigned int first = wid * grs * VALUES_PER_WORK_ITEM + lid;

    unsigned int res = 0;
    for (unsigned int i = 0; i < VALUES_PER_WORK_ITEM; ++i) {
        // Unsigned on purpose: a signed index would go negative above INT_MAX
        // and address memory in front of arr.
        const unsigned int idx = first + i * grs;
        if (idx >= n) {
            // idx only grows with i, so every later element is out of range too.
            break;
        }
        res += arr[idx];
    }

    atomic_add(sum, res);
}
|
||
// Reduction through local memory: every work-item stages one element of arr
// in a __local buffer, then work-item 0 of the group sums the staged values
// and publishes the group total with one atomic_add.
//
// arr - input values
// sum - accumulator updated atomically
// n   - number of elements in arr; work-items past the end stage a zero
//
// The work-group size must not exceed WORKGROUP_SIZE, the capacity of the
// staging buffer.
__kernel void sum_local_mem(__global const int *arr,
                            __global unsigned int *sum,
                            unsigned int n)
{
    const unsigned int gid = get_global_id(0);
    const unsigned int lid = get_local_id(0);
    const unsigned int group_size = get_local_size(0);

    __local unsigned int buf[WORKGROUP_SIZE];

    buf[lid] = gid < n ? arr[gid] : 0;

    // Make every staged value visible before work-item 0 reads the buffer.
    barrier(CLK_LOCAL_MEM_FENCE);

    if (lid == 0) {
        unsigned int group_res = 0;
        // Only the first group_size slots were written; summing up to
        // WORKGROUP_SIZE would read uninitialized local memory whenever the
        // kernel is launched with a smaller work-group.
        for (unsigned int i = 0; i < group_size; ++i) {
            group_res += buf[i];
        }

        atomic_add(sum, group_res);
    }
}
|
||
// Tree reduction in local memory: every work-item stages one element of arr,
// then the staged values are folded pairwise, halving the number of live
// values on each pass, until slot 0 holds the group total. Work-item 0
// publishes that total with one atomic_add.
//
// arr - input values
// sum - accumulator updated atomically
// n   - number of elements in arr; work-items past the end stage a zero
//
// The fold always starts from WORKGROUP_SIZE values while each work-item
// writes only its own slot, so slots at or beyond the launched work-group
// size are never initialized.
__kernel void tree_sum(__global const int *arr,
                       __global unsigned int *sum,
                       const unsigned int n)
{
    const unsigned int local_idx = get_local_id(0);
    const unsigned int global_idx = get_global_id(0);

    __local unsigned int scratch[WORKGROUP_SIZE];

    if (global_idx < n) {
        scratch[local_idx] = arr[global_idx];
    } else {
        scratch[local_idx] = 0;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    int remaining = WORKGROUP_SIZE;
    while (remaining > 1) {
        const int stride = remaining / 2;

        // The lower part of the live range absorbs the upper part.
        if (2 * local_idx < remaining) {
            scratch[local_idx] += scratch[local_idx + stride];
        }

        // Reached by every work-item, including the idle ones, on every pass.
        barrier(CLK_LOCAL_MEM_FENCE);

        remaining = stride;
    }

    if (local_idx == 0) {
        atomic_add(sum, scratch[0]);
    }
}
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters