-
Notifications
You must be signed in to change notification settings - Fork 0
/
clbessel.cpp
91 lines (66 loc) · 1.99 KB
/
clbessel.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
#include "clbessel.h"
#include <OpenCL/opencl.h>
//#include <CL/opencl.h>
#include <stdio.h>
#include "arrayfire.h"
#include "af/util.h"
#include "helper.h"
// Shared OpenCL state used by the functions in this file.
// Every handle starts out as 0 (null) and is filled in at run time.

// OpenCL context, obtained via get_context() from an ArrayFire device buffer.
cl_context context = 0;
// Program handle; assigned in initKernels() from build_program().
cl_program program = 0;
// Command queue; created in launch_kernel() via create_queue() when still null.
cl_command_queue queue = 0;
// Kernel handle passed to launch_kernel() by bessj0(); assigned in initKernels().
cl_kernel kbessj0 = 0;
// Kernel handle passed to launch_kernel() by bessj1(); assigned in initKernels().
cl_kernel kbessj1 = 0;
// Kernel handle passed to launch_kernel() by bessy0(); assigned in initKernels().
cl_kernel kbessy0 = 0;
/**
 * Runs a one-dimensional OpenCL kernel taking (input buffer, output buffer, int count)
 * over an ArrayFire array.
 *
 * @param in      Source array; its device buffer is passed as kernel argument 0.
 *                NOTE(review): accessed via device<float>(), so this assumes f32 data - confirm.
 * @param out     Overwritten with a freshly allocated array of in.dims(); its device
 *                buffer is passed as kernel argument 1.
 * @param num     Element count passed as kernel argument 2. Values <= 0 launch nothing.
 * @param kernel  Kernel to run; a null handle is reported on stderr and nothing is launched.
 *
 * OpenCL failures are reported on stderr; in that case the contents of `out` are
 * whatever the fresh allocation held. Both arrays are always unlocked again before
 * returning once their device pointers have been taken.
 */
static void launch_kernel(af::array &in, af::array &out, int num, cl_kernel kernel)
{
    //setup af memory
    out = af::array(in.dims());

    // Nothing to compute; a zero-sized global range is also rejected by
    // clEnqueueNDRangeKernel on OpenCL versions before 2.1.
    if (num <= 0)
        return;

    if (!kernel) {
        fprintf(stderr, "launch_kernel: kernel handle is null; was initKernels() called?\n");
        return;
    }

    // Take the raw device buffers; they are handed back with unlock() below.
    float *_in = in.device<float>();
    float *_out = out.device<float>();

    //sync any unfinished commands
    af::sync();

    // Lazily create the shared context/queue on first use.
    if (!context)
        context = get_context(reinterpret_cast<cl_mem>(_in));
    if (!queue)
        queue = create_queue(context);

    if (!queue) {
        fprintf(stderr, "launch_kernel: no OpenCL command queue available\n");
    } else {
        //setup args (stop at the first failure so the reported code is meaningful)
        cl_int err = clSetKernelArg(kernel, 0, sizeof(cl_mem), &_in);
        if (err == CL_SUCCESS)
            err = clSetKernelArg(kernel, 1, sizeof(cl_mem), &_out);
        if (err == CL_SUCCESS)
            err = clSetKernelArg(kernel, 2, sizeof(int), &num);

        if (err != CL_SUCCESS) {
            fprintf(stderr, "launch_kernel: clSetKernelArg failed (%d)\n", (int)err);
        } else {
            //launch kernel
            // Round the global size up to a whole number of work-groups. The extra
            // work-items presumably rely on the kernel checking its index against
            // `num` - TODO confirm in the .cl source.
            const size_t local = 32;
            const size_t count = static_cast<size_t>(num);
            const size_t global = local * ((count + local - 1) / local);

            err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL, &global, &local, 0, NULL, NULL);
            if (err != CL_SUCCESS) {
                fprintf(stderr, "launch_kernel: clEnqueueNDRangeKernel failed (%d)\n", (int)err);
            } else {
                // Block until the kernel is done so ArrayFire sees finished data.
                err = clFinish(queue);
                if (err != CL_SUCCESS)
                    fprintf(stderr, "launch_kernel: clFinish failed (%d)\n", (int)err);
            }
        }
    }

    //release arrays back to AF
    in.unlock();
    out.unlock();
}
// Runs the kbessj0 kernel over every element of `in` and stores the result in
// `out`, which is replaced by a new array (see launch_kernel).
// Presumably the Bessel function of the first kind, order 0 - confirm in the .cl source.
void bessj0(af::array &in, af::array &out)
{
    const int count = in.elements();
    launch_kernel(in, out, count, kbessj0);
}
// Runs the kbessj1 kernel over every element of `in` and stores the result in
// `out`, which is replaced by a new array (see launch_kernel).
// Presumably the Bessel function of the first kind, order 1 - confirm in the .cl source.
void bessj1(af::array &in, af::array &out)
{
    const int count = in.elements();
    launch_kernel(in, out, count, kbessj1);
}
/**
 * Loads "besselj0.cl", builds it, and creates the three kernel handles
 * (kbessj0, kbessj1, kbessy0) used by the wrappers in this file.
 *
 * @param dummy  Any ArrayFire array; its device buffer is used only to look up
 *               the OpenCL context via get_context().
 *
 * The context is stored in the file-scope `context` so that the command queue
 * later created in launch_kernel() belongs to the same context the kernels
 * were built in. A null program or kernel handle is reported on stderr.
 */
void initKernels(af::array dummy)
{
    const char *kernel_file = "besselj0.cl";

    //read in kernel
    std::string kernel_str = get_kernel_string(kernel_file);

    // Take the raw device buffer; handed back with unlock() below.
    float *_dum = dummy.device<float>();

    //compile kernel
    // Assign the file-scope handle (not a shadowing local) so launch_kernel()
    // reuses exactly this context.
    context = get_context(reinterpret_cast<cl_mem>(_dum));
    program = build_program(context, kernel_str);

    kbessj0 = create_kernel(program, "besslj0");
    kbessj1 = create_kernel(program, "besslj1");
    kbessy0 = create_kernel(program, "bessly0");

    if (!program)
        fprintf(stderr, "initKernels: failed to build program from %s\n", kernel_file);
    if (!kbessj0 || !kbessj1 || !kbessy0)
        fprintf(stderr, "initKernels: failed to create one or more kernels from %s\n", kernel_file);

    dummy.unlock();
}
// Runs the kbessy0 kernel over every element of `in` and stores the result in
// `out`, which is replaced by a new array (see launch_kernel).
// Presumably the Bessel function of the second kind, order 0 - confirm in the .cl source.
void bessy0(af::array &in, af::array &out)
{
    const int count = in.elements();
    launch_kernel(in, out, count, kbessy0);
}