Skip to content

Commit

Permalink
Fix LINPACK for PAC SVM
Browse files Browse the repository at this point in the history
  • Loading branch information
Marius Meyer committed Jun 1, 2020
1 parent 2ba4b94 commit 3eec33e
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 11 deletions.
40 changes: 29 additions & 11 deletions LINPACK/src/host/execution_blocked_pvt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,8 +67,15 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co

// prepare kernels
#ifdef USE_SVM
// To prevent the reuse of the result of previous repetitions, use this
// buffer instead and copy the result back to the real buffer
HOST_DATA_TYPE* A_tmp = reinterpret_cast<HOST_DATA_TYPE*>(
clSVMAlloc((*config.context)(), 0 ,
config.programSettings->matrixSize *
config.programSettings->matrixSize * sizeof(HOST_DATA_TYPE), 1024));

err = clSetKernelArgSVMPointer(gefakernel(), 0,
reinterpret_cast<void*>(A));
reinterpret_cast<void*>(A_tmp));
err = clSetKernelArgSVMPointer(gefakernel(), 1,
reinterpret_cast<void*>(ipvt));
#else
Expand All @@ -86,9 +93,13 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
std::vector<double> executionTimes;
for (int i = 0; i < config.programSettings->numRepetitions; i++) {
#ifdef USE_SVM
for (int k=0; k < config.programSettings->matrixSize * config.programSettings->matrixSize; k++) {
A_tmp[k] = A[k];
}

clEnqueueSVMMap(compute_queue(), CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
reinterpret_cast<void *>(A),
reinterpret_cast<void *>(A_tmp),
sizeof(HOST_DATA_TYPE) *
(config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
NULL, NULL);
Expand Down Expand Up @@ -122,15 +133,22 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
/* --- Read back results from Device --- */

#ifdef USE_SVM
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(A), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(b), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(ipvt), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(A), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(b), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(ipvt), 0,
NULL, NULL);

// read back result from temporary buffer
for (int k=0; k < config.programSettings->matrixSize * config.programSettings->matrixSize; k++) {
A[k] = A_tmp[k];
}
clSVMFree((*config.context)(), reinterpret_cast<void*>(A_tmp));

#else
compute_queue.enqueueReadBuffer(Buffer_a, CL_TRUE, 0,
sizeof(HOST_DATA_TYPE)*config.programSettings->matrixSize*config.programSettings->matrixSize, A);
Expand Down
3 changes: 3 additions & 0 deletions LINPACK/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,9 @@ if (INTELFPGAOPENCL_FOUND)
target_link_libraries(${HOST_EXE_NAME}_test_intel ${LAPACK_LIBRARIES})
include_directories(SYSTEM $ENV{MKLROOT}/include)
endif()
if (USE_SVM)
target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DCL_VERSION_2_0)
endif()
add_dependencies(${HOST_EXE_NAME}_test_intel lu_blocked_pvt_emulate_intel)
add_dependencies(${HOST_EXE_NAME}_test_intel lu_blocked_pvt_test_emulate_intel)
target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DINTEL_FPGA)
Expand Down
91 changes: 91 additions & 0 deletions LINPACK/tests/test_kernel_functionality_separate_cores.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,25 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
char* argv[3] = {"Test", "-f", kernelFileName};
bm = std::unique_ptr<linpack::LinpackBenchmark>(new linpack::LinpackBenchmark(argc, argv));
array_size = (1 << LOCAL_MEM_BLOCK_LOG);
bm->getExecutionSettings().programSettings->numRepetitions = 1;
bm->getExecutionSettings().programSettings->matrixSize = array_size;
#ifdef USE_SVM
A = reinterpret_cast<HOST_DATA_TYPE*>(
clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
array_size * array_size * sizeof(HOST_DATA_TYPE), 1024));
B = reinterpret_cast<HOST_DATA_TYPE*>(
clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
array_size * array_size * sizeof(HOST_DATA_TYPE), 1024));
C = reinterpret_cast<HOST_DATA_TYPE*>(
clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
array_size * array_size * sizeof(HOST_DATA_TYPE), 1024));
scale = reinterpret_cast<HOST_DATA_TYPE*>(
clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
array_size * sizeof(HOST_DATA_TYPE), 1024));
ipvt = reinterpret_cast<cl_int*>(
clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
array_size * sizeof(cl_int), 1024));
#else
posix_memalign(reinterpret_cast<void **>(&A), 4096,
sizeof(HOST_DATA_TYPE) * array_size * array_size);
posix_memalign(reinterpret_cast<void **>(&B), 4096,
Expand All @@ -36,6 +54,7 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
sizeof(HOST_DATA_TYPE) * array_size );
posix_memalign(reinterpret_cast<void **>(&ipvt), 4096,
sizeof(cl_int) * array_size);
#endif
}

void initializeData() {
Expand Down Expand Up @@ -83,6 +102,18 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st


// prepare kernels
#ifdef USE_SVM
err = clSetKernelArgSVMPointer(test_c4_kernel(), 0,
reinterpret_cast<void*>(A));
err = clSetKernelArgSVMPointer(test_c4_kernel(), 1,
reinterpret_cast<void*>(B));
err = clSetKernelArgSVMPointer(test_c4_kernel(), 2,
reinterpret_cast<void*>(C));
err = clSetKernelArgSVMPointer(test_c4_kernel(), 3,
reinterpret_cast<void*>(scale));
err = clSetKernelArgSVMPointer(test_c4_kernel(), 4,
reinterpret_cast<void*>(ipvt));
#else
err = test_c4_kernel.setArg(0, Buffer_a);
ASSERT_CL(err);
err = test_c4_kernel.setArg(1, Buffer_b);
Expand All @@ -93,6 +124,7 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
ASSERT_CL(err);
err = test_c4_kernel.setArg(4, Buffer_pivot);
ASSERT_CL(err);
#endif
err = test_c4_kernel.setArg(5, static_cast<uint>(array_size >> LOCAL_MEM_BLOCK_LOG));
ASSERT_CL(err);

Expand All @@ -101,6 +133,38 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
double t;
std::vector<double> executionTimes;
for (int i = 0; i < bm->getExecutionSettings().programSettings->numRepetitions; i++) {
#ifdef USE_SVM
clEnqueueSVMMap(compute_queue(), CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
reinterpret_cast<void *>(A),
sizeof(HOST_DATA_TYPE) *
(array_size * array_size), 0,
NULL, NULL);
clEnqueueSVMMap(compute_queue(), CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
reinterpret_cast<void *>(B),
sizeof(HOST_DATA_TYPE) *
(array_size * array_size), 0,
NULL, NULL);
clEnqueueSVMMap(compute_queue(), CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
reinterpret_cast<void *>(C),
sizeof(HOST_DATA_TYPE) *
(array_size * array_size), 0,
NULL, NULL);
clEnqueueSVMMap(compute_queue(), CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
reinterpret_cast<void *>(scale),
sizeof(HOST_DATA_TYPE) *
(array_size * array_size), 0,
NULL, NULL);
clEnqueueSVMMap(compute_queue(), CL_TRUE,
CL_MAP_READ | CL_MAP_WRITE,
reinterpret_cast<void *>(ipvt),
sizeof(HOST_DATA_TYPE) *
(array_size * array_size), 0,
NULL, NULL);
#else
compute_queue.enqueueWriteBuffer(Buffer_a, CL_TRUE, 0,
sizeof(HOST_DATA_TYPE)*array_size*array_size, A);
compute_queue.enqueueWriteBuffer(Buffer_b, CL_TRUE, 0,
Expand All @@ -111,6 +175,7 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
sizeof(HOST_DATA_TYPE)*array_size, scale);
compute_queue.enqueueWriteBuffer(Buffer_pivot, CL_TRUE, 0,
sizeof(cl_int)*array_size, ipvt);
#endif
compute_queue.finish();
auto t1 = std::chrono::high_resolution_clock::now();
compute_queue.enqueueTask(test_c4_kernel);
Expand All @@ -123,20 +188,46 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
}

/* --- Read back results from Device --- */
#ifdef USE_SVM
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(A), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(B), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(C), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(scale), 0,
NULL, NULL);
clEnqueueSVMUnmap(compute_queue(),
reinterpret_cast<void *>(ipvt), 0,
NULL, NULL);
#else
compute_queue.enqueueReadBuffer(Buffer_a, CL_TRUE, 0,
sizeof(HOST_DATA_TYPE)*array_size*array_size, A);
compute_queue.enqueueReadBuffer(Buffer_b, CL_TRUE, 0,
sizeof(HOST_DATA_TYPE)*array_size*array_size, B);
compute_queue.enqueueReadBuffer(Buffer_c, CL_TRUE, 0,
sizeof(HOST_DATA_TYPE)*array_size*array_size, C);
#endif
}

/**
 * Releases the fixture's buffers A, B, C, scale and ipvt and the kernel file
 * name string.
 *
 * When USE_SVM is defined the buffers are returned through clSVMFree on the
 * benchmark's OpenCL context; otherwise they are released with free().
 */
~LinpackKernelSeparateTest() override {
#ifdef USE_SVM
    // Release order: A, B, C, scale, ipvt
    void* const svmBuffers[] = {A, B, C, scale, ipvt};
    for (void* svmBuffer : svmBuffers) {
        clSVMFree((*bm->getExecutionSettings().context)(), svmBuffer);
    }
#else
    // Release order: A, B, C, ipvt, scale
    void* const hostBuffers[] = {A, B, C, ipvt, scale};
    for (void* hostBuffer : hostBuffers) {
        free(hostBuffer);
    }
#endif
    delete [] kernelFileName;
}
};
Expand Down

0 comments on commit 3eec33e

Please sign in to comment.