Skip to content

Commit

Permalink
Merge branch 'improve-testing' into 'master'
Browse files Browse the repository at this point in the history
Improve Testing

See merge request pc2/HPCC_FPGA!11
  • Loading branch information
Marius Meyer committed May 27, 2020
2 parents 6bfd62f + 34d2dbd commit ea9d696
Show file tree
Hide file tree
Showing 137 changed files with 5,235 additions and 5,289 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,4 @@ cmake-*
build/*
.idea
.venv
docs/*
24 changes: 12 additions & 12 deletions .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ build:STREAM:
- build/bin/stream_kernels_single_emulate.aocx
- build/bin/stream_kernels_emulate.aocx
- build/bin/STREAM_FPGA_intel
- build/bin/Test_intel
- build/bin/STREAM_FPGA_test_intel


build:RandomAccess:
Expand All @@ -38,7 +38,7 @@ build:RandomAccess:
paths:
- build/bin/random_access_kernels_single_emulate.aocx
- build/bin/RandomAccess_intel
- build/bin/Test_intel
- build/bin/RandomAccess_test_intel


build:PTRANS:
Expand All @@ -54,8 +54,8 @@ build:PTRANS:
paths:
- build/bin/transpose_optimized_emulate.aocx
- build/bin/transpose_default_emulate.aocx
- build/bin/trans_intel
- build/bin/Test_intel
- build/bin/Transpose_intel
- build/bin/Transpose_test_intel

build:LINPACK:
stage: build
Expand All @@ -70,8 +70,8 @@ build:LINPACK:
paths:
- build/bin/lu_blocked_pvt_emulate.aocx
- build/bin/lu_blocked_pvt_test_emulate.aocx
- build/bin/LINPACK_intel
- build/bin/Test_intel
- build/bin/Linpack_intel
- build/bin/Linpack_test_intel

build:GEMM:
stage: build
Expand All @@ -86,7 +86,7 @@ build:GEMM:
paths:
- build/bin/gemm_cannon_emulate.aocx
- build/bin/GEMM_intel
- build/bin/Test_intel
- build/bin/GEMM_test_intel

build:FFT:
stage: build
Expand All @@ -101,7 +101,7 @@ build:FFT:
paths:
- build/bin/fft1d_float_8_emulate.aocx
- build/bin/FFT_intel
- build/bin/Test_intel
- build/bin/FFT_test_intel

build:b_eff:
stage: build
Expand All @@ -115,8 +115,8 @@ build:b_eff:
artifacts:
paths:
- build/bin/communication_bw520n_emulate.aocx
- build/bin/fnet
- build/bin/Google_Tests_run
- build/bin/Network_intel
- build/bin/Network_test_intel

###
#
Expand Down Expand Up @@ -167,7 +167,7 @@ test:LINPACK:
stage: test
script:
- cd build
- cmake ../LINPACK -DDEFAULT_PLATFORM=0 -DDEFAULT_DEVICE=0
- cmake ../LINPACK -DDEFAULT_PLATFORM=0 -DDEFAULT_DEVICE=0 -DBLOCK_SIZE=32
- make CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1 CTEST_OUTPUT_ON_FAILURE=1 test
dependencies:
- build:LINPACK
Expand All @@ -180,7 +180,7 @@ test:GEMM:
stage: test
script:
- cd build
- cmake ../GEMM -DDEFAULT_PLATFORM=0 -DDEFAULT_DEVICE=0
- cmake ../GEMM -DDEFAULT_PLATFORM=0 -DDEFAULT_DEVICE=0 -DBLOCK_SIZE=32
- make CL_CONTEXT_EMULATOR_DEVICE_INTELFPGA=1 CTEST_OUTPUT_ON_FAILURE=1 test
dependencies:
- build:GEMM
Expand Down
6 changes: 6 additions & 0 deletions FFT/CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@

This file contains all changes made to the source code for each release.

## 1.0.2

#### Changed:
- Converted host code to new OO code
- Unit tests and emulation kernels work now: Fail for Xilinx

## 1.0.1

#### Added:
Expand Down
2 changes: 1 addition & 1 deletion FFT/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
cmake_minimum_required(VERSION 3.1)
project(FFT VERSION 1.0.1)
project(FFT VERSION 1.0.2)

set(DEFAULT_ITERATIONS 100 CACHE STRING "Default number of iterations that is done with a single kernel execution")
set(HOST_DATA_TYPE cl_float CACHE STRING "Data type used by the host code. Should match the data type of the used FFT")
Expand Down
8 changes: 4 additions & 4 deletions FFT/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ The targets below can be used to build the benchmark and its kernels, where `VEN
| Target | Description |
| -------- | ---------------------------------------------- |
| FFT_`VENDOR` | Builds the host application |
| Test_`VENDOR` | Compile the tests and its dependencies |
| FFT_test_`VENDOR` | Compiles the tests and their dependencies |

Moreover, there are additional targets to generate kernel reports and bitstreams.
The provided kernel is optimized for Stratix 10 with 512bit LSUs.
Expand All @@ -37,7 +37,7 @@ The targets below can be used to build the benchmark and its kernels, where `VEN

mkdir build && cd build
cmake ..
make fFFT
make FFT_intel

You will find all executables and kernel files in the `bin`
folder of your build directory.
Expand Down Expand Up @@ -68,7 +68,7 @@ For more information on available input parameters run

-f, --file arg Kernel file name
-n, arg Number of repetitions (default: 10)
-i, arg Multiplier for the used data size that will be i *
-b, arg Multiplier for the used data size that will be b *
FFT_SIZE (default: 100)
--inverse If set, the inverse FFT is calculated instead
--device arg Index of the device that has to be used. If not given
Expand All @@ -81,7 +81,7 @@ For more information on available input parameters run

To execute the unit and integration tests run

./Test_intel
./FFT_test_intel -f KERNEL_FILE_NAME

in the `bin` folder within the build directory.
It will run an emulation of the kernel and execute some functionality tests.
Expand Down
8 changes: 8 additions & 0 deletions FFT/src/common/parameters.h.in
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,14 @@
#define LOG_FFT_SIZE @LOG_FFT_SIZE@
#define FFT_UNROLL @FFT_UNROLL@

/*
Short description of the program.
Moreover, the version is also compiled into the description.
*/
#define PROGRAM_DESCRIPTION "Implementation of the FFT benchmark"\
" proposed in the HPCC benchmark suite for FPGA.\n"\
"Version: " VERSION "\n"

/**
Output separator
*/
Expand Down
4 changes: 2 additions & 2 deletions FFT/src/device/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,14 +5,14 @@ if (INTELFPGAOPENCL_FOUND)
generate_kernel_targets_intel(fft1d_float_8)
add_test(NAME test_emulation_intel COMMAND ./FFT_intel -f fft1d_float_8_emulate.aocx -i 1 -n 1
WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
add_test(NAME test_output_parsing_intel COMMAND ${CMAKE_SOURCE_DIR}/../scripts/evaluation/execute_and_parse.sh ./FFT_intel -f fft1d_float_8_emulate.aocx -i 1 -n 1
add_test(NAME test_output_parsing_intel COMMAND ${CMAKE_SOURCE_DIR}/../scripts/evaluation/execute_and_parse.sh ./FFT_intel -f fft1d_float_8_emulate.aocx -b 1 -n 1
WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
endif()

if (Vitis_FOUND)
generate_kernel_targets_xilinx(fft1d_float_8)
add_test(NAME test_emulation_xilinx COMMAND ./FFT_xilinx -f fft1d_float_8_emulate.xclbin -i 1 -n 1
WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
add_test(NAME test_output_parsing_xilinx COMMAND ${CMAKE_SOURCE_DIR}/../scripts/evaluation/execute_and_parse.sh ./FFT_xilinx -f fft1d_float_8_emulate.xclbin -i 1 -n 1
add_test(NAME test_output_parsing_xilinx COMMAND ${CMAKE_SOURCE_DIR}/../scripts/evaluation/execute_and_parse.sh ./FFT_xilinx -f fft1d_float_8_emulate.xclbin -b 1 -n 1
WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
endif()
35 changes: 24 additions & 11 deletions FFT/src/host/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,19 +1,32 @@

include_directories(../../../extern/cxxopts/include ../../../shared)
include_directories(${CMAKE_BINARY_DIR}/src/common)
add_subdirectory(../../../shared ${CMAKE_BINARY_DIR}/lib/hpccbase)
set(HOST_SOURCE execution_default.cpp fft_benchmark.cpp)

set(HOST_SOURCE execution_default.cpp main.cpp ../../../shared/setup/fpga_setup.cpp fft_functionality.cpp)
set(HOST_EXE_NAME FFT)
set(LIB_NAME fft_lib)

if (INTELFPGAOPENCL_FOUND)
include_directories(${IntelFPGAOpenCL_INCLUDE_DIRS})
add_executable(FFT_intel ${HOST_SOURCE})
target_compile_definitions(FFT_intel PRIVATE -DINTEL_FPGA)
target_link_libraries(FFT_intel ${IntelFPGAOpenCL_LIBRARIES})
add_library(${LIB_NAME}_intel STATIC ${HOST_SOURCE})
target_include_directories(${LIB_NAME}_intel PRIVATE ${HPCCBaseLibrary_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/src/common ${IntelFPGAOpenCL_INCLUDE_DIRS})
target_include_directories(${LIB_NAME}_intel PUBLIC ${CMAKE_SOURCE_DIR}/src/host)
add_executable(${HOST_EXE_NAME}_intel main.cpp)
target_link_libraries(${LIB_NAME}_intel "${IntelFPGAOpenCL_LIBRARIES}" "${OpenMP_CXX_FLAGS}")
target_link_libraries(${LIB_NAME}_intel hpcc_fpga_base)
target_link_libraries(${HOST_EXE_NAME}_intel ${LIB_NAME}_intel)
target_compile_definitions(${LIB_NAME}_intel PRIVATE -DINTEL_FPGA)
target_compile_options(${LIB_NAME}_intel PRIVATE "${OpenMP_CXX_FLAGS}")
add_test(NAME test_intel_host_executable COMMAND $<TARGET_FILE:${HOST_EXE_NAME}_intel> -h)
endif()

if (Vitis_FOUND)
include_directories(${Vitis_INCLUDE_DIRS})
add_executable(FFT_xilinx ${HOST_SOURCE})
target_compile_definitions(FFT_xilinx PRIVATE -DXILINX_FPGA)
target_link_libraries(FFT_xilinx ${Vitis_LIBRARIES})
add_library(${LIB_NAME}_xilinx STATIC ${HOST_SOURCE})
target_include_directories(${LIB_NAME}_xilinx PRIVATE ${HPCCBaseLibrary_INCLUDE_DIRS} ${CMAKE_BINARY_DIR}/src/common ${Vitis_INCLUDE_DIRS})
target_include_directories(${LIB_NAME}_xilinx PUBLIC ${CMAKE_SOURCE_DIR}/src/host)
add_executable(${HOST_EXE_NAME}_xilinx main.cpp)
target_link_libraries(${LIB_NAME}_xilinx ${Vitis_LIBRARIES} "${OpenMP_CXX_FLAGS}")
target_link_libraries(${LIB_NAME}_xilinx hpcc_fpga_base)
target_link_libraries(${HOST_EXE_NAME}_xilinx ${LIB_NAME}_xilinx)
target_compile_definitions(${LIB_NAME}_xilinx PRIVATE -DXILINX_FPGA)
target_compile_options(${LIB_NAME}_xilinx PRIVATE "${OpenMP_CXX_FLAGS}")
add_test(NAME test_xilinx_host_executable COMMAND $<TARGET_FILE:${HOST_EXE_NAME}_xilinx> -h)
endif()
17 changes: 3 additions & 14 deletions FFT/src/host/execution.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,22 +30,11 @@ SOFTWARE.
/* External library headers */
#include "CL/cl.hpp"
#include "parameters.h"
#include "fft_benchmark.hpp"


namespace bm_execution {

struct ExecutionConfiguration {
cl::Context context;
cl::Device device;
cl::Program program;
uint repetitions;
};

struct ExecutionTimings {
unsigned iterations;
bool inverse;
std::vector<double> calculationTimings;
};

/**
The actual execution of the benchmark.
Expand All @@ -57,8 +46,8 @@ simple exchange of the different calculation methods.
@return The timings measured for the executed kernel repetitions
*/
std::shared_ptr<ExecutionTimings>
calculate(std::shared_ptr<ExecutionConfiguration> config, std::complex<HOST_DATA_TYPE>* data, unsigned iterations, bool inverse);
std::unique_ptr<fft::FFTExecutionTimings>
calculate(hpcc_base::ExecutionSettings<fft::FFTProgramSettings> const& config, std::complex<HOST_DATA_TYPE>* data, unsigned iterations, bool inverse);

} // namespace bm_execution

Expand Down
22 changes: 10 additions & 12 deletions FFT/src/host/execution_default.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,32 +39,32 @@ namespace bm_execution {
Implementation for the single kernel.
@copydoc bm_execution::calculate()
*/
std::shared_ptr<ExecutionTimings>
calculate(std::shared_ptr<ExecutionConfiguration> config,
std::unique_ptr<fft::FFTExecutionTimings>
calculate(hpcc_base::ExecutionSettings<fft::FFTProgramSettings> const& config,
std::complex<HOST_DATA_TYPE>* data,
unsigned iterations,
bool inverse) {

cl::Buffer inBuffer = cl::Buffer(config->context, CL_MEM_WRITE_ONLY, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE));
cl::Buffer outBuffer = cl::Buffer(config->context, CL_MEM_READ_ONLY, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE));
cl::Buffer inBuffer = cl::Buffer(*config.context, CL_MEM_WRITE_ONLY, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE));
cl::Buffer outBuffer = cl::Buffer(*config.context, CL_MEM_READ_ONLY, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE));

cl::Kernel fetchKernel(config->program, FETCH_KERNEL_NAME);
cl::Kernel fetchKernel(*config.program, FETCH_KERNEL_NAME);

fetchKernel.setArg(0, inBuffer);

cl::Kernel fftKernel(config->program, FFT_KERNEL_NAME);
cl::Kernel fftKernel(*config.program, FFT_KERNEL_NAME);

fftKernel.setArg(0, outBuffer);
fftKernel.setArg(1, iterations);
fftKernel.setArg(2, static_cast<cl_int>(inverse));

cl::CommandQueue fetchQueue(config->context);
cl::CommandQueue fftQueue(config->context);
cl::CommandQueue fetchQueue(*config.context);
cl::CommandQueue fftQueue(*config.context);

fetchQueue.enqueueWriteBuffer(inBuffer,CL_TRUE,0, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE), data);

std::vector<double> calculationTimings;
for (uint r =0; r < config->repetitions; r++) {
for (uint r =0; r < config.programSettings->numRepetitions; r++) {
auto startCalculation = std::chrono::high_resolution_clock::now();
fetchQueue.enqueueNDRangeKernel(fetchKernel, cl::NullRange, cl::NDRange((1 << LOG_FFT_SIZE)/ FFT_UNROLL * iterations),
cl::NDRange((1 << LOG_FFT_SIZE)/ FFT_UNROLL));
Expand All @@ -80,9 +80,7 @@ namespace bm_execution {

fetchQueue.enqueueReadBuffer(outBuffer,CL_TRUE,0, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE), data);

std::shared_ptr<ExecutionTimings> result(new ExecutionTimings{
iterations,
inverse,
std::unique_ptr<fft::FFTExecutionTimings> result(new fft::FFTExecutionTimings{
calculationTimings
});
return result;
Expand Down
Loading

0 comments on commit ea9d696

Please sign in to comment.