diff --git a/FFT/src/common/parameters.h.in b/FFT/src/common/parameters.h.in
index 2e11cc4c..4d29fbaa 100644
--- a/FFT/src/common/parameters.h.in
+++ b/FFT/src/common/parameters.h.in
@@ -19,6 +19,7 @@
 #define LOG_FFT_SIZE @LOG_FFT_SIZE@
 #define FFT_UNROLL @FFT_UNROLL@
 
+#cmakedefine USE_SVM
 /*
 Short description of the program.
 Moreover the version and build time is also compiled into the description.
diff --git a/FFT/src/host/CMakeLists.txt b/FFT/src/host/CMakeLists.txt
index 0c505452..8568c69e 100755
--- a/FFT/src/host/CMakeLists.txt
+++ b/FFT/src/host/CMakeLists.txt
@@ -13,6 +13,9 @@ if (INTELFPGAOPENCL_FOUND)
     target_link_libraries(${LIB_NAME}_intel "${IntelFPGAOpenCL_LIBRARIES}" "${OpenMP_CXX_FLAGS}")
     target_link_libraries(${LIB_NAME}_intel hpcc_fpga_base)
     target_link_libraries(${HOST_EXE_NAME}_intel ${LIB_NAME}_intel)
+    if (USE_SVM)
+        target_compile_definitions(${LIB_NAME}_intel PRIVATE -DCL_VERSION_2_0)
+    endif()
     target_compile_definitions(${LIB_NAME}_intel PRIVATE -DINTEL_FPGA)
     target_compile_options(${LIB_NAME}_intel PRIVATE "${OpenMP_CXX_FLAGS}")
     add_test(NAME test_intel_host_executable COMMAND $<TARGET_FILE:${HOST_EXE_NAME}_intel> -h)
diff --git a/FFT/src/host/execution.h b/FFT/src/host/execution.h
index 65800bf7..01989309 100644
--- a/FFT/src/host/execution.h
+++ b/FFT/src/host/execution.h
@@ -47,7 +47,7 @@ simple exchange of the different calculation methods.
 @return The resulting matrix
 */
     std::unique_ptr<fft::FFTExecutionTimings>
-    calculate(hpcc_base::ExecutionSettings<fft::FFTProgramSettings> const& config, std::complex<HOST_DATA_TYPE>* data, unsigned iterations, bool inverse);
+    calculate(hpcc_base::ExecutionSettings<fft::FFTProgramSettings> const& config, std::complex<HOST_DATA_TYPE>* data, std::complex<HOST_DATA_TYPE>* data_out, unsigned iterations, bool inverse);
 
 }  // namespace bm_execution
 
diff --git a/FFT/src/host/execution_default.cpp b/FFT/src/host/execution_default.cpp
index 53a693d1..e08be365 100644
--- a/FFT/src/host/execution_default.cpp
+++ b/FFT/src/host/execution_default.cpp
@@ -42,6 +42,7 @@ namespace bm_execution {
     std::unique_ptr<fft::FFTExecutionTimings>
     calculate(hpcc_base::ExecutionSettings<fft::FFTProgramSettings> const&  config,
             std::complex<HOST_DATA_TYPE>* data,
+            std::complex<HOST_DATA_TYPE>* data_out,
             unsigned iterations,
             bool inverse) {
 
@@ -49,19 +50,37 @@ namespace bm_execution {
         cl::Buffer outBuffer = cl::Buffer(*config.context, CL_MEM_READ_ONLY, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE));
 
         cl::Kernel fetchKernel(*config.program, FETCH_KERNEL_NAME);
-
-        fetchKernel.setArg(0, inBuffer);
-
         cl::Kernel fftKernel(*config.program, FFT_KERNEL_NAME);
 
+#ifdef USE_SVM
+        clSetKernelArgSVMPointer(fetchKernel(), 0,
+                                        reinterpret_cast<void*>(data));
+        clSetKernelArgSVMPointer(fftKernel(), 0,
+                                        reinterpret_cast<void*>(data_out));
+#else
+        fetchKernel.setArg(0, inBuffer);
         fftKernel.setArg(0, outBuffer);
+#endif
         fftKernel.setArg(1, iterations);
         fftKernel.setArg(2, static_cast<cl_int>(inverse));
 
         cl::CommandQueue fetchQueue(*config.context);
         cl::CommandQueue fftQueue(*config.context);
 
+#ifdef USE_SVM
+        clEnqueueSVMMap(fetchQueue(), CL_TRUE,
+                        CL_MAP_READ,
+                        reinterpret_cast<void *>(data),
+                        (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(fftQueue(), CL_TRUE,
+                        CL_MAP_WRITE,
+                        reinterpret_cast<void *>(data_out),
+                        (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE), 0,
+                        NULL, NULL);
+#else
         fetchQueue.enqueueWriteBuffer(inBuffer,CL_TRUE,0, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE), data);
+#endif
 
         std::vector<double> calculationTimings;
         for (uint r =0; r < config.programSettings->numRepetitions; r++) {
@@ -77,8 +96,16 @@ namespace bm_execution {
                             (endCalculation - startCalculation);
             calculationTimings.push_back(calculationTime.count());
         }
-
-        fetchQueue.enqueueReadBuffer(outBuffer,CL_TRUE,0, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE), data);
+#ifdef USE_SVM
+            clEnqueueSVMUnmap(fetchQueue(),
+                                reinterpret_cast<void *>(data), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(fftQueue(),
+                                reinterpret_cast<void *>(data_out), 0,
+                                NULL, NULL);
+#else
+        fetchQueue.enqueueReadBuffer(outBuffer,CL_TRUE,0, (1 << LOG_FFT_SIZE) * iterations * 2 * sizeof(HOST_DATA_TYPE), data_out);
+#endif
 
         std::unique_ptr<fft::FFTExecutionTimings> result(new fft::FFTExecutionTimings{
                 calculationTimings
diff --git a/FFT/src/host/fft_benchmark.cpp b/FFT/src/host/fft_benchmark.cpp
index 2a84e492..cf943c21 100644
--- a/FFT/src/host/fft_benchmark.cpp
+++ b/FFT/src/host/fft_benchmark.cpp
@@ -47,6 +47,30 @@ fft::FFTProgramSettings::getSettingsMap() {
         return map;
 }
 
+fft::FFTData::FFTData(cl::Context context, uint iterations) : context(context) {
+    // Input and output each hold `iterations` back-to-back FFTs of (1 << LOG_FFT_SIZE) complex values.
+    const auto buffer_bytes = iterations * (1 << LOG_FFT_SIZE) * sizeof(std::complex<HOST_DATA_TYPE>);
+#ifdef USE_SVM
+    // SVM build: request both buffers from the OpenCL runtime (flags 0, alignment 1024).
+    data = static_cast<std::complex<HOST_DATA_TYPE>*>(clSVMAlloc(context(), 0, buffer_bytes, 1024));
+    data_out = static_cast<std::complex<HOST_DATA_TYPE>*>(clSVMAlloc(context(), 0, buffer_bytes, 1024));
+#else
+    // Default build: plain host memory with 64-byte alignment.
+    posix_memalign(reinterpret_cast<void**>(&data), 64, buffer_bytes);
+    posix_memalign(reinterpret_cast<void**>(&data_out), 64, buffer_bytes);
+#endif
+}
+
+fft::FFTData::~FFTData() {
+#ifdef USE_SVM  // both buffers were obtained from clSVMAlloc on this context
+    clSVMFree(context(), static_cast<void*>(data));
+    clSVMFree(context(), static_cast<void*>(data_out));
+#else  // both buffers were obtained from posix_memalign
+    free(data);
+    free(data_out);
+#endif
+}
+
 fft::FFTBenchmark::FFTBenchmark(int argc, char* argv[]) {
     setupBenchmark(argc, argv);
 }
@@ -63,7 +87,7 @@ fft::FFTBenchmark::addAdditionalParseOptions(cxxopts::Options &options) {
 
 std::unique_ptr<fft::FFTExecutionTimings>
 fft::FFTBenchmark::executeKernel(FFTData &data) {
-    return bm_execution::calculate(*executionSettings, data.data,executionSettings->programSettings->iterations,
+    return bm_execution::calculate(*executionSettings, data.data, data.data_out, executionSettings->programSettings->iterations,
                                          executionSettings->programSettings->inverse);
 }
 
@@ -85,33 +109,34 @@ fft::FFTBenchmark::printResults(const fft::FFTExecutionTimings &output) {
 
 std::unique_ptr<fft::FFTData>
 fft::FFTBenchmark::generateInputData() {
-    auto d = std::unique_ptr<fft::FFTData>(new fft::FFTData(executionSettings->programSettings->iterations));
+    auto d = std::unique_ptr<fft::FFTData>(new fft::FFTData(*executionSettings->context, executionSettings->programSettings->iterations));
     std::mt19937 gen(0);
     auto dis = std::uniform_real_distribution<HOST_DATA_TYPE>(-1.0, 1.0);
     for (int i=0; i< executionSettings->programSettings->iterations * (1 << LOG_FFT_SIZE); i++) {
         d->data[i].real(dis(gen));
         d->data[i].imag(dis(gen));
+        d->data_out[i].real(0.0);
+        d->data_out[i].imag(0.0);
     }
     return d;
 }
 
 bool  
 fft::FFTBenchmark::validateOutputAndPrintError(fft::FFTData &data) {
-    auto verify_data = generateInputData();
     double residual_max = 0;
     for (int i = 0; i < executionSettings->programSettings->iterations; i++) {
         // we have to bit reverse the output data of the FPGA kernel, since it will be provided in bit-reversed order.
         // Directly applying iFFT on the data would thus not form the identity function we want to have for verification.
         // TODO: This might need to be changed for other FPGA implementations that return the data in correct order
-        fft::bit_reverse(&data.data[i * (1 << LOG_FFT_SIZE)], 1);
-        fft::fourier_transform_gold(true, LOG_FFT_SIZE, &data.data[i * (1 << LOG_FFT_SIZE)]);
+        fft::bit_reverse(&data.data_out[i * (1 << LOG_FFT_SIZE)], 1);
+        fft::fourier_transform_gold(true, LOG_FFT_SIZE, &data.data_out[i * (1 << LOG_FFT_SIZE)]);
 
         // Normalize the data after applying iFFT
         for (int j = 0; j < (1 << LOG_FFT_SIZE); j++) {
-            data.data[i * (1 << LOG_FFT_SIZE) + j] /= (1 << LOG_FFT_SIZE);
+            data.data_out[i * (1 << LOG_FFT_SIZE) + j] /= (1 << LOG_FFT_SIZE);
         }
         for (int j = 0; j < (1 << LOG_FFT_SIZE); j++) {
-            double tmp_error =  std::abs(verify_data->data[i * (1 << LOG_FFT_SIZE) + j] - data.data[i * (1 << LOG_FFT_SIZE) + j]);
+            double tmp_error =  std::abs(data.data[i * (1 << LOG_FFT_SIZE) + j] - data.data_out[i * (1 << LOG_FFT_SIZE) + j]);
             residual_max = residual_max > tmp_error ? residual_max : tmp_error;
         }
     }
diff --git a/FFT/src/host/fft_benchmark.hpp b/FFT/src/host/fft_benchmark.hpp
index 2f1cbd17..53d8dea4 100644
--- a/FFT/src/host/fft_benchmark.hpp
+++ b/FFT/src/host/fft_benchmark.hpp
@@ -80,27 +80,36 @@ class FFTData {
 public:
 
     /**
-     * @brief The data array used ofr the FFT calculation
+     * @brief The data array used as input of the FFT calculation
      * 
      */
     std::complex<HOST_DATA_TYPE>* data;
 
+    /**
+     * @brief The data array used as output of the FFT calculation
+     * 
+     */
+    std::complex<HOST_DATA_TYPE>* data_out;
+
+    /**
+     * @brief The context that is used to allocate memory in SVM mode
+     * 
+     */
+    cl::Context context;
+
     /**
      * @brief Construct a new FFT Data object
      * 
+     * @param context The OpenCL context used to allocate memory in SVM mode
      * @param iterations Number of FFT data that will be stored sequentially in the array
      */
-    FFTData(uint iterations) {
-        posix_memalign(reinterpret_cast<void**>(&data), 64, iterations * (1 << LOG_FFT_SIZE) * sizeof(std::complex<HOST_DATA_TYPE>));
-    }
+    FFTData(cl::Context context, uint iterations);
 
     /**
      * @brief Destroy the FFT Data object. Free the allocated memory
      * 
      */
-    ~FFTData() {
-        free(data);
-    }
+     ~FFTData();
 
 };
 
diff --git a/FFT/tests/test_execution_functionality.cpp b/FFT/tests/test_execution_functionality.cpp
index 3db9d624..cd0d85a0 100644
--- a/FFT/tests/test_execution_functionality.cpp
+++ b/FFT/tests/test_execution_functionality.cpp
@@ -58,7 +58,7 @@ TEST_F(FFTKernelTest, FFTReturnsZero) {
     }
     auto result = bm->executeKernel(*data);
     for (int i=0; i<(1 << LOG_FFT_SIZE); i++) {
-        EXPECT_FLOAT_EQ(std::abs(data->data[i]), 0.0);
+        EXPECT_FLOAT_EQ(std::abs(data->data_out[i]), 0.0);
     }
 }
 
@@ -72,11 +72,11 @@ TEST_F(FFTKernelTest, FFTCloseToZeroForAll1And1) {
         data->data[i].imag(1.0);
     }
     auto result = bm->executeKernel(*data);
-    EXPECT_NEAR(data->data[0].real(), (1 << LOG_FFT_SIZE), 0.00001);
-    EXPECT_NEAR(data->data[0].imag(), (1 << LOG_FFT_SIZE), 0.00001);
+    EXPECT_NEAR(data->data_out[0].real(), (1 << LOG_FFT_SIZE), 0.00001);
+    EXPECT_NEAR(data->data_out[0].imag(), (1 << LOG_FFT_SIZE), 0.00001);
     for (int i=1; i < (1 << LOG_FFT_SIZE); i++) {
-        EXPECT_NEAR(data->data[i].real(), 0.0, 0.00001);
-        EXPECT_NEAR(data->data[i].imag(), 0.0, 0.00001);
+        EXPECT_NEAR(data->data_out[i].real(), 0.0, 0.00001);
+        EXPECT_NEAR(data->data_out[i].imag(), 0.0, 0.00001);
     }
 }
 
@@ -90,11 +90,11 @@ TEST_F(FFTKernelTest, IFFTCloseToZeroForAll1And1) {
         data->data[i].imag(0.0);
     }
     auto result = bm->executeKernel(*data);
-    EXPECT_NEAR(data->data[0].real(), static_cast<HOST_DATA_TYPE>(1 << LOG_FFT_SIZE), 0.00001);
-    EXPECT_NEAR(data->data[0].imag(), 0.0, 0.00001);
+    EXPECT_NEAR(data->data_out[0].real(), static_cast<HOST_DATA_TYPE>(1 << LOG_FFT_SIZE), 0.00001);
+    EXPECT_NEAR(data->data_out[0].imag(), 0.0, 0.00001);
     for (int i=1; i < (1 << LOG_FFT_SIZE); i++) {
-        EXPECT_NEAR(data->data[i].real(), 0.0, 0.00001);
-        EXPECT_NEAR(data->data[i].imag(), 0.0, 0.00001);
+        EXPECT_NEAR(data->data_out[i].real(), 0.0, 0.00001);
+        EXPECT_NEAR(data->data_out[i].imag(), 0.0, 0.00001);
     }
 }
 
@@ -108,18 +108,24 @@ TEST_F(FFTKernelTest, FFTandiFFTProduceResultCloseToSource) {
 
     // Normalize iFFT result
     for (int i=0; i<(1 << LOG_FFT_SIZE); i++) {
-        data->data[i] /=  (1 << LOG_FFT_SIZE);
+        data->data_out[i] /=  (1 << LOG_FFT_SIZE);
     }
 
     // Need to again bit reverse input for iFFT
-    fft::bit_reverse(data->data, 1);
+    fft::bit_reverse(data->data_out, 1);
+
+    // Copy to input buffer for iFFT
+    for (int i=0; i<(1 << LOG_FFT_SIZE); i++) {
+        data->data[i] =  data->data_out[i];
+    }
+
     bm->getExecutionSettings().programSettings->inverse = true;
     auto result2 = bm->executeKernel(*data);
     // Since data was already sorted by iFFT the bit reversal of the kernel has t be undone
-    fft::bit_reverse(data->data, 1);
+    fft::bit_reverse(data->data_out, 1);
 
     for (int i=1; i < (1 << LOG_FFT_SIZE); i++) {
-        EXPECT_NEAR(std::abs(data->data[i]), std::abs(verify_data->data[i]), 0.001);
+        EXPECT_NEAR(std::abs(data->data_out[i]), std::abs(verify_data->data[i]), 0.001);
     }
 }
 
@@ -136,10 +142,10 @@ TEST_F(FFTKernelTest, FPGAFFTAndCPUFFTGiveSameResults) {
 
     // Normalize iFFT result
     for (int i=0; i<(1 << LOG_FFT_SIZE); i++) {
-        data->data[i] -= verify_data->data[i];
+        data->data_out[i] -= verify_data->data[i];
     }
     for (int i=1; i < (1 << LOG_FFT_SIZE); i++) {
-        EXPECT_NEAR(std::abs(data->data[i]), 0.0, 0.001);
+        EXPECT_NEAR(std::abs(data->data_out[i]), 0.0, 0.001);
     }
 }
 
@@ -157,9 +163,9 @@ TEST_F(FFTKernelTest, FPGAiFFTAndCPUiFFTGiveSameResults) {
 
     // Normalize iFFT result
     for (int i=0; i<(1 << LOG_FFT_SIZE); i++) {
-        data->data[i] -= verify_data->data[i];
+        data->data_out[i] -= verify_data->data[i];
     }
     for (int i=1; i < (1 << LOG_FFT_SIZE); i++) {
-        EXPECT_NEAR(std::abs(data->data[i]), 0.0, 0.001);
+        EXPECT_NEAR(std::abs(data->data_out[i]), 0.0, 0.001);
     }
 }
diff --git a/GEMM/src/common/parameters.h.in b/GEMM/src/common/parameters.h.in
index 275d63be..9686f5e9 100644
--- a/GEMM/src/common/parameters.h.in
+++ b/GEMM/src/common/parameters.h.in
@@ -18,6 +18,8 @@
 #define HOST_DATA_TYPE @HOST_DATA_TYPE@
 #define DEVICE_DATA_TYPE @DEVICE_DATA_TYPE@
 
+#cmakedefine USE_SVM
+
 /*
 Short description of the program
 */
diff --git a/GEMM/src/host/execution_cannon.cpp b/GEMM/src/host/execution_cannon.cpp
index 927997a8..e615430a 100644
--- a/GEMM/src/host/execution_cannon.cpp
+++ b/GEMM/src/host/execution_cannon.cpp
@@ -79,6 +79,16 @@ calculate(hpcc_base::ExecutionSettings<gemm::GEMMProgramSettings> const& config,
 
 
     // prepare kernels
+#ifdef USE_SVM  // SVM build: bind the host pointers directly; check each status like the buffer branch does
+    err = clSetKernelArgSVMPointer(gemmkernel(), 0, reinterpret_cast<void*>(a));
+    ASSERT_CL(err);
+    err = clSetKernelArgSVMPointer(gemmkernel(), 1, reinterpret_cast<void*>(b));
+    ASSERT_CL(err);
+    err = clSetKernelArgSVMPointer(gemmkernel(), 2, reinterpret_cast<void*>(c));
+    ASSERT_CL(err);
+    err = clSetKernelArgSVMPointer(gemmkernel(), 3, reinterpret_cast<void*>(c_out));
+    ASSERT_CL(err);
+#else
     err = gemmkernel.setArg(0, Buffer_a);
     ASSERT_CL(err);
     err = gemmkernel.setArg(1, Buffer_b);
@@ -87,6 +97,7 @@ calculate(hpcc_base::ExecutionSettings<gemm::GEMMProgramSettings> const& config,
     ASSERT_CL(err);
     err = gemmkernel.setArg(3, Buffer_c_out);
     ASSERT_CL(err);
+#endif
     err = gemmkernel.setArg(4, alpha);
     ASSERT_CL(err);
     err = gemmkernel.setArg(5, beta);
@@ -98,7 +109,33 @@ calculate(hpcc_base::ExecutionSettings<gemm::GEMMProgramSettings> const& config,
 
     double t;
     std::vector<double> executionTimes;
-    for (int i = 0; i < config.programSettings->matrixSize; i++) {
+    for (int i = 0; i < config.programSettings->numRepetitions; i++) {
+#ifdef USE_SVM
+        clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ,
+                        reinterpret_cast<void *>(a),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ,
+                        reinterpret_cast<void *>(b),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ,
+                        reinterpret_cast<void *>(c),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_WRITE,
+                        reinterpret_cast<void *>(c_out),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+#else
         compute_queue.enqueueWriteBuffer(Buffer_a, CL_TRUE, 0,
                                     sizeof(HOST_DATA_TYPE)*config.programSettings->matrixSize*config.programSettings->matrixSize, a);
         compute_queue.enqueueWriteBuffer(Buffer_b, CL_TRUE, 0,
@@ -106,20 +143,33 @@ calculate(hpcc_base::ExecutionSettings<gemm::GEMMProgramSettings> const& config,
         compute_queue.enqueueWriteBuffer(Buffer_c_in, CL_TRUE, 0,
                                     sizeof(HOST_DATA_TYPE)*config.programSettings->matrixSize*config.programSettings->matrixSize, c);
         compute_queue.finish();
+#endif
         auto t1 = std::chrono::high_resolution_clock::now();
         compute_queue.enqueueTask(gemmkernel);
         compute_queue.finish();
         auto t2 = std::chrono::high_resolution_clock::now();
-        std::chrono::duration<double> timespan =
-            std::chrono::duration_cast<std::chrono::duration<double>>
-                                                                (t2 - t1);
+        std::chrono::duration<double> timespan = t2 - t1;
         executionTimes.push_back(timespan.count());
     }
 
     /* --- Read back results from Device --- */
-
+#ifdef USE_SVM
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(a), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(b), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(c), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(c_out), 0,
+                                NULL, NULL);
+#else
     compute_queue.enqueueReadBuffer(Buffer_c_out, CL_TRUE, 0,
                                      sizeof(HOST_DATA_TYPE)*config.programSettings->matrixSize*config.programSettings->matrixSize, c_out);
+#endif
 
 
     std::unique_ptr<gemm::GEMMExecutionTimings> results(
diff --git a/GEMM/src/host/gemm_benchmark.cpp b/GEMM/src/host/gemm_benchmark.cpp
index a5fe009a..beb7005d 100644
--- a/GEMM/src/host/gemm_benchmark.cpp
+++ b/GEMM/src/host/gemm_benchmark.cpp
@@ -46,6 +46,42 @@ gemm::GEMMProgramSettings::getSettingsMap() {
         return map;
 }
 
+/**
+ * Allocates the four size x size matrices A, B, C and C_out. The byte count is computed in
+ * size_t so that size * size cannot wrap around in unsigned-int arithmetic for large matrices.
+ * NOTE(review): the results of clSVMAlloc / posix_memalign are not checked here.
+ */
+gemm::GEMMData::GEMMData(cl::Context context, uint size) : normtotal(0.0), alpha(0.5), beta(2.0), context(context) {
+    const size_t matrix_bytes = static_cast<size_t>(size) * size * sizeof(HOST_DATA_TYPE);
+#ifdef USE_SVM
+    // SVM build: request the matrices from the OpenCL runtime (flags 0, alignment 1024).
+    A = static_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+    B = static_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+    C = static_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+    C_out = static_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+#else
+    // Default build: plain host memory with 4096-byte alignment.
+    posix_memalign(reinterpret_cast<void**>(&A), 4096, matrix_bytes);
+    posix_memalign(reinterpret_cast<void**>(&B), 4096, matrix_bytes);
+    posix_memalign(reinterpret_cast<void**>(&C), 4096, matrix_bytes);
+    posix_memalign(reinterpret_cast<void**>(&C_out), 4096, matrix_bytes);
+#endif
+}
+
+gemm::GEMMData::~GEMMData() {
+#ifdef USE_SVM  // matrices were obtained from clSVMAlloc; clSVMFree takes a plain void*, not void**
+    clSVMFree(context(), static_cast<void*>(A));
+    clSVMFree(context(), static_cast<void*>(B));
+    clSVMFree(context(), static_cast<void*>(C));
+    clSVMFree(context(), static_cast<void*>(C_out));
+#else  // matrices were obtained from posix_memalign
+    free(A);
+    free(B);
+    free(C);
+    free(C_out);
+#endif
+}
+
 gemm::GEMMBenchmark::GEMMBenchmark(int argc, char* argv[]) {
     setupBenchmark(argc, argv);
 }
@@ -74,9 +110,9 @@ gemm::GEMMBenchmark::printResults(const gemm::GEMMExecutionTimings &output) {
     double tmean = 0;
     double tmin = std::numeric_limits<double>::max();
 
-    double gflops = 2.0 * static_cast<double>(executionSettings->programSettings->matrixSize
-                                            *executionSettings->programSettings->matrixSize
-                                            *executionSettings->programSettings->matrixSize)/1.0e9;
+    double gflops = 2.0 * (static_cast<double>(executionSettings->programSettings->matrixSize)
+                        *static_cast<double>(executionSettings->programSettings->matrixSize)
+                        *static_cast<double>(executionSettings->programSettings->matrixSize))/1.0e9;
     for (double currentTime : output.timings) {
         tmean +=  currentTime;
         if (currentTime < tmin) {
@@ -94,7 +130,7 @@ gemm::GEMMBenchmark::printResults(const gemm::GEMMExecutionTimings &output) {
 
 std::unique_ptr<gemm::GEMMData>
 gemm::GEMMBenchmark::generateInputData() {
-    auto d = std::unique_ptr<gemm::GEMMData>(new gemm::GEMMData(executionSettings->programSettings->matrixSize));
+    auto d = std::unique_ptr<gemm::GEMMData>(new gemm::GEMMData(*executionSettings->context, executionSettings->programSettings->matrixSize));
     std::mt19937 gen(7);
     std::uniform_real_distribution<> dis(-1.0, 1.0);
     for (int j = 0; j < executionSettings->programSettings->matrixSize; j++) {
diff --git a/GEMM/src/host/gemm_benchmark.hpp b/GEMM/src/host/gemm_benchmark.hpp
index 7a7ccd5c..1a632e30 100644
--- a/GEMM/src/host/gemm_benchmark.hpp
+++ b/GEMM/src/host/gemm_benchmark.hpp
@@ -117,6 +117,12 @@ class GEMMData {
      */
     HOST_DATA_TYPE alpha;
 
+    /**
+     * @brief The context that is used to allocate memory in SVM mode
+     * 
+     */
+    cl::Context context;
+
     /**
      * @brief The scalar value that will be used for \f$\beta\f$ in the calculation
      * 
@@ -126,25 +132,16 @@ class GEMMData {
     /**
      * @brief Construct a new GEMM Data object
      * 
+     * @param context The OpenCL context used to allocate memory in SVM mode
      * @param size Size of the allocated square matrices
      */
-    GEMMData(uint size) : normtotal(0.0), alpha(0.5), beta(2.0) {
-        posix_memalign(reinterpret_cast<void**>(&A), 4096, size * size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&B), 4096, size * size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&C), 4096, size * size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&C_out), 4096, size * size * sizeof(HOST_DATA_TYPE));
-    }
+    GEMMData(cl::Context context, uint size);
 
     /**
      * @brief Destroy the GEMM Data object. Free the allocated memory
      * 
      */
-    ~GEMMData() {
-        free(A);
-        free(B);
-        free(C);
-        free(C_out);
-    }
+    ~GEMMData();
 
 };
 
diff --git a/LINPACK/src/common/parameters.h.in b/LINPACK/src/common/parameters.h.in
index 1b43c6ae..e1346397 100644
--- a/LINPACK/src/common/parameters.h.in
+++ b/LINPACK/src/common/parameters.h.in
@@ -20,6 +20,8 @@
 #define LOCAL_MEM_BLOCK_LOG @LOCAL_MEM_BLOCK_LOG@
 #define REGISTER_BLOCK_LOG @REGISTER_BLOCK_LOG@
 
+#cmakedefine USE_SVM
+
 /*
 Short description of the program
 */
diff --git a/LINPACK/src/host/execution_blocked_pvt.cpp b/LINPACK/src/host/execution_blocked_pvt.cpp
index c6ea99d4..0e3dcd65 100644
--- a/LINPACK/src/host/execution_blocked_pvt.cpp
+++ b/LINPACK/src/host/execution_blocked_pvt.cpp
@@ -66,10 +66,24 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
 
 
     // prepare kernels
+#ifdef USE_SVM
+    // To prevent the reuse of the result of previous repetitions, the kernel works on
+    // this scratch SVM buffer; the result is copied back to the real buffer afterwards.
+    HOST_DATA_TYPE* A_tmp = reinterpret_cast<HOST_DATA_TYPE*>(
+                    clSVMAlloc((*config.context)(), 0 ,
+                    config.programSettings->matrixSize *
+                    config.programSettings->matrixSize * sizeof(HOST_DATA_TYPE), 1024));
+    // Check the status of every argument binding, exactly as the buffer branch below does.
+    err = clSetKernelArgSVMPointer(gefakernel(), 0, reinterpret_cast<void*>(A_tmp));
+    ASSERT_CL(err);
+    err = clSetKernelArgSVMPointer(gefakernel(), 1, reinterpret_cast<void*>(ipvt));
+    ASSERT_CL(err);
+#else
     err = gefakernel.setArg(0, Buffer_a);
     ASSERT_CL(err);
     err = gefakernel.setArg(1, Buffer_pivot);
     ASSERT_CL(err);
+#endif
     err = gefakernel.setArg(2, static_cast<uint>(config.programSettings->matrixSize >> LOCAL_MEM_BLOCK_LOG));
     ASSERT_CL(err);
 
@@ -78,9 +92,34 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
     double t;
     std::vector<double> executionTimes;
     for (int i = 0; i < config.programSettings->numRepetitions; i++) {
+#ifdef USE_SVM
+        for (int k=0; k < config.programSettings->matrixSize * config.programSettings->matrixSize; k++) {
+            A_tmp[k] = A[k];
+        }
+
+        clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ | CL_MAP_WRITE,
+                        reinterpret_cast<void *>(A_tmp),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ,
+                        reinterpret_cast<void *>(b),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_WRITE,
+                        reinterpret_cast<void *>(ipvt),
+                        sizeof(cl_int) *
+                        (config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+#else
         compute_queue.enqueueWriteBuffer(Buffer_a, CL_TRUE, 0,
                                     sizeof(HOST_DATA_TYPE)*config.programSettings->matrixSize*config.programSettings->matrixSize, A);
         compute_queue.finish();
+#endif
         auto t1 = std::chrono::high_resolution_clock::now();
         compute_queue.enqueueTask(gefakernel);
         compute_queue.finish();
@@ -93,10 +132,29 @@ calculate(const hpcc_base::ExecutionSettings<linpack::LinpackProgramSettings>&co
 
     /* --- Read back results from Device --- */
 
+#ifdef USE_SVM
+    clEnqueueSVMUnmap(compute_queue(),
+                        reinterpret_cast<void *>(A_tmp), 0,
+                        NULL, NULL);
+    clEnqueueSVMUnmap(compute_queue(),
+                        reinterpret_cast<void *>(b), 0,
+                        NULL, NULL);
+    clEnqueueSVMUnmap(compute_queue(),
+                        reinterpret_cast<void *>(ipvt), 0,
+                        NULL, NULL);
+    // A_tmp (not A) is the region that was mapped before the kernel launch, so it is the one unmapped above.
+    // read back result from temporary buffer
+    for (int k=0; k < config.programSettings->matrixSize * config.programSettings->matrixSize; k++) {
+        A[k] = A_tmp[k];
+    }
+    clSVMFree((*config.context)(), reinterpret_cast<void*>(A_tmp));
+
+#else
     compute_queue.enqueueReadBuffer(Buffer_a, CL_TRUE, 0,
                                      sizeof(HOST_DATA_TYPE)*config.programSettings->matrixSize*config.programSettings->matrixSize, A);
     compute_queue.enqueueReadBuffer(Buffer_pivot, CL_TRUE, 0,
                                      sizeof(cl_int)*config.programSettings->matrixSize, ipvt);
+#endif
 
     // Solve linear equations on CPU
     // TODO: This has to be done on FPGA
diff --git a/LINPACK/src/host/linpack_benchmark.cpp b/LINPACK/src/host/linpack_benchmark.cpp
index deab852b..300c8dc3 100644
--- a/LINPACK/src/host/linpack_benchmark.cpp
+++ b/LINPACK/src/host/linpack_benchmark.cpp
@@ -46,6 +46,36 @@ linpack::LinpackProgramSettings::getSettingsMap() {
         return map;
 }
 
+linpack::LinpackData::LinpackData(cl::Context context, uint size) : norma(0.0), context(context) {
+    // Byte counts are computed in size_t so that size * size cannot wrap around in unsigned-int arithmetic.
+    const size_t matrix_bytes = static_cast<size_t>(size) * size * sizeof(HOST_DATA_TYPE);
+    const size_t vector_bytes = size * sizeof(HOST_DATA_TYPE);
+    const size_t pivot_bytes = size * sizeof(cl_int);
+#ifdef USE_SVM
+    // SVM build: request A, b and ipvt from the OpenCL runtime (flags 0, alignment 1024).
+    A = static_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+    b = static_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, vector_bytes, 1024));
+    ipvt = static_cast<cl_int*>(clSVMAlloc(context(), 0, pivot_bytes, 1024));
+#else
+    // Default build: plain host memory with 4096-byte alignment.
+    posix_memalign(reinterpret_cast<void**>(&A), 4096, matrix_bytes);
+    posix_memalign(reinterpret_cast<void**>(&b), 4096, vector_bytes);
+    posix_memalign(reinterpret_cast<void**>(&ipvt), 4096, pivot_bytes);
+#endif
+}
+
+linpack::LinpackData::~LinpackData() {
+#ifdef USE_SVM  // A, b and ipvt were obtained from clSVMAlloc on this context
+    clSVMFree(context(), static_cast<void*>(A));
+    clSVMFree(context(), static_cast<void*>(b));
+    clSVMFree(context(), static_cast<void*>(ipvt));
+#else  // A, b and ipvt were obtained from posix_memalign
+    free(A);
+    free(b);
+    free(ipvt);
+#endif
+}
+
 linpack::LinpackBenchmark::LinpackBenchmark(int argc, char* argv[]) {
     setupBenchmark(argc, argv);
 }
@@ -100,7 +130,7 @@ linpack::LinpackBenchmark::printResults(const linpack::LinpackExecutionTimings &
 
 std::unique_ptr<linpack::LinpackData>
 linpack::LinpackBenchmark::generateInputData() {
-    auto d = std::unique_ptr<linpack::LinpackData>(new linpack::LinpackData(executionSettings->programSettings->matrixSize));
+    auto d = std::unique_ptr<linpack::LinpackData>(new linpack::LinpackData(*executionSettings->context ,executionSettings->programSettings->matrixSize));
     std::mt19937 gen(7);
     std::uniform_real_distribution<> dis(-1.0, 1.0);
     d->norma = 0.0;
diff --git a/LINPACK/src/host/linpack_benchmark.hpp b/LINPACK/src/host/linpack_benchmark.hpp
index 68efe1f6..c9e2d3c7 100644
--- a/LINPACK/src/host/linpack_benchmark.hpp
+++ b/LINPACK/src/host/linpack_benchmark.hpp
@@ -92,6 +92,12 @@ class LinpackData {
      */
     cl_int* ipvt;
 
+    /**
+     * @brief The context that is used to allocate memory in SVM mode
+     * 
+     */
+    cl::Context context;
+
     /**
      * @brief The maximum value of A that will be used for the error calculation
      * 
@@ -101,23 +107,16 @@ class LinpackData {
     /**
      * @brief Construct a new Linpack Data object
      * 
+     * @param context The OpenCL context used to allocate memory in SVM mode
      * @param size Size of the allocated square matrix and vectors
      */
-    LinpackData(uint size) : norma(0.0) {
-        posix_memalign(reinterpret_cast<void**>(&A), 4096, size * size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&b), 4096, size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&ipvt), 4096, size * sizeof(cl_int));
-    }
+    LinpackData(cl::Context context, uint size);
 
     /**
      * @brief Destroy the Linpack Data object. Free the allocated memory
      * 
      */
-    ~LinpackData() {
-        free(A);
-        free(b);
-        free(ipvt);
-    }
+    ~LinpackData();
 
 };
 
diff --git a/LINPACK/tests/CMakeLists.txt b/LINPACK/tests/CMakeLists.txt
index b2f6adc6..a4383d47 100755
--- a/LINPACK/tests/CMakeLists.txt
+++ b/LINPACK/tests/CMakeLists.txt
@@ -25,6 +25,9 @@ if (INTELFPGAOPENCL_FOUND)
         target_link_libraries(${HOST_EXE_NAME}_test_intel ${LAPACK_LIBRARIES})
         include_directories(SYSTEM $ENV{MKLROOT}/include)
     endif()
+    if (USE_SVM)
+        target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DCL_VERSION_2_0)
+    endif()
     add_dependencies(${HOST_EXE_NAME}_test_intel lu_blocked_pvt_emulate_intel)
     add_dependencies(${HOST_EXE_NAME}_test_intel lu_blocked_pvt_test_emulate_intel)
     target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DINTEL_FPGA)
diff --git a/LINPACK/tests/test_kernel_functionality_separate_cores.cpp b/LINPACK/tests/test_kernel_functionality_separate_cores.cpp
index ec580769..beed8f03 100644
--- a/LINPACK/tests/test_kernel_functionality_separate_cores.cpp
+++ b/LINPACK/tests/test_kernel_functionality_separate_cores.cpp
@@ -25,7 +25,25 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
         char* argv[3] = {"Test", "-f", kernelFileName};
         bm = std::unique_ptr<linpack::LinpackBenchmark>(new linpack::LinpackBenchmark(argc, argv));
         array_size = (1 << LOCAL_MEM_BLOCK_LOG);
+        bm->getExecutionSettings().programSettings->numRepetitions = 1;
         bm->getExecutionSettings().programSettings->matrixSize = array_size;
+#ifdef USE_SVM
+        A = reinterpret_cast<HOST_DATA_TYPE*>(
+                            clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
+                            array_size * array_size * sizeof(HOST_DATA_TYPE), 1024));
+        B = reinterpret_cast<HOST_DATA_TYPE*>(
+                            clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
+                            array_size * array_size * sizeof(HOST_DATA_TYPE), 1024));
+        C = reinterpret_cast<HOST_DATA_TYPE*>(
+                            clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
+                            array_size * array_size * sizeof(HOST_DATA_TYPE), 1024));
+        scale = reinterpret_cast<HOST_DATA_TYPE*>(
+                            clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
+                            array_size  * sizeof(HOST_DATA_TYPE), 1024));
+        ipvt = reinterpret_cast<cl_int*>(
+                            clSVMAlloc((*bm->getExecutionSettings().context)(), 0 ,
+                            array_size * sizeof(cl_int), 1024));
+#else
         posix_memalign(reinterpret_cast<void **>(&A), 4096,
                        sizeof(HOST_DATA_TYPE) * array_size * array_size);
         posix_memalign(reinterpret_cast<void **>(&B), 4096,
@@ -36,6 +54,7 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
                        sizeof(HOST_DATA_TYPE) * array_size );
         posix_memalign(reinterpret_cast<void **>(&ipvt), 4096,
                        sizeof(cl_int) * array_size);  
+#endif
     }
 
     void initializeData() {
@@ -83,6 +102,18 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
 
 
         // prepare kernels
+#ifdef USE_SVM
+        err = clSetKernelArgSVMPointer(test_c4_kernel(), 0, reinterpret_cast<void*>(A));
+        ASSERT_CL(err);  // check every SVM argument, as the buffer branch does for setArg
+        err = clSetKernelArgSVMPointer(test_c4_kernel(), 1, reinterpret_cast<void*>(B));
+        ASSERT_CL(err);
+        err = clSetKernelArgSVMPointer(test_c4_kernel(), 2, reinterpret_cast<void*>(C));
+        ASSERT_CL(err);
+        err = clSetKernelArgSVMPointer(test_c4_kernel(), 3, reinterpret_cast<void*>(scale));
+        ASSERT_CL(err);
+        err = clSetKernelArgSVMPointer(test_c4_kernel(), 4, reinterpret_cast<void*>(ipvt));
+        ASSERT_CL(err);
+#else
         err = test_c4_kernel.setArg(0, Buffer_a);
         ASSERT_CL(err);
         err = test_c4_kernel.setArg(1, Buffer_b);
@@ -93,6 +124,7 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
         ASSERT_CL(err);
         err = test_c4_kernel.setArg(4, Buffer_pivot);
         ASSERT_CL(err);
+#endif
         err = test_c4_kernel.setArg(5, static_cast<uint>(array_size >> LOCAL_MEM_BLOCK_LOG));
         ASSERT_CL(err);
 
@@ -101,6 +133,38 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
         double t;
         std::vector<double> executionTimes;
         for (int i = 0; i < bm->getExecutionSettings().programSettings->numRepetitions; i++) {
+#ifdef USE_SVM
+            clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ | CL_MAP_WRITE,
+                        reinterpret_cast<void *>(A),
+                        sizeof(HOST_DATA_TYPE) *
+                        (array_size * array_size), 0,
+                        NULL, NULL);
+            clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ | CL_MAP_WRITE,
+                        reinterpret_cast<void *>(B),
+                        sizeof(HOST_DATA_TYPE) *
+                        (array_size * array_size), 0,
+                        NULL, NULL);
+            clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ | CL_MAP_WRITE,
+                        reinterpret_cast<void *>(C),
+                        sizeof(HOST_DATA_TYPE) *
+                        (array_size * array_size), 0,
+                        NULL, NULL);
+            clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ | CL_MAP_WRITE,
+                        reinterpret_cast<void *>(scale),
+                        // scale was allocated with array_size values, not a full matrix
+                        sizeof(HOST_DATA_TYPE) * array_size, 0,
+                        NULL, NULL);
+            clEnqueueSVMMap(compute_queue(), CL_TRUE,
+                        CL_MAP_READ | CL_MAP_WRITE,
+                        reinterpret_cast<void *>(ipvt),
+                        // ipvt was allocated with array_size cl_int entries
+                        sizeof(cl_int) * array_size, 0,
+                        NULL, NULL);
+#else
             compute_queue.enqueueWriteBuffer(Buffer_a, CL_TRUE, 0,
                                              sizeof(HOST_DATA_TYPE)*array_size*array_size, A);
             compute_queue.enqueueWriteBuffer(Buffer_b, CL_TRUE, 0,
@@ -111,6 +175,7 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
                                              sizeof(HOST_DATA_TYPE)*array_size, scale);
             compute_queue.enqueueWriteBuffer(Buffer_pivot, CL_TRUE, 0,
                                              sizeof(cl_int)*array_size, ipvt);
+#endif
             compute_queue.finish();
             auto t1 = std::chrono::high_resolution_clock::now();
             compute_queue.enqueueTask(test_c4_kernel);
@@ -123,20 +188,46 @@ struct LinpackKernelSeparateTest : testing::Test, testing::WithParamInterface<st
         }
 
         /* --- Read back results from Device --- */
+#ifdef USE_SVM
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(A), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(B), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(C), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(scale), 0,
+                                NULL, NULL);
+            clEnqueueSVMUnmap(compute_queue(),
+                                reinterpret_cast<void *>(ipvt), 0,
+                                NULL, NULL);
+#else
         compute_queue.enqueueReadBuffer(Buffer_a, CL_TRUE, 0,
                                         sizeof(HOST_DATA_TYPE)*array_size*array_size, A);
         compute_queue.enqueueReadBuffer(Buffer_b, CL_TRUE, 0,
                                         sizeof(HOST_DATA_TYPE)*array_size*array_size, B);
         compute_queue.enqueueReadBuffer(Buffer_c, CL_TRUE, 0,
                                         sizeof(HOST_DATA_TYPE)*array_size*array_size, C);
+#endif
     }
 
     ~LinpackKernelSeparateTest() override {
+#ifdef USE_SVM
+    clSVMFree((*bm->getExecutionSettings().context)(), reinterpret_cast<void*>(A));
+    clSVMFree((*bm->getExecutionSettings().context)(), reinterpret_cast<void*>(B));
+    clSVMFree((*bm->getExecutionSettings().context)(), reinterpret_cast<void*>(C));
+    clSVMFree((*bm->getExecutionSettings().context)(), reinterpret_cast<void*>(scale));
+    clSVMFree((*bm->getExecutionSettings().context)(), reinterpret_cast<void*>(ipvt));
+#else
         free(A);
         free(B);
         free(C);
         free(ipvt);
         free(scale);
+#endif
         delete [] kernelFileName;
     }
 };
diff --git a/PTRANS/src/common/parameters.h.in b/PTRANS/src/common/parameters.h.in
index 423a96ca..82f70b47 100644
--- a/PTRANS/src/common/parameters.h.in
+++ b/PTRANS/src/common/parameters.h.in
@@ -17,6 +17,8 @@
 #define HOST_DATA_TYPE @HOST_DATA_TYPE@
 #define DEVICE_DATA_TYPE @DEVICE_DATA_TYPE@
 
+#cmakedefine USE_SVM
+
 /*
 Short description of the program.
 Moreover the version and build time is also compiled into the description.
diff --git a/PTRANS/src/host/execution_default.cpp b/PTRANS/src/host/execution_default.cpp
index 83948912..df9ca2eb 100644
--- a/PTRANS/src/host/execution_default.cpp
+++ b/PTRANS/src/host/execution_default.cpp
@@ -52,9 +52,18 @@ namespace bm_execution {
 
         cl::Kernel transposeKernel(*config.program, KERNEL_NAME);
 
+#ifdef USE_SVM
+        clSetKernelArgSVMPointer(transposeKernel(), 0,
+                                        reinterpret_cast<void*>(A));
+        clSetKernelArgSVMPointer(transposeKernel(), 1,
+                                        reinterpret_cast<void*>(B));
+        clSetKernelArgSVMPointer(transposeKernel(), 2,
+                                        reinterpret_cast<void*>(A_out));
+#else
         transposeKernel.setArg(0, bufferA);
         transposeKernel.setArg(1, bufferB);
         transposeKernel.setArg(2, bufferA_out);
+#endif
         transposeKernel.setArg(3, config.programSettings->matrixSize / config.programSettings->blockSize);
 
         cl::CommandQueue queue(*config.context);
@@ -65,10 +74,31 @@ namespace bm_execution {
         for (int repetition = 0; repetition < config.programSettings->numRepetitions; repetition++) {
 
             auto startTransfer = std::chrono::high_resolution_clock::now();
+#ifdef USE_SVM
+        clEnqueueSVMMap(queue(), CL_TRUE,
+                        CL_MAP_READ,
+                        reinterpret_cast<void *>(A),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(queue(), CL_TRUE,
+                        CL_MAP_READ,
+                        reinterpret_cast<void *>(B),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+        clEnqueueSVMMap(queue(), CL_TRUE,
+                        CL_MAP_WRITE,
+                        reinterpret_cast<void *>(A_out),
+                        sizeof(HOST_DATA_TYPE) *
+                        (config.programSettings->matrixSize * config.programSettings->matrixSize), 0,
+                        NULL, NULL);
+#else
             queue.enqueueWriteBuffer(bufferA, CL_FALSE, 0,
                                      sizeof(HOST_DATA_TYPE) * config.programSettings->matrixSize * config.programSettings->matrixSize, A);
             queue.enqueueWriteBuffer(bufferB, CL_FALSE, 0,
                                      sizeof(HOST_DATA_TYPE) * config.programSettings->matrixSize * config.programSettings->matrixSize, B);
+#endif
             queue.finish();
             auto endTransfer = std::chrono::high_resolution_clock::now();
             std::chrono::duration<double> transferTime =
@@ -85,8 +115,20 @@ namespace bm_execution {
             calculationTimings.push_back(calculationTime.count());
 
             startTransfer = std::chrono::high_resolution_clock::now();
+#ifdef USE_SVM
+            // clEnqueueSVMUnmap only enqueues the unmap and returns immediately.
+            // Wait for the queue before endTransfer is taken, so the measured
+            // transfer time covers the unmaps, matching the blocking read that
+            // the non-SVM branch performs.
+            clEnqueueSVMUnmap(queue(), reinterpret_cast<void *>(A), 0, NULL, NULL);
+            clEnqueueSVMUnmap(queue(), reinterpret_cast<void *>(B), 0, NULL, NULL);
+            clEnqueueSVMUnmap(queue(), reinterpret_cast<void *>(A_out), 0, NULL, NULL);
+
+            queue.finish();
+#else
             queue.enqueueReadBuffer(bufferA_out, CL_TRUE, 0,
                                     sizeof(HOST_DATA_TYPE) * config.programSettings->matrixSize * config.programSettings->matrixSize, A_out);
+#endif
             endTransfer = std::chrono::high_resolution_clock::now();
             transferTime +=
                     std::chrono::duration_cast<std::chrono::duration<double>>
diff --git a/PTRANS/src/host/transpose_benchmark.cpp b/PTRANS/src/host/transpose_benchmark.cpp
index 89debac9..229de120 100644
--- a/PTRANS/src/host/transpose_benchmark.cpp
+++ b/PTRANS/src/host/transpose_benchmark.cpp
@@ -48,6 +48,39 @@ transpose::TransposeProgramSettings::getSettingsMap() {
         return map;
 }
 
+// Allocates the three size x size matrices A, B and result.
+// With USE_SVM they are taken from the given context via clSVMAlloc,
+// otherwise posix_memalign is used.
+transpose::TransposeData::TransposeData(cl::Context context, uint size) : context(context) {
+    // Putting sizeof first makes the whole product size_t, so size * size is no
+    // longer evaluated in uint arithmetic (as the SVM branch used to do).
+    const auto matrix_bytes = sizeof(HOST_DATA_TYPE) * size * size;
+
+#ifdef USE_SVM
+    // SVM build: allocate from the given context.
+    A = reinterpret_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+    B = reinterpret_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+    result = reinterpret_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, matrix_bytes, 1024));
+#else
+    // Non-SVM build: 64-byte aligned host memory.
+    posix_memalign(reinterpret_cast<void **>(&A), 64, matrix_bytes);
+    posix_memalign(reinterpret_cast<void **>(&B), 64, matrix_bytes);
+    posix_memalign(reinterpret_cast<void **>(&result), 64, matrix_bytes);
+#endif
+}
+
+transpose::TransposeData::~TransposeData() {
+#ifndef USE_SVM  // non-SVM build: the matrices were obtained with posix_memalign
+    free(result);
+    free(B);
+    free(A);
+#else  // SVM build: return each matrix to the context kept by this object
+    clSVMFree(context(), result);
+    clSVMFree(context(), B);
+    clSVMFree(context(), A);
+#endif
+}
+
 transpose::TransposeBenchmark::TransposeBenchmark(int argc, char* argv[]) {
     setupBenchmark(argc, argv);
 }
@@ -102,7 +135,7 @@ transpose::TransposeBenchmark::printResults(const transpose::TransposeExecutionT
 
 std::unique_ptr<transpose::TransposeData>
 transpose::TransposeBenchmark::generateInputData() {
-    auto d = std::unique_ptr<transpose::TransposeData>(new transpose::TransposeData(executionSettings->programSettings->matrixSize));
+    auto d = std::unique_ptr<transpose::TransposeData>(new transpose::TransposeData(*executionSettings->context, executionSettings->programSettings->matrixSize));
 
     std::mt19937 gen(7);
     std::uniform_real_distribution<> dis(-100.0, 100.0);
diff --git a/PTRANS/src/host/transpose_benchmark.hpp b/PTRANS/src/host/transpose_benchmark.hpp
index 929a1aa5..68459124 100644
--- a/PTRANS/src/host/transpose_benchmark.hpp
+++ b/PTRANS/src/host/transpose_benchmark.hpp
@@ -97,29 +97,25 @@ class TransposeData {
      */
     HOST_DATA_TYPE *result;
 
+    /**
+     * @brief The context that is used to allocate memory in SVM mode
+     * 
+     */
+    cl::Context context;
+
     /**
      * @brief Construct a new Transpose Data object
      * 
+     * @param context Context that is used to allocate memory for SVM
      * @param size Size of the allocated square matrices
      */
-    TransposeData(uint size) {
-        posix_memalign(reinterpret_cast<void **>(&A), 64,
-                    sizeof(HOST_DATA_TYPE) * size * size);
-        posix_memalign(reinterpret_cast<void **>(&B), 64,
-                    sizeof(HOST_DATA_TYPE) * size * size);
-        posix_memalign(reinterpret_cast<void **>(&result), 64,
-                    sizeof(HOST_DATA_TYPE) * size * size);
-    }
+    TransposeData(cl::Context context, uint size);
 
     /**
      * @brief Destroy the Transpose Data object. Free the allocated memory
      * 
      */
-    ~TransposeData() {
-        free(A);
-        free(B);
-        free(result);
-    }
+    ~TransposeData();
 
 };
 
diff --git a/RandomAccess/CMakeLists.txt b/RandomAccess/CMakeLists.txt
index 0569be21..829f8dd0 100755
--- a/RandomAccess/CMakeLists.txt
+++ b/RandomAccess/CMakeLists.txt
@@ -7,7 +7,6 @@ set(PARALLEL_MEM_ACCESSES 1 CACHE STRING "Unrolling factor that is used for all
 set(NUM_REPLICATIONS 4 CACHE STRING "Number of times the kernels will be replicated")
 set(DEVICE_BUFFER_SIZE 1 CACHE STRING "Buffer size in number of values that is used within the single kernel implementation.")
 set(COMBINE_LOOPS Yes CACHE BOOL "If enabled this will combine the address calculation loop and the load darta loop to a single loop. This can improve the performance when all loops are running sequentially")
-set(USE_SVM No CACHE BOOL "Use coarse grained SVM instead of loading the buffer on the FPGA before execution. Device needs to support this feature.")
 
 set(DATA_TYPE long)
 set(HOST_DATA_TYPE cl_ulong)
diff --git a/RandomAccess/src/host/random_access_benchmark.cpp b/RandomAccess/src/host/random_access_benchmark.cpp
index cf67db60..3797c50e 100644
--- a/RandomAccess/src/host/random_access_benchmark.cpp
+++ b/RandomAccess/src/host/random_access_benchmark.cpp
@@ -50,6 +50,24 @@ random_access::RandomAccessProgramSettings::getSettingsMap() {
         return map;
 }
 
+random_access::RandomAccessData::RandomAccessData(cl::Context& context, size_t size) : context(context) {
+    const size_t data_bytes = size * sizeof(HOST_DATA_TYPE);  // size counts values, not bytes
+#ifdef USE_SVM  // SVM build: take the buffer from the given context
+    void* svm_region = clSVMAlloc(context(), 0, data_bytes, 1024);
+    data = static_cast<HOST_DATA_TYPE*>(svm_region);
+#else  // non-SVM build: 4096-byte aligned host memory
+    posix_memalign(reinterpret_cast<void**>(&data), 4096, data_bytes);
+#endif
+}
+
+random_access::RandomAccessData::~RandomAccessData() {
+#ifndef USE_SVM  // non-SVM build: data was obtained with posix_memalign
+    free(data);
+#else  // SVM build: return data to the context kept by this object
+    clSVMFree(context(), data);
+#endif
+}
+
 random_access::RandomAccessBenchmark::RandomAccessBenchmark(int argc, char* argv[]) {
     setupBenchmark(argc, argv);
 }
diff --git a/RandomAccess/src/host/random_access_benchmark.hpp b/RandomAccess/src/host/random_access_benchmark.hpp
index d3c8bc79..65346415 100644
--- a/RandomAccess/src/host/random_access_benchmark.hpp
+++ b/RandomAccess/src/host/random_access_benchmark.hpp
@@ -86,33 +86,25 @@ class RandomAccessData {
      */
     HOST_DATA_TYPE *data;
 
+    /**
+     * @brief The context that is used to allocate memory in SVM mode
+     * 
+     */
+    cl::Context context;
+
     /**
      * @brief Construct a new Random Access Data object
      * 
      * @param context The OpenCL context that will be used to allocate SVM memory
      * @param size The size  of the allocated memory in number of values
      */
-    RandomAccessData(cl::Context& context, size_t size) {
-    #ifdef USE_SVM
-        data = reinterpret_cast<HOST_DATA_TYPE*>(
-                            clSVMAlloc(context(), 0 ,
-                            size * sizeof(HOST_DATA_TYPE), 1024));
-    #else
-        posix_memalign(reinterpret_cast<void**>(&data), 4096, size * sizeof(HOST_DATA_TYPE));
-    #endif
-    }
+    RandomAccessData(cl::Context& context, size_t size);
 
     /**
      * @brief Destroy the Random Access Data object and free the memory allocated in the constructor
      * 
      */
-    ~RandomAccessData() {
-    #ifdef USE_SVM
-        clSVMFree(data);
-    #else
-        free(data);
-    #endif
-    }
+    ~RandomAccessData();
 
 };
 
diff --git a/RandomAccess/tests/CMakeLists.txt b/RandomAccess/tests/CMakeLists.txt
index eadba306..565af0dc 100755
--- a/RandomAccess/tests/CMakeLists.txt
+++ b/RandomAccess/tests/CMakeLists.txt
@@ -14,6 +14,9 @@ if (INTELFPGAOPENCL_FOUND)
     target_link_libraries(${HOST_EXE_NAME}_test_intel gtest gmock ${LIB_NAME}_intel ${IntelFPGAOpenCL_LIBRARIES} "${OpenMP_CXX_FLAGS}")
     add_dependencies(${HOST_EXE_NAME}_test_intel random_access_kernels_single_emulate_intel)
     target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DINTEL_FPGA)
+    if (USE_SVM)
+        target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DCL_VERSION_2_0)
+    endif()
     target_compile_options(${HOST_EXE_NAME}_test_intel PRIVATE "${OpenMP_CXX_FLAGS}")
     add_test(NAME ${HOST_EXE_NAME}_test_intel_single_unit COMMAND $<TARGET_FILE:${HOST_EXE_NAME}_test_intel> -f random_access_kernels_single_emulate.aocx WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
 endif()
diff --git a/STREAM/CMakeLists.txt b/STREAM/CMakeLists.txt
index 87998322..218be8b3 100755
--- a/STREAM/CMakeLists.txt
+++ b/STREAM/CMakeLists.txt
@@ -8,7 +8,6 @@ set(GLOBAL_MEM_UNROLL 1 CACHE STRING "Unrolling factor that is used for all loop
 set(NUM_REPLICATIONS 4 CACHE STRING "Number of times the kernels will be replicated")
 set(DEVICE_BUFFER_SIZE 512 CACHE STRING "Buffer size in number of values that is used within the single kernel implementation.")
 set(INNER_LOOP_BUFFERS ON CACHE BOOL "Put the local memory buffers inside the outer loop in the kernel code")
-set(USE_SVM No CACHE BOOL "Use SVM pointers instead of creating buffers on the board and transferring the data there before execution.")
 
 # Set the data type since optional vector types are used
 set(DATA_TYPE float)
diff --git a/STREAM/src/host/stream_benchmark.cpp b/STREAM/src/host/stream_benchmark.cpp
index 82ce87c8..a5fd6ab3 100644
--- a/STREAM/src/host/stream_benchmark.cpp
+++ b/STREAM/src/host/stream_benchmark.cpp
@@ -53,6 +53,43 @@ stream::StreamProgramSettings::getSettingsMap() {
         return map;
 }
 
+stream::StreamData::StreamData(const cl::Context& _context, size_t size) : context(_context) {
+    // A, B and C each hold `size` values of HOST_DATA_TYPE.
+    // Which allocator is used depends on the vendor macro and on USE_SVM.
+    const size_t array_bytes = size * sizeof(HOST_DATA_TYPE);
+
+#if defined(INTEL_FPGA) && defined(USE_SVM)
+    // Intel build with SVM: allocate the arrays from the stored context.
+    A = reinterpret_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, array_bytes, 1024));
+    B = reinterpret_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, array_bytes, 1024));
+    C = reinterpret_cast<HOST_DATA_TYPE*>(clSVMAlloc(context(), 0, array_bytes, 1024));
+#endif
+#if defined(INTEL_FPGA) && !defined(USE_SVM)
+    // Intel build without SVM: 64-byte aligned host memory.
+    posix_memalign(reinterpret_cast<void**>(&A), 64, array_bytes);
+    posix_memalign(reinterpret_cast<void**>(&B), 64, array_bytes);
+    posix_memalign(reinterpret_cast<void**>(&C), 64, array_bytes);
+#endif
+#ifdef XILINX_FPGA
+    // Xilinx build: 4096-byte aligned host memory.
+    posix_memalign(reinterpret_cast<void**>(&A), 4096, array_bytes);
+    posix_memalign(reinterpret_cast<void**>(&B), 4096, array_bytes);
+    posix_memalign(reinterpret_cast<void**>(&C), 4096, array_bytes);
+#endif
+}
+
+stream::StreamData::~StreamData() {
+#if defined(INTEL_FPGA) && defined(USE_SVM)  // the only configuration that allocates with clSVMAlloc
+    clSVMFree(context(), reinterpret_cast<void*>(A));
+    clSVMFree(context(), reinterpret_cast<void*>(B));
+    clSVMFree(context(), reinterpret_cast<void*>(C));
+#else  // every other configuration allocates with posix_memalign, which pairs with free()
+    free(A);
+    free(B);
+    free(C);
+#endif
+}
+
 stream::StreamBenchmark::StreamBenchmark(int argc, char* argv[]) {
     setupBenchmark(argc, argv);
 }
diff --git a/STREAM/src/host/stream_benchmark.hpp b/STREAM/src/host/stream_benchmark.hpp
index a38cb925..ab818d02 100644
--- a/STREAM/src/host/stream_benchmark.hpp
+++ b/STREAM/src/host/stream_benchmark.hpp
@@ -103,52 +103,25 @@ class StreamData {
      */
     HOST_DATA_TYPE *C;
 
+    /**
+     * @brief The context that is used to allocate memory in SVM mode
+     * 
+     */
+    cl::Context context;
+
     /**
      * @brief Construct a new Stream Data object
      * 
-     * @param context the context that will be used to allocate SVM memory
+     * @param _context the context that will be used to allocate SVM memory
      * @param size the size of the data arrays in number of values
      */
-    StreamData(const cl::Context& context, size_t size) {
-    #ifdef INTEL_FPGA
-    #ifdef USE_SVM
-        A = reinterpret_cast<HOST_DATA_TYPE*>(
-                                clSVMAlloc(context(), 0 ,
-                                size * sizeof(HOST_DATA_TYPE), 1024));
-        B = reinterpret_cast<HOST_DATA_TYPE*>(
-                                clSVMAlloc(context(), 0 ,
-                                size * sizeof(HOST_DATA_TYPE), 1024));
-        C = reinterpret_cast<HOST_DATA_TYPE*>(
-                                clSVMAlloc(context(), 0 ,
-                                size * sizeof(HOST_DATA_TYPE), 1024));
-    #else
-        posix_memalign(reinterpret_cast<void**>(&A), 64, size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&B), 64, size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&C), 64, size * sizeof(HOST_DATA_TYPE));
-    #endif
-    #endif
-    #ifdef XILINX_FPGA
-        posix_memalign(reinterpret_cast<void**>(&A), 4096, size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&B), 4096, size * sizeof(HOST_DATA_TYPE));
-        posix_memalign(reinterpret_cast<void**>(&C), 4096, size * sizeof(HOST_DATA_TYPE));
-    #endif
-    }
+    StreamData(const cl::Context& _context, size_t size);
 
     /**
      * @brief Destroy the Stream Data object
      * 
      */
-    ~StreamData() {
-    #ifdef USE_SVM
-        clSVMFree(A);
-        clSVMFree(B);
-        clSVMFree(C);
-    #else
-        free(A);
-        free(B);
-        free(C);
-    #endif
-    }
+    ~StreamData();
 
 };
 
diff --git a/STREAM/tests/CMakeLists.txt b/STREAM/tests/CMakeLists.txt
index 9097411a..137dbdf9 100755
--- a/STREAM/tests/CMakeLists.txt
+++ b/STREAM/tests/CMakeLists.txt
@@ -15,6 +15,9 @@ if (INTELFPGAOPENCL_FOUND)
     target_link_libraries(${HOST_EXE_NAME}_test_intel gtest gmock ${LIB_NAME}_intel ${IntelFPGAOpenCL_LIBRARIES} "${OpenMP_CXX_FLAGS}")
     add_dependencies(${HOST_EXE_NAME}_test_intel stream_kernels_emulate_intel stream_kernels_single_emulate_intel)
     target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DINTEL_FPGA)
+    if (USE_SVM)
+        target_compile_definitions(${HOST_EXE_NAME}_test_intel PRIVATE -DCL_VERSION_2_0)
+    endif()
     target_compile_options(${HOST_EXE_NAME}_test_intel PRIVATE "${OpenMP_CXX_FLAGS}")
     add_test(NAME ${HOST_EXE_NAME}_test_intel_unit COMMAND $<TARGET_FILE:${HOST_EXE_NAME}_test_intel> -f stream_kernels_emulate.aocx WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
     add_test(NAME ${HOST_EXE_NAME}_test_intel_single_unit COMMAND $<TARGET_FILE:${HOST_EXE_NAME}_test_intel> -f stream_kernels_single_emulate.aocx --single-kernel WORKING_DIRECTORY ${EXECUTABLE_OUTPUT_PATH})
diff --git a/cmake/general_benchmark_build_setup.cmake b/cmake/general_benchmark_build_setup.cmake
index 24176b46..9410ce9d 100644
--- a/cmake/general_benchmark_build_setup.cmake
+++ b/cmake/general_benchmark_build_setup.cmake
@@ -10,6 +10,7 @@ set(DEFAULT_REPETITIONS 10 CACHE STRING "Default number of repetitions")
 set(DEFAULT_DEVICE -1 CACHE STRING "Index of the default device to use")
 set(DEFAULT_PLATFORM -1 CACHE STRING "Index of the default platform to use")
 set(USE_OPENMP ${USE_OPENMP} CACHE BOOL "Use OpenMP in the host code")
+set(USE_SVM No CACHE BOOL "Use SVM pointers instead of creating buffers on the board and transferring the data there before execution.")
 
 # Set the used data type
 if (NOT DATA_TYPE)
diff --git a/scripts/power_measurements/pac_s10_dc.fpgainfo.sh b/scripts/power_measurements/pac_s10_dc.fpgainfo.sh
new file mode 100755
index 00000000..14b00a18
--- /dev/null
+++ b/scripts/power_measurements/pac_s10_dc.fpgainfo.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# Usage: pac_s10_dc.fpgainfo.sh <benchmark command> [args...]
+# Runs the command in the background and samples `fpgainfo power` into $LOGFILE while it is alive.
+
+LOGFILE=powermeasure.csv
+
+echo "" > "$LOGFILE"
+
+# Start the benchmark; "$@" keeps arguments that contain spaces intact
+"$@" &
+bm_pid=$!
+
+# Start power measurements; kill -0 sends no signal, it only tests that the process still exists
+while kill -0 "$bm_pid" 2>/dev/null; do
+    # One CSV row per sample: the numeric Amps/Volts readings joined by commas
+    echo $(fpgainfo power | grep "Amps\|Volts" | sed -r 's/.*: ([0-9]+)\.([0-9]+).*/\1.\2/g' | sed -r ':a;N;$!ba;s/\n/,/g') >> "$LOGFILE"
+    sleep 0.01
+done
\ No newline at end of file
diff --git a/shared/include/hpcc_benchmark.hpp b/shared/include/hpcc_benchmark.hpp
index 3d2df760..f10e73dd 100644
--- a/shared/include/hpcc_benchmark.hpp
+++ b/shared/include/hpcc_benchmark.hpp
@@ -354,22 +354,34 @@ class HpccFpgaBenchmark {
             std::cout << HLINE << "Start benchmark using the given configuration. Generating data..." << std::endl
                     << HLINE;
         }
+
+        auto gen_start = std::chrono::high_resolution_clock::now();
         std::unique_ptr<TData> data = generateInputData();
+        std::chrono::duration<double> gen_time = std::chrono::high_resolution_clock::now() - gen_start;
+        
         if (world_rank == 0) {
+            std::cout << "Generation Time: " << gen_time.count() << " s"  << std::endl;
             std::cout << HLINE << "Execute benchmark kernel..." << std::endl
                     << HLINE;
         }
-        std::unique_ptr<TOutput> output =  executeKernel(*data);
 
+        auto exe_start = std::chrono::high_resolution_clock::now();
+        std::unique_ptr<TOutput> output =  executeKernel(*data);
+        std::chrono::duration<double> exe_time = std::chrono::high_resolution_clock::now() - exe_start;
+        
         if (world_rank == 0) {
+            std::cout << "Execution Time: " << exe_time.count() << " s"  << std::endl;
             std::cout << HLINE << "Validate output..." << std::endl
                     << HLINE;
         }
-        
+
+        auto eval_start = std::chrono::high_resolution_clock::now();
         bool validateSuccess = validateOutputAndPrintError(*data);
+        std::chrono::duration<double> eval_time = std::chrono::high_resolution_clock::now() - eval_start;
 
         if (world_rank == 0) {
             printResults(*output);
+            std::cout << "Validation Time: " << eval_time.count() << " s" << std::endl;
         }
 
         return validateSuccess;
diff --git a/shared/setup/fpga_setup.cpp b/shared/setup/fpga_setup.cpp
index d73bcf04..31b79010 100644
--- a/shared/setup/fpga_setup.cpp
+++ b/shared/setup/fpga_setup.cpp
@@ -210,7 +210,7 @@ choose a device.
         int err;
 
         int world_rank = 0;
-        int world_size = 0;
+        int world_size = 1;
         
 #ifdef _USE_MPI_
         MPI_Comm_rank(MPI_COMM_WORLD, &world_rank);