From bebe1030e48e6fd6d9fb2558cbdf292951856616 Mon Sep 17 00:00:00 2001 From: lmoneta Date: Fri, 1 Jul 2022 12:27:34 +0200 Subject: [PATCH] - add optimization vectorization options for SOFIE tests - set in ONNXRuntime also the interopnumthreads to 1 --- root/tmva/sofie/CMakeLists.txt | 10 +++++++--- .../tmva/sofie/ONNXRuntimeInference_Template.cxx.in | 13 +++++++------ 2 files changed, 14 insertions(+), 9 deletions(-) diff --git a/root/tmva/sofie/CMakeLists.txt b/root/tmva/sofie/CMakeLists.txt index 118d0148..4fa4cdbb 100644 --- a/root/tmva/sofie/CMakeLists.txt +++ b/root/tmva/sofie/CMakeLists.txt @@ -190,9 +190,13 @@ add_dependencies(RDF_SOFIE_Inference SofieCompileModels) #if (ROOT_PLATFORM MATCHES "linux|macosx" AND CMAKE_SYSTEM_PROCESSOR MATCHES x86_64 AND CMAKE_CXX_COMPILER_ID MATCHES "GNU|Clang") ## assume we run only on linux/macos with gnu or gcc set(gnu-flags $<$<CXX_COMPILER_ID:GNU>:-fno-signaling-nans>) -target_compile_options(SOFIEInference PRIVATE ${gnu-flags} -fno-trapping-math -O3) -target_compile_options(RDF_SOFIE_Inference PRIVATE ${gnu-flags} -fno-trapping-math -O3) -#endif() +if (APPLE) +target_compile_options(SOFIEInference PRIVATE ${gnu-flags} -ffast-math -fno-trapping-math -O3) +target_compile_options(RDF_SOFIE_Inference PRIVATE ${gnu-flags} -ffast-math -fno-trapping-math -O3) +else() +target_compile_options(SOFIEInference PRIVATE ${gnu-flags} -march=native -ffast-math -fno-trapping-math -O3) +target_compile_options(RDF_SOFIE_Inference PRIVATE ${gnu-flags} -march=native -ffast-math -fno-trapping-math -O3) +endif() endif() # endif blas endif() # endif TMVA/SOFIE diff --git a/root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in b/root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in index 99ed729e..01e2f38c 100644 --- a/root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in +++ b/root/tmva/sofie/ONNXRuntimeInference_Template.cxx.in @@ -19,6 +19,7 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path) Ort::SessionOptions session_options;
session_options.SetIntraOpNumThreads(1); + session_options.SetInterOpNumThreads(1); session_options.SetGraphOptimizationLevel(GraphOptimizationLevel::ORT_ENABLE_EXTENDED); //std::cout << "benchmarking model " << model_path << std::endl; @@ -45,12 +46,12 @@ static void @FUNC_NAME@(benchmark::State& state, string model_path) for (int i = 0; i < nout; i++) output_node_dims[i] = session.GetOutputTypeInfo(i).GetTensorTypeAndShapeInfo().GetShape(); - for (int i = 0; i < nin; i++) { - std::cout << "input " << input_node_names[i] << " shape : "; - for (int j = 0; j < input_node_dims[i].size(); j++) - std::cout << " " << input_node_dims[i][j]; - std::cout << std::endl; - } + // for (int i = 0; i < nin; i++) { + // std::cout << "input " << input_node_names[i] << " shape : "; + // for (int j = 0; j < input_node_dims[i].size(); j++) + // std::cout << " " << input_node_dims[i][j]; + // std::cout << std::endl; + // } // fix negative shapes for (int i = 0; i < nin; i++) { for (int j = 0; j < input_node_dims[i].size(); j++) {