From 40645c56c89f87478d895b6041c6d8be65c10d7b Mon Sep 17 00:00:00 2001 From: Ashwini Khade Date: Fri, 29 Sep 2023 15:22:13 -0700 Subject: [PATCH] remove non relevant tests from cmake --- cmake/onnxruntime_training.cmake | 226 +--- cmake/onnxruntime_unittests.cmake | 1032 +++++++++-------- onnxruntime/test/providers/cpu/model_tests.cc | 8 + .../python/orttraining_pybind_state.cc | 425 ------- .../test/gradient/gradient_checker.h | 2 +- .../test/gradient/gradient_ops_test.cc | 2 +- .../python/onnxruntime_test_postprocess.py | 325 ------ 7 files changed, 606 insertions(+), 1414 deletions(-) delete mode 100644 orttraining/orttraining/test/python/onnxruntime_test_postprocess.py diff --git a/cmake/onnxruntime_training.cmake b/cmake/onnxruntime_training.cmake index f9ba2b341f741..1aba00b96774d 100644 --- a/cmake/onnxruntime_training.cmake +++ b/cmake/onnxruntime_training.cmake @@ -1,29 +1,26 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -set (CXXOPTS ${cxxopts_SOURCE_DIR}/include) +set(CXXOPTS ${cxxopts_SOURCE_DIR}/include) # training lib file(GLOB_RECURSE onnxruntime_training_srcs - "${ORTTRAINING_SOURCE_DIR}/core/framework/*.h" - "${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc" - "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.h" - "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.cc" - "${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*" - "${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*" - "${ORTTRAINING_SOURCE_DIR}/core/session/*.h" - "${ORTTRAINING_SOURCE_DIR}/core/session/*.cc" - "${ORTTRAINING_SOURCE_DIR}/core/agent/*.h" - "${ORTTRAINING_SOURCE_DIR}/core/agent/*.cc" - ) - + "${ORTTRAINING_SOURCE_DIR}/core/framework/*.h" + "${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc" + "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.h" + "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.cc" + "${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*" + "${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*" + "${ORTTRAINING_SOURCE_DIR}/core/agent/*.h" + "${ORTTRAINING_SOURCE_DIR}/core/agent/*.cc" +) # This needs to be built in framework.cmake file(GLOB_RECURSE onnxruntime_training_framework_excluded_srcs CONFIGURE_DEPENDS - "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h" - "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc" - "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h" - "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc" + "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h" + "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc" + "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h" + "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc" ) list(REMOVE_ITEM onnxruntime_training_srcs ${onnxruntime_training_framework_excluded_srcs}) @@ -39,199 +36,42 @@ endif() target_include_directories(onnxruntime_training PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} PUBLIC ${onnxruntime_graph_header} ${MPI_CXX_INCLUDE_DIRS}) -if (onnxruntime_USE_CUDA) +if(onnxruntime_USE_CUDA) target_include_directories(onnxruntime_training PRIVATE ${onnxruntime_CUDNN_HOME}/include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) endif() -if (onnxruntime_USE_NCCL) +if(onnxruntime_USE_NCCL) target_include_directories(onnxruntime_training PRIVATE ${NCCL_INCLUDE_DIRS}) endif() -if (onnxruntime_BUILD_UNIT_TESTS) +if(onnxruntime_BUILD_UNIT_TESTS) set_target_properties(onnxruntime_training PROPERTIES FOLDER "ONNXRuntime") source_group(TREE ${ORTTRAINING_ROOT} FILES ${onnxruntime_training_srcs}) - # training runner lib - file(GLOB_RECURSE onnxruntime_training_runner_srcs - "${ORTTRAINING_SOURCE_DIR}/models/runner/*.h" - "${ORTTRAINING_SOURCE_DIR}/models/runner/*.cc" - ) - - # perf test utils - set(onnxruntime_perf_test_src_dir ${TEST_SRC_DIR}/perftest) - set(onnxruntime_perf_test_src - "${onnxruntime_perf_test_src_dir}/utils.h") - - if(WIN32) - list(APPEND onnxruntime_perf_test_src - "${onnxruntime_perf_test_src_dir}/windows/utils.cc") - else () - list(APPEND onnxruntime_perf_test_src - "${onnxruntime_perf_test_src_dir}/posix/utils.cc") - endif() - - onnxruntime_add_static_library(onnxruntime_training_runner ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src}) - add_dependencies(onnxruntime_training_runner ${onnxruntime_EXTERNAL_DEPENDENCIES} onnx onnxruntime_providers) - - if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) - target_link_libraries(onnxruntime_training_runner PRIVATE Python::Python) - endif() - - onnxruntime_add_include_to_target(onnxruntime_training_runner onnxruntime_training onnxruntime_framework onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface) - - target_include_directories(onnxruntime_training_runner PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} PUBLIC ${onnxruntime_graph_header}) - target_link_libraries(onnxruntime_training_runner PRIVATE nlohmann_json::nlohmann_json) - if (onnxruntime_USE_CUDA) - target_include_directories(onnxruntime_training_runner PUBLIC ${onnxruntime_CUDNN_HOME}/include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}) - endif() - - if (onnxruntime_USE_NCCL) - target_include_directories(onnxruntime_training_runner PRIVATE ${NCCL_INCLUDE_DIRS}) - endif() - - if (onnxruntime_USE_ROCM) - add_definitions(-DUSE_ROCM=1) - target_include_directories(onnxruntime_training_runner PUBLIC ${onnxruntime_ROCM_HOME}/include) - endif() - - check_cxx_compiler_flag(-Wno-maybe-uninitialized HAS_NO_MAYBE_UNINITIALIZED) - if(UNIX AND NOT APPLE) - if (HAS_NO_MAYBE_UNINITIALIZED) - target_compile_options(onnxruntime_training_runner PUBLIC "-Wno-maybe-uninitialized") - endif() - endif() - - if (onnxruntime_USE_ROCM) - target_compile_options(onnxruntime_training_runner PUBLIC -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1) - endif() - - set_target_properties(onnxruntime_training_runner PROPERTIES FOLDER "ONNXRuntimeTest") - source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src}) - - # MNIST - file(GLOB_RECURSE training_mnist_src - "${ORTTRAINING_SOURCE_DIR}/models/mnist/*.h" - "${ORTTRAINING_SOURCE_DIR}/models/mnist/mnist_data_provider.cc" - "${ORTTRAINING_SOURCE_DIR}/models/mnist/main.cc" - ) - onnxruntime_add_executable(onnxruntime_training_mnist ${training_mnist_src}) - onnxruntime_add_include_to_target(onnxruntime_training_mnist onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface) - target_include_directories(onnxruntime_training_mnist PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner) - set(ONNXRUNTIME_LIBS - onnxruntime_session - ${onnxruntime_libs} - ${PROVIDERS_MKLDNN} - ${PROVIDERS_DML} - onnxruntime_optimizer - onnxruntime_providers - onnxruntime_util - onnxruntime_framework + onnxruntime_session + ${onnxruntime_libs} + ${PROVIDERS_MKLDNN} + ${PROVIDERS_DML} + onnxruntime_optimizer + onnxruntime_providers + onnxruntime_util + onnxruntime_framework ) - if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) + if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) list(APPEND ONNXRUNTIME_LIBS Python::Python) endif() list(APPEND ONNXRUNTIME_LIBS - onnxruntime_graph - ${ONNXRUNTIME_MLAS_LIBS} - onnxruntime_common - onnxruntime_flatbuffers - Boost::mp11 safeint_interface + onnxruntime_graph + ${ONNXRUNTIME_MLAS_LIBS} + onnxruntime_common + onnxruntime_flatbuffers + Boost::mp11 safeint_interface ) - if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) - list(APPEND ONNXRUNTIME_LIBS onnxruntime_language_interop onnxruntime_pyop) - endif() - - if(UNIX AND NOT APPLE) - if (HAS_NO_MAYBE_UNINITIALIZED) - target_compile_options(onnxruntime_training_mnist PUBLIC "-Wno-maybe-uninitialized") - endif() + if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) + list(APPEND ONNXRUNTIME_LIBS onnxruntime_language_interop onnxruntime_pyop) endif() - target_link_libraries(onnxruntime_training_mnist PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES}) - set_target_properties(onnxruntime_training_mnist PROPERTIES FOLDER "ONNXRuntimeTest") - - # squeezenet - # Disabling build for squeezenet, as no one is using this - #[[ - file(GLOB_RECURSE training_squeezene_src - "${ORTTRAINING_SOURCE_DIR}/models/squeezenet/*.h" - "${ORTTRAINING_SOURCE_DIR}/models/squeezenet/*.cc" - ) - onnxruntime_add_executable(onnxruntime_training_squeezenet ${training_squeezene_src}) - onnxruntime_add_include_to_target(onnxruntime_training_squeezenet onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface) - target_include_directories(onnxruntime_training_squeezenet PUBLIC ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner) - - if(UNIX AND NOT APPLE) - target_compile_options(onnxruntime_training_squeezenet PUBLIC "-Wno-maybe-uninitialized") - endif() - target_link_libraries(onnxruntime_training_squeezenet PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES}) - set_target_properties(onnxruntime_training_squeezenet PROPERTIES FOLDER "ONNXRuntimeTest") - ]] - - # BERT - file(GLOB_RECURSE training_bert_src - "${ORTTRAINING_SOURCE_DIR}/models/bert/*.h" - "${ORTTRAINING_SOURCE_DIR}/models/bert/*.cc" - ) - onnxruntime_add_executable(onnxruntime_training_bert ${training_bert_src}) - - if(UNIX AND NOT APPLE) - if (HAS_NO_MAYBE_UNINITIALIZED) - target_compile_options(onnxruntime_training_bert PUBLIC "-Wno-maybe-uninitialized") - endif() - endif() - - onnxruntime_add_include_to_target(onnxruntime_training_bert onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface) - target_include_directories(onnxruntime_training_bert PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner) - - # ROCM provider sources are generated, need to add include directory for generated headers - if (onnxruntime_USE_ROCM) - target_include_directories(onnxruntime_training_bert PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime) - endif() - - target_link_libraries(onnxruntime_training_bert PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES}) - set_target_properties(onnxruntime_training_bert PROPERTIES FOLDER "ONNXRuntimeTest") - - # Pipeline - file(GLOB_RECURSE training_pipeline_poc_src - "${ORTTRAINING_SOURCE_DIR}/models/pipeline_poc/*.h" - "${ORTTRAINING_SOURCE_DIR}/models/pipeline_poc/*.cc" - ) - onnxruntime_add_executable(onnxruntime_training_pipeline_poc ${training_pipeline_poc_src}) - - if(UNIX AND NOT APPLE) - if (HAS_NO_MAYBE_UNINITIALIZED) - target_compile_options(onnxruntime_training_pipeline_poc PUBLIC "-Wno-maybe-uninitialized") - endif() - endif() - - onnxruntime_add_include_to_target(onnxruntime_training_pipeline_poc onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface) - target_include_directories(onnxruntime_training_pipeline_poc PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner) - if (onnxruntime_USE_NCCL) - target_include_directories(onnxruntime_training_pipeline_poc PRIVATE ${NCCL_INCLUDE_DIRS}) - endif() - - target_link_libraries(onnxruntime_training_pipeline_poc PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES}) - set_target_properties(onnxruntime_training_pipeline_poc PROPERTIES FOLDER "ONNXRuntimeTest") - - # GPT-2 - file(GLOB_RECURSE training_gpt2_src - "${ORTTRAINING_SOURCE_DIR}/models/gpt2/*.h" - "${ORTTRAINING_SOURCE_DIR}/models/gpt2/*.cc" - ) - onnxruntime_add_executable(onnxruntime_training_gpt2 ${training_gpt2_src}) - if(UNIX AND NOT APPLE) - if (HAS_NO_MAYBE_UNINITIALIZED) - target_compile_options(onnxruntime_training_gpt2 PUBLIC "-Wno-maybe-uninitialized") - endif() - endif() - - target_include_directories(onnxruntime_training_gpt2 PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner) - - target_link_libraries(onnxruntime_training_gpt2 PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES}) - set_target_properties(onnxruntime_training_gpt2 PROPERTIES FOLDER "ONNXRuntimeTest") - endif() diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 3b9727ec08970..dfdf52bc98059 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -1,30 +1,33 @@ # Copyright (c) Microsoft Corporation. All rights reserved. # Licensed under the MIT License. -if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS") +if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") find_package(XCTest REQUIRED) endif() set(TEST_SRC_DIR ${ONNXRUNTIME_ROOT}/test) set(TEST_INC_DIR ${ONNXRUNTIME_ROOT}) -if (onnxruntime_ENABLE_TRAINING) + +if(onnxruntime_ENABLE_TRAINING) list(APPEND TEST_INC_DIR ${ORTTRAINING_ROOT}) endif() -if (onnxruntime_USE_TVM) + +if(onnxruntime_USE_TVM) list(APPEND TEST_INC_DIR ${TVM_INCLUDES}) endif() set(disabled_warnings) + function(AddTest) cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS;TEST_ARGS" ${ARGN}) list(REMOVE_DUPLICATES _UT_SOURCES) - if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS") + if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") onnxruntime_add_executable(${_UT_TARGET} ${TEST_SRC_DIR}/xctest/orttestmain.m) else() onnxruntime_add_executable(${_UT_TARGET} ${_UT_SOURCES}) endif() - if (_UT_DEPENDS) + if(_UT_DEPENDS) list(REMOVE_DUPLICATES _UT_DEPENDS) endif(_UT_DEPENDS) @@ -34,28 +37,29 @@ function(AddTest) source_group(TREE ${REPO_ROOT} FILES ${_UT_SOURCES}) - if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8) - #TODO: fix the warnings, they are dangerous + if(MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8) + # TODO: fix the warnings, they are dangerous target_compile_options(${_UT_TARGET} PRIVATE "/wd4244") endif() - if (MSVC) + + if(MSVC) target_compile_options(${_UT_TARGET} PRIVATE "/wd6330") endif() set_target_properties(${_UT_TARGET} PROPERTIES FOLDER "ONNXRuntimeTest") - if (MSVC) + if(MSVC) # set VS debugger working directory to the test program's directory set_target_properties(${_UT_TARGET} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY $) endif() - if (_UT_DEPENDS) + if(_UT_DEPENDS) add_dependencies(${_UT_TARGET} ${_UT_DEPENDS}) endif(_UT_DEPENDS) if(_UT_DYN) target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock onnxruntime ${CMAKE_DL_LIBS} - Threads::Threads) + Threads::Threads) target_compile_definitions(${_UT_TARGET} PRIVATE -DUSE_ONNXRUNTIME_DLL) else() target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES}) @@ -63,57 +67,65 @@ function(AddTest) onnxruntime_add_include_to_target(${_UT_TARGET} date_interface flatbuffers::flatbuffers) target_include_directories(${_UT_TARGET} PRIVATE ${TEST_INC_DIR}) - if (onnxruntime_USE_CUDA) + + if(onnxruntime_USE_CUDA) target_include_directories(${_UT_TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${onnxruntime_CUDNN_HOME}/include) - if (onnxruntime_USE_NCCL) + + if(onnxruntime_USE_NCCL) target_include_directories(${_UT_TARGET} PRIVATE ${NCCL_INCLUDE_DIRS}) endif() endif() - if (onnxruntime_USE_TENSORRT) + + if(onnxruntime_USE_TENSORRT) # used for instantiating placeholder TRT builder to mitigate TRT library load/unload overhead target_include_directories(${_UT_TARGET} PRIVATE ${TENSORRT_INCLUDE_DIR}) endif() if(MSVC) target_compile_options(${_UT_TARGET} PRIVATE "$<$:SHELL:--compiler-options /utf-8>" - "$<$>:/utf-8>") + "$<$>:/utf-8>") endif() - if (WIN32) + if(WIN32) # include dbghelp in case tests throw an ORT exception, as that exception includes a stacktrace, which requires dbghelp. target_link_libraries(${_UT_TARGET} PRIVATE debug dbghelp) - if (onnxruntime_USE_CUDA) + if(onnxruntime_USE_CUDA) # disable a warning from the CUDA headers about unreferenced local functions - if (MSVC) + if(MSVC) target_compile_options(${_UT_TARGET} PRIVATE "$<$:-Xcompiler /wd4505>" - "$<$>:/wd4505>") + "$<$>:/wd4505>") endif() endif() - if (MSVC) + + if(MSVC) # warning C6326: Potential comparison of a constant with another constant. # Lot of such things came from gtest target_compile_options(${_UT_TARGET} PRIVATE "$<$:-Xcompiler /wd6326>" - "$<$>:/wd6326>") + "$<$>:/wd6326>") + # Raw new and delete. A lot of such things came from googletest. target_compile_options(${_UT_TARGET} PRIVATE "$<$:-Xcompiler /wd26409>" - "$<$>:/wd26409>") + "$<$>:/wd26409>") + # "Global initializer calls a non-constexpr function." target_compile_options(${_UT_TARGET} PRIVATE "$<$:-Xcompiler /wd26426>" - "$<$>:/wd26426>") + "$<$>:/wd26426>") endif() + target_compile_options(${_UT_TARGET} PRIVATE ${disabled_warnings}) else() target_compile_options(${_UT_TARGET} PRIVATE ${DISABLED_WARNINGS_FOR_TVM}) target_compile_options(${_UT_TARGET} PRIVATE "$<$:SHELL:-Xcompiler -Wno-error=sign-compare>" - "$<$>:-Wno-error=sign-compare>") + "$<$>:-Wno-error=sign-compare>") target_compile_options(${_UT_TARGET} PRIVATE "-Wno-error=uninitialized") endif() set(TEST_ARGS ${_UT_TEST_ARGS}) - if (onnxruntime_GENERATE_TEST_REPORTS) + + if(onnxruntime_GENERATE_TEST_REPORTS) # generate a report file next to the test program - if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # WebAssembly use a memory file system, so we do not use full path list(APPEND TEST_ARGS "--gtest_output=xml:$.$.results.xml") @@ -123,7 +135,7 @@ function(AddTest) endif() endif(onnxruntime_GENERATE_TEST_REPORTS) - if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS") + if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") # target_sources(${_UT_TARGET} PRIVATE ${TEST_SRC_DIR}/xctest/orttestmain.m) set_target_properties(${_UT_TARGET} PROPERTIES FOLDER "ONNXRuntimeTest" MACOSX_BUNDLE_BUNDLE_NAME ${_UT_TARGET} @@ -140,13 +152,15 @@ function(AddTest) ${TEST_SRC_DIR}/xctest/xcgtest.mm ${_UT_SOURCES}) onnxruntime_configure_target(${_UT_TARGET}_xc) + if(_UT_DYN) target_link_libraries(${_UT_TARGET}_xc PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock onnxruntime ${CMAKE_DL_LIBS} - Threads::Threads) + Threads::Threads) target_compile_definitions(${_UT_TARGET}_xc PRIVATE USE_ONNXRUNTIME_DLL) else() target_link_libraries(${_UT_TARGET}_xc PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES}) endif() + onnxruntime_add_include_to_target(${_UT_TARGET}_xc date_interface flatbuffers::flatbuffers) target_include_directories(${_UT_TARGET}_xc PRIVATE ${TEST_INC_DIR}) get_target_property(${_UT_TARGET}_DEFS ${_UT_TARGET} COMPILE_DEFINITIONS) @@ -162,16 +176,16 @@ function(AddTest) xctest_add_test(xctest.${_UT_TARGET} ${_UT_TARGET}_xc) else() - if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") # We might have already executed the following "find_program" code when we build ORT nodejs binding. # Then the program is found the result is stored in the variable and the search will not be repeated. find_program(NPM_CLI - NAMES "npm.cmd" "npm" - DOC "NPM command line client" - REQUIRED + NAMES "npm.cmd" "npm" + DOC "NPM command line client" + REQUIRED ) - if (onnxruntime_WEBASSEMBLY_RUN_TESTS_IN_BROWSER) + if(onnxruntime_WEBASSEMBLY_RUN_TESTS_IN_BROWSER) add_custom_command(TARGET ${_UT_TARGET} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different ${TEST_SRC_DIR}/wasm/package.json $ COMMAND ${CMAKE_COMMAND} -E copy_if_different ${TEST_SRC_DIR}/wasm/package-lock.json $ @@ -181,19 +195,23 @@ function(AddTest) ) set(TEST_NPM_FLAGS) - if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS) + + if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS) list(APPEND TEST_NPM_FLAGS "--wasm-threads") endif() + add_test(NAME ${_UT_TARGET} COMMAND ${NPM_CLI} test -- ${TEST_NPM_FLAGS} --entry=${_UT_TARGET} ${TEST_ARGS} WORKING_DIRECTORY $ ) else() set(TEST_NODE_FLAGS) - if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS) + + if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS) list(APPEND TEST_NODE_FLAGS "--experimental-wasm-threads") endif() - if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD) + + if(onnxruntime_ENABLE_WEBASSEMBLY_SIMD) list(APPEND TEST_NODE_FLAGS "--experimental-wasm-simd") endif() @@ -214,10 +232,9 @@ endfunction(AddTest) # general program entrypoint for C++ unit tests set(onnxruntime_unittest_main_src "${TEST_SRC_DIR}/unittest_main/test_main.cc") -#Do not add '${TEST_SRC_DIR}/util/include' to your include directories directly -#Use onnxruntime_add_include_to_target or target_link_libraries, so that compile definitions -#can propagate correctly. - +# Do not add '${TEST_SRC_DIR}/util/include' to your include directories directly +# Use onnxruntime_add_include_to_target or target_link_libraries, so that compile definitions +# can propagate correctly. file(GLOB onnxruntime_test_utils_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/util/include/*.h" "${TEST_SRC_DIR}/util/*.cc" @@ -236,89 +253,86 @@ file(GLOB onnxruntime_test_quantiztion_src CONFIGURE_DEPENDS ) if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) - file(GLOB onnxruntime_test_ir_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/ir/*.cc" "${TEST_SRC_DIR}/ir/*.h" - ) + ) file(GLOB onnxruntime_test_optimizer_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/optimizer/*.cc" "${TEST_SRC_DIR}/optimizer/*.h" - ) + ) set(onnxruntime_test_framework_src_patterns "${TEST_SRC_DIR}/framework/*.cc" "${TEST_SRC_DIR}/framework/*.h" "${TEST_SRC_DIR}/platform/*.cc" - ) - -else() # minimal and/or reduced ops build + ) +else() # minimal and/or reduced ops build set(onnxruntime_test_framework_src_patterns "${TEST_SRC_DIR}/platform/*.cc" - ) + ) - if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) + if(onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) list(APPEND onnxruntime_test_framework_src_patterns "${TEST_SRC_DIR}/framework/ort_model_only_test.cc" ) endif() - if (NOT onnxruntime_MINIMAL_BUILD) + if(NOT onnxruntime_MINIMAL_BUILD) file(GLOB onnxruntime_test_ir_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/ir/*.cc" "${TEST_SRC_DIR}/ir/*.h" - ) + ) endif() endif() if((NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_EXTENDED_MINIMAL_BUILD) - AND NOT onnxruntime_REDUCED_OPS_BUILD) + AND NOT onnxruntime_REDUCED_OPS_BUILD) list(APPEND onnxruntime_test_optimizer_src - "${TEST_SRC_DIR}/optimizer/runtime_optimization/graph_runtime_optimization_test.cc") + "${TEST_SRC_DIR}/optimizer/runtime_optimization/graph_runtime_optimization_test.cc") endif() file(GLOB onnxruntime_test_training_src - "${ORTTRAINING_SOURCE_DIR}/test/model/*.h" - "${ORTTRAINING_SOURCE_DIR}/test/model/*.cc" "${ORTTRAINING_SOURCE_DIR}/test/gradient/*.h" "${ORTTRAINING_SOURCE_DIR}/test/gradient/*.cc" - "${ORTTRAINING_SOURCE_DIR}/test/graph/*.h" - "${ORTTRAINING_SOURCE_DIR}/test/graph/*.cc" - "${ORTTRAINING_SOURCE_DIR}/test/session/*.h" - "${ORTTRAINING_SOURCE_DIR}/test/session/*.cc" "${ORTTRAINING_SOURCE_DIR}/test/optimizer/*.h" "${ORTTRAINING_SOURCE_DIR}/test/optimizer/*.cc" - "${ORTTRAINING_SOURCE_DIR}/test/framework/*.cc" - "${ORTTRAINING_SOURCE_DIR}/test/distributed/*.h" - "${ORTTRAINING_SOURCE_DIR}/test/distributed/*.cc" - ) + "${ORTTRAINING_SOURCE_DIR}/test/framework/crc32c_test.cc" +) + +list(REMOVE_ITEM onnxruntime_test_training_src + "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_op_test_utils.cc" + "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_op_test_utils.h" + "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_ops_test.cc" + "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_checker.h" + "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_checker.cc" +) # TODO (baijumeswani): Remove the minimal build check here. -# The training api tests should be runnable even on a minimal build. -# This requires converting all the *.onnx files to ort format. -if (NOT onnxruntime_MINIMAL_BUILD) - if (onnxruntime_ENABLE_TRAINING_APIS) +# The training api tests should be runnable even on a minimal build. +# This requires converting all the *.onnx files to ort format. +if(NOT onnxruntime_MINIMAL_BUILD) + if(onnxruntime_ENABLE_TRAINING_APIS) file(GLOB onnxruntime_test_training_api_src "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.cc" "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.h" "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.cc" "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.h" - ) + ) endif() endif() if(WIN32) list(APPEND onnxruntime_test_framework_src_patterns "${TEST_SRC_DIR}/platform/windows/*.cc" - "${TEST_SRC_DIR}/platform/windows/logging/*.cc" ) + "${TEST_SRC_DIR}/platform/windows/logging/*.cc") endif() if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) - if(onnxruntime_USE_CUDA) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/framework/cuda/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/framework/cuda/*) endif() set(onnxruntime_test_providers_src_patterns @@ -343,6 +357,7 @@ else() set(onnxruntime_test_providers_src_patterns "${TEST_SRC_DIR}/framework/test_utils.cc" "${TEST_SRC_DIR}/framework/test_utils.h" + # TODO: Add anything that is needed for testing a minimal build ) endif() @@ -352,7 +367,7 @@ file(GLOB onnxruntime_test_providers_src CONFIGURE_DEPENDS ${onnxruntime_test_pr if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) file(GLOB_RECURSE onnxruntime_test_providers_cpu_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/cpu/*" - ) + ) endif() if(onnxruntime_DISABLE_ML_OPS) @@ -361,93 +376,93 @@ endif() list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cpu_src}) -if (onnxruntime_USE_CUDA AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) +if(onnxruntime_USE_CUDA AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) file(GLOB onnxruntime_test_providers_cuda_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/cuda/*" - ) + ) list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cuda_src}) endif() -if (onnxruntime_USE_CANN) +if(onnxruntime_USE_CANN) file(GLOB_RECURSE onnxruntime_test_providers_cann_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/cann/*" - ) + ) list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cann_src}) endif() # Disable training ops test for minimal build as a lot of these depend on loading an onnx model. -if (NOT onnxruntime_MINIMAL_BUILD) - if (onnxruntime_ENABLE_TRAINING_OPS) +if(NOT onnxruntime_MINIMAL_BUILD) + if(onnxruntime_ENABLE_TRAINING_OPS) file(GLOB_RECURSE orttraining_test_trainingops_cpu_src CONFIGURE_DEPENDS "${ORTTRAINING_SOURCE_DIR}/test/training_ops/compare_provider_test_utils.cc" "${ORTTRAINING_SOURCE_DIR}/test/training_ops/function_op_test_utils.cc" "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/*" - ) + ) - if (NOT onnxruntime_ENABLE_TRAINING) + if(NOT onnxruntime_ENABLE_TRAINING) list(REMOVE_ITEM orttraining_test_trainingops_cpu_src "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/tensorboard/summary_op_test.cc" - ) + ) endif() list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cpu_src}) - if (onnxruntime_USE_CUDA OR onnxruntime_USE_ROCM) + if(onnxruntime_USE_CUDA OR onnxruntime_USE_ROCM) file(GLOB_RECURSE orttraining_test_trainingops_cuda_src CONFIGURE_DEPENDS "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cuda/*" - ) + ) list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cuda_src}) endif() endif() endif() -if (onnxruntime_USE_DNNL) +if(onnxruntime_USE_DNNL) file(GLOB_RECURSE onnxruntime_test_providers_dnnl_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/dnnl/*" - ) + ) list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_dnnl_src}) endif() -if (onnxruntime_USE_NNAPI_BUILTIN) +if(onnxruntime_USE_NNAPI_BUILTIN) file(GLOB_RECURSE onnxruntime_test_providers_nnapi_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/nnapi/*" - ) + ) list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_nnapi_src}) endif() -if (onnxruntime_USE_RKNPU) +if(onnxruntime_USE_RKNPU) file(GLOB_RECURSE onnxruntime_test_providers_rknpu_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/rknpu/*" - ) + ) list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_rknpu_src}) endif() -if (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_EXTENDED_MINIMAL_BUILD) +if(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_EXTENDED_MINIMAL_BUILD) file(GLOB_RECURSE onnxruntime_test_providers_internal_testing_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/internal_testing/*" - ) + ) list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_internal_testing_src}) endif() -set (ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR "${TEST_SRC_DIR}/shared_lib") -set (ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR "${TEST_SRC_DIR}/global_thread_pools") -set (ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR "${TEST_SRC_DIR}/custom_op_registration") -set (ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR "${TEST_SRC_DIR}/logging_apis") - -set (onnxruntime_shared_lib_test_SRC - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_session_options.cc - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_run_options.cc - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_allocator.cc - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_nontensor_types.cc - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_loading.cc - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_ort_format_models.cc - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.h - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.cc - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.h - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc) - -if (NOT onnxruntime_MINIMAL_BUILD) +set(ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR "${TEST_SRC_DIR}/shared_lib") +set(ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR "${TEST_SRC_DIR}/global_thread_pools") +set(ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR "${TEST_SRC_DIR}/custom_op_registration") +set(ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR "${TEST_SRC_DIR}/logging_apis") + +set(onnxruntime_shared_lib_test_SRC + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_session_options.cc + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_run_options.cc + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_allocator.cc + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_nontensor_types.cc + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_loading.cc + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_ort_format_models.cc + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.h + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.cc + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.h + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc) + +if(NOT onnxruntime_MINIMAL_BUILD) list(APPEND onnxruntime_shared_lib_test_SRC ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_inference.cc) endif() @@ -455,14 +470,13 @@ if(onnxruntime_RUN_ONNX_TESTS) list(APPEND onnxruntime_shared_lib_test_SRC ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_io_types.cc) endif() -set (onnxruntime_global_thread_pools_test_SRC - ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h - ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_main.cc - ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_inference.cc) +set(onnxruntime_global_thread_pools_test_SRC + ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h + ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_main.cc + ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_inference.cc) # tests from lowest level library up. # the order of libraries should be maintained, with higher libraries being added first in the list - set(onnxruntime_test_common_libs onnxruntime_test_utils onnxruntime_common @@ -489,7 +503,7 @@ set(onnxruntime_test_framework_libs onnxruntime_graph ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common - ) +) set(onnxruntime_test_server_libs onnxruntime_test_utils @@ -497,10 +511,10 @@ set(onnxruntime_test_server_libs ) if(WIN32) - list(APPEND onnxruntime_test_framework_libs Advapi32) + list(APPEND onnxruntime_test_framework_libs Advapi32) endif() -set (onnxruntime_test_providers_dependencies ${onnxruntime_EXTERNAL_DEPENDENCIES}) +set(onnxruntime_test_providers_dependencies ${onnxruntime_EXTERNAL_DEPENDENCIES}) if(onnxruntime_USE_CUDA) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_cuda) @@ -540,7 +554,7 @@ if(onnxruntime_USE_ROCM) endif() if(onnxruntime_USE_COREML) - if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") + if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml onnxruntime_coreml_proto) else() list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml) @@ -555,73 +569,75 @@ if(onnxruntime_USE_ARMNN) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_armnn) endif() -if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) +if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) set(ONNXRUNTIME_INTEROP_TEST_LIBS PRIVATE onnxruntime_language_interop onnxruntime_pyop) endif() set(ONNXRUNTIME_TEST_LIBS - onnxruntime_session - ${ONNXRUNTIME_INTEROP_TEST_LIBS} - ${onnxruntime_libs} - # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime - ${PROVIDERS_NNAPI} - ${PROVIDERS_JS} - ${PROVIDERS_VITISAI} - ${PROVIDERS_QNN} - ${PROVIDERS_SNPE} - ${PROVIDERS_RKNPU} - ${PROVIDERS_DML} - ${PROVIDERS_ACL} - ${PROVIDERS_ARMNN} - ${PROVIDERS_COREML} - # ${PROVIDERS_TVM} - ${PROVIDERS_XNNPACK} - ${PROVIDERS_AZURE} - onnxruntime_optimizer - onnxruntime_providers - onnxruntime_util - ${onnxruntime_tvm_libs} - onnxruntime_framework - onnxruntime_util - onnxruntime_graph - ${ONNXRUNTIME_MLAS_LIBS} - onnxruntime_common - onnxruntime_flatbuffers + onnxruntime_session + ${ONNXRUNTIME_INTEROP_TEST_LIBS} + ${onnxruntime_libs} + + # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime + ${PROVIDERS_NNAPI} + ${PROVIDERS_JS} + ${PROVIDERS_VITISAI} + ${PROVIDERS_QNN} + ${PROVIDERS_SNPE} + ${PROVIDERS_RKNPU} + ${PROVIDERS_DML} + ${PROVIDERS_ACL} + ${PROVIDERS_ARMNN} + ${PROVIDERS_COREML} + + # ${PROVIDERS_TVM} + ${PROVIDERS_XNNPACK} + ${PROVIDERS_AZURE} + onnxruntime_optimizer + onnxruntime_providers + onnxruntime_util + ${onnxruntime_tvm_libs} + onnxruntime_framework + onnxruntime_util + onnxruntime_graph + ${ONNXRUNTIME_MLAS_LIBS} + onnxruntime_common + onnxruntime_flatbuffers ) -if (onnxruntime_ENABLE_TRAINING) - set(ONNXRUNTIME_TEST_LIBS onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_TEST_LIBS}) +if(onnxruntime_ENABLE_TRAINING) + set(ONNXRUNTIME_TEST_LIBS onnxruntime_training ${ONNXRUNTIME_TEST_LIBS}) endif() set(onnxruntime_test_providers_libs - onnxruntime_test_utils - ${ONNXRUNTIME_TEST_LIBS} - ) + onnxruntime_test_utils + ${ONNXRUNTIME_TEST_LIBS} +) if(onnxruntime_USE_TENSORRT) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/tensorrt/*) - list(APPEND onnxruntime_test_framework_src_patterns "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/tensorrt_execution_provider_utils.h") + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/tensorrt/*) + list(APPEND onnxruntime_test_framework_src_patterns "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/tensorrt_execution_provider_utils.h") list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_tensorrt) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_tensorrt onnxruntime_providers_shared) list(APPEND onnxruntime_test_providers_libs ${TENSORRT_LIBRARY_INFER}) endif() if(onnxruntime_USE_MIGRAPHX) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/migraphx/*) - list(APPEND onnxruntime_test_framework_src_patterns "${ONNXRUNTIME_ROOT}/core/providers/migraphx/migraphx_execution_provider_utils.h") + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/migraphx/*) + list(APPEND onnxruntime_test_framework_src_patterns "${ONNXRUNTIME_ROOT}/core/providers/migraphx/migraphx_execution_provider_utils.h") list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_migraphx) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_migraphx onnxruntime_providers_shared) endif() if(onnxruntime_USE_NNAPI_BUILTIN) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/nnapi/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/nnapi/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_nnapi) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_nnapi) endif() if(onnxruntime_USE_JSEP) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/js/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/js/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_js) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js) @@ -635,22 +651,23 @@ if(onnxruntime_USE_QNN) endif() if(onnxruntime_USE_SNPE) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/snpe/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/snpe/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_snpe) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_snpe) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_snpe) endif() if(onnxruntime_USE_RKNPU) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/rknpu/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/rknpu/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_rknpu) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_rknpu) endif() if(onnxruntime_USE_COREML) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*) - if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*) + + if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS") list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_coreml onnxruntime_coreml_proto) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml onnxruntime_coreml_proto) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_coreml onnxruntime_coreml_proto) @@ -662,103 +679,111 @@ if(onnxruntime_USE_COREML) endif() if(onnxruntime_USE_XNNPACK) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/xnnpack/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/xnnpack/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_xnnpack) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_xnnpack) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_xnnpack) endif() if(onnxruntime_USE_AZURE) - list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/azure/*) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/azure/*) list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_azure) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_azure) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_azure) endif() if(WIN32) - if (onnxruntime_USE_TVM) + if(onnxruntime_USE_TVM) list(APPEND disabled_warnings ${DISABLED_WARNINGS_FOR_TVM}) endif() endif() file(GLOB onnxruntime_test_framework_src CONFIGURE_DEPENDS ${onnxruntime_test_framework_src_patterns} - ) +) -#This is a small wrapper library that shouldn't use any onnxruntime internal symbols(except onnxruntime_common). -#Because it could dynamically link to onnxruntime. Otherwise you will have two copies of onnxruntime in the same -#process and you won't know which one you are testing. +# This is a small wrapper library that shouldn't use any onnxruntime internal symbols(except onnxruntime_common). +# Because it could dynamically link to onnxruntime. Otherwise you will have two copies of onnxruntime in the same +# process and you won't know which one you are testing. onnxruntime_add_static_library(onnxruntime_test_utils ${onnxruntime_test_utils_src}) + if(MSVC) target_compile_options(onnxruntime_test_utils PRIVATE "$<$:SHELL:--compiler-options /utf-8>" - "$<$>:/utf-8>") + "$<$>:/utf-8>") target_compile_options(onnxruntime_test_utils PRIVATE "$<$:-Xcompiler /wd6326>" - "$<$>:/wd6326>") + "$<$>:/wd6326>") else() target_compile_definitions(onnxruntime_test_utils PUBLIC -DNSYNC_ATOMIC_CPP11) target_include_directories(onnxruntime_test_utils PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) onnxruntime_add_include_to_target(onnxruntime_test_utils nsync::nsync_cpp) endif() -if (onnxruntime_USE_NCCL) + +if(onnxruntime_USE_NCCL) target_include_directories(onnxruntime_test_utils PRIVATE ${NCCL_INCLUDE_DIRS}) endif() -if (onnxruntime_USE_ROCM) + +if(onnxruntime_USE_ROCM) target_include_directories(onnxruntime_test_utils PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining) endif() -onnxruntime_add_include_to_target(onnxruntime_test_utils onnxruntime_common onnxruntime_framework onnxruntime_session GTest::gtest GTest::gmock onnx onnx_proto flatbuffers::flatbuffers nlohmann_json::nlohmann_json Boost::mp11 safeint_interface) - +onnxruntime_add_include_to_target(onnxruntime_test_utils onnxruntime_common onnxruntime_framework onnxruntime_session GTest::gtest GTest::gmock onnx onnx_proto flatbuffers::flatbuffers nlohmann_json::nlohmann_json Boost::mp11 safeint_interface) -if (onnxruntime_USE_DML) +if(onnxruntime_USE_DML) target_add_dml(onnxruntime_test_utils) endif() + add_dependencies(onnxruntime_test_utils ${onnxruntime_EXTERNAL_DEPENDENCIES}) target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/include" PRIVATE - ${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT}) + ${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT}) set_target_properties(onnxruntime_test_utils PROPERTIES FOLDER "ONNXRuntimeTest") source_group(TREE ${TEST_SRC_DIR} FILES ${onnxruntime_test_utils_src}) set(onnx_test_runner_src_dir ${TEST_SRC_DIR}/onnx) file(GLOB onnx_test_runner_common_srcs CONFIGURE_DEPENDS - ${onnx_test_runner_src_dir}/*.h - ${onnx_test_runner_src_dir}/*.cc) + ${onnx_test_runner_src_dir}/*.h + ${onnx_test_runner_src_dir}/*.cc) list(REMOVE_ITEM onnx_test_runner_common_srcs ${onnx_test_runner_src_dir}/main.cc) onnxruntime_add_static_library(onnx_test_runner_common ${onnx_test_runner_common_srcs}) + if(MSVC) target_compile_options(onnx_test_runner_common PRIVATE "$<$:SHELL:--compiler-options /utf-8>" - "$<$>:/utf-8>") + "$<$>:/utf-8>") else() target_compile_definitions(onnx_test_runner_common PUBLIC -DNSYNC_ATOMIC_CPP11) target_include_directories(onnx_test_runner_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) onnxruntime_add_include_to_target(onnx_test_runner_common nsync::nsync_cpp) endif() -if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8) - #TODO: fix the warnings, they are dangerous + +if(MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8) + # TODO: fix the warnings, they are dangerous target_compile_options(onnx_test_runner_common PRIVATE "/wd4244") endif() + onnxruntime_add_include_to_target(onnx_test_runner_common onnxruntime_common onnxruntime_framework - onnxruntime_test_utils onnx onnx_proto re2::re2 flatbuffers::flatbuffers Boost::mp11 safeint_interface) + onnxruntime_test_utils onnx onnx_proto re2::re2 flatbuffers::flatbuffers Boost::mp11 safeint_interface) add_dependencies(onnx_test_runner_common onnx_test_data_proto ${onnxruntime_EXTERNAL_DEPENDENCIES}) target_include_directories(onnx_test_runner_common PRIVATE ${eigen_INCLUDE_DIRS} - ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) + ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT}) set_target_properties(onnx_test_runner_common PROPERTIES FOLDER "ONNXRuntimeTest") set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src} - ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantiztion_src}) + ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantiztion_src}) + if(NOT TARGET onnxruntime AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") list(APPEND all_tests ${onnxruntime_shared_lib_test_SRC}) endif() -if (onnxruntime_USE_CUDA) +if(onnxruntime_USE_CUDA) onnxruntime_add_static_library(onnxruntime_test_cuda_ops_lib ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu) list(APPEND onnxruntime_test_common_libs onnxruntime_test_cuda_ops_lib) file(GLOB onnxruntime_test_providers_cuda_ut_src CONFIGURE_DEPENDS "${TEST_SRC_DIR}/providers/cuda/test_cases/*" ) + # onnxruntime_providers_cuda_ut is only for unittests. onnxruntime_add_shared_library_module(onnxruntime_providers_cuda_ut ${onnxruntime_test_providers_cuda_ut_src} $) config_cuda_provider_shared_module(onnxruntime_providers_cuda_ut) @@ -767,21 +792,21 @@ if (onnxruntime_USE_CUDA) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_cuda_ut) endif() -set(all_dependencies ${onnxruntime_test_providers_dependencies} ) +set(all_dependencies ${onnxruntime_test_providers_dependencies}) -if (onnxruntime_ENABLE_TRAINING) +if(onnxruntime_ENABLE_TRAINING) list(APPEND all_tests ${onnxruntime_test_training_src}) endif() -if (onnxruntime_ENABLE_TRAINING_APIS) - list(APPEND all_tests ${onnxruntime_test_training_api_src}) +if(onnxruntime_ENABLE_TRAINING_APIS) + list(APPEND all_tests ${onnxruntime_test_training_api_src}) endif() -if (onnxruntime_USE_TVM) - list(APPEND all_tests ${onnxruntime_test_tvm_src}) +if(onnxruntime_USE_TVM) + list(APPEND all_tests ${onnxruntime_test_tvm_src}) endif() -if (onnxruntime_USE_OPENVINO) +if(onnxruntime_USE_OPENVINO) list(APPEND all_tests ${onnxruntime_test_openvino_src}) endif() @@ -792,8 +817,8 @@ if(WIN32) list(APPEND onnxruntime_test_providers_libs Advapi32) endif() -if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - if (NOT onnxruntime_ENABLE_WEBASSEMBLY_THREADS) +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if(NOT onnxruntime_ENABLE_WEBASSEMBLY_THREADS) list(REMOVE_ITEM all_tests "${TEST_SRC_DIR}/framework/execution_frame_test.cc" "${TEST_SRC_DIR}/framework/inference_session_test.cc" @@ -807,26 +832,28 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") endif() set(test_all_args) -if (onnxruntime_USE_TENSORRT) + +if(onnxruntime_USE_TENSORRT) # TRT EP CI takes much longer time when updating to TRT 8.2 # So, we only run trt ep and exclude other eps to reduce CI test time. # # The test names of model tests were using sequential number in the past. # This PR https://github.com/microsoft/onnxruntime/pull/10220 (Please see ExpandModelName function in model_tests.cc for more details) # made test name contain the "ep" and "model path" information, so we can easily filter the tests using cuda ep or other ep with *cpu_* or *xxx_*. - list(APPEND test_all_args "--gtest_filter=-*cpu_*:*cuda_*" ) -endif () + list(APPEND test_all_args "--gtest_filter=-*cpu_*:*cuda_*") +endif() AddTest( TARGET onnxruntime_test_all SOURCES ${all_tests} ${onnxruntime_unittest_main_src} LIBS - onnx_test_runner_common ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs} - onnx_test_data_proto + onnx_test_runner_common ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs} + onnx_test_data_proto DEPENDS ${all_dependencies} TEST_ARGS ${test_all_args} ) -if (MSVC) + +if(MSVC) # The warning means the type of two integral values around a binary operator is narrow than their result. # If we promote the two input values first, it could be more tolerant to integer overflow. # However, this is test code. We are less concerned. @@ -837,66 +864,75 @@ endif() # TODO fix shorten-64-to-32 warnings # there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline' -if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8) +if(HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8) target_compile_options(onnxruntime_test_all PRIVATE -Wno-error=shorten-64-to-32) endif() -if (UNIX AND onnxruntime_USE_TENSORRT) - # The test_main.cc includes NvInfer.h where it has many deprecated declarations - # simply ignore them for TensorRT EP build - set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations") +if(UNIX AND onnxruntime_USE_TENSORRT) + # The test_main.cc includes NvInfer.h where it has many deprecated declarations + # simply ignore them for TensorRT EP build + set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations") endif() -if (MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS) -# attention_op_test.cc: Function uses '49152' bytes of stack: exceeds /analyze:stacksize '16384'.. -target_compile_options(onnxruntime_test_all PRIVATE "/analyze:stacksize 131072") +if(MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS) + # attention_op_test.cc: Function uses '49152' bytes of stack: exceeds /analyze:stacksize '16384'.. + target_compile_options(onnxruntime_test_all PRIVATE "/analyze:stacksize 131072") endif() # the default logger tests conflict with the need to have an overall default logger # so skip in this type of target_compile_definitions(onnxruntime_test_all PUBLIC -DSKIP_DEFAULT_LOGGER_TESTS) -if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + +if(CMAKE_SYSTEM_NAME STREQUAL "iOS") target_compile_definitions(onnxruntime_test_all_xc PUBLIC -DSKIP_DEFAULT_LOGGER_TESTS) endif() + if(onnxruntime_RUN_MODELTEST_IN_DEBUG_MODE) target_compile_definitions(onnxruntime_test_all PUBLIC -DRUN_MODELTEST_IN_DEBUG_MODE) endif() -if (onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS) + +if(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS) target_compile_definitions(onnxruntime_test_all PRIVATE DEBUG_NODE_INPUTS_OUTPUTS) endif() -if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) +if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) target_link_libraries(onnxruntime_test_all PRIVATE onnxruntime_language_interop onnxruntime_pyop) endif() -if (onnxruntime_USE_ROCM) - if (onnxruntime_USE_COMPOSABLE_KERNEL) + +if(onnxruntime_USE_ROCM) + if(onnxruntime_USE_COMPOSABLE_KERNEL) target_compile_definitions(onnxruntime_test_all PRIVATE USE_COMPOSABLE_KERNEL) endif() + target_compile_options(onnxruntime_test_all PRIVATE -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1) - target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining) + target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining) endif() -if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) + +if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) target_link_libraries(onnxruntime_test_all PRIVATE Python::Python) endif() -if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js) set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1") - if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS) + + if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS) set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1") endif() - if (onnxruntime_USE_JSEP) + + if(onnxruntime_USE_JSEP) set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " --pre-js \"${ONNXRUNTIME_ROOT}/wasm/js_internal_api.js\"") endif() - ### - ### if you want to investigate or debug a test failure in onnxruntime_test_all, replace the following line. - ### those flags slow down the CI test significantly, so we don't use them by default. - ### - # set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2") + # ## + # ## if you want to investigate or debug a test failure in onnxruntime_test_all, replace the following line. + # ## those flags slow down the CI test significantly, so we don't use them by default. + # ## + # set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2") set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s SAFE_HEAP=0 -s STACK_OVERFLOW_CHECK=1") endif() -if (onnxruntime_ENABLE_ATEN) +if(onnxruntime_ENABLE_ATEN) target_compile_definitions(onnxruntime_test_all PRIVATE ENABLE_ATEN) endif() @@ -904,7 +940,8 @@ set(test_data_target onnxruntime_test_all) onnxruntime_add_static_library(onnx_test_data_proto ${TEST_SRC_DIR}/proto/tml.proto) add_dependencies(onnx_test_data_proto onnx_proto ${onnxruntime_EXTERNAL_DEPENDENCIES}) -#onnx_proto target should mark this definition as public, instead of private + +# onnx_proto target should mark this definition as public, instead of private target_compile_definitions(onnx_test_data_proto PRIVATE "-DONNX_API=") onnxruntime_add_include_to_target(onnx_test_data_proto onnx_proto) target_include_directories(onnx_test_data_proto PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) @@ -934,74 +971,80 @@ add_custom_command( ${TEST_SAMPLES_SRC} ${TEST_SAMPLES_DES}) -if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) - if (onnxruntime_USE_SNPE) +if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) + if(onnxruntime_USE_SNPE) add_custom_command( TARGET ${test_data_target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${SNPE_SO_FILES} $ - ) + ) endif() - if (onnxruntime_USE_QNN) - if (NOT QNN_ARCH_ABI) + if(onnxruntime_USE_QNN) + if(NOT QNN_ARCH_ABI) string(TOLOWER ${onnxruntime_target_platform} GEN_PLATFORM) + if(MSVC) - message(STATUS "Building MSVC for architecture ${CMAKE_SYSTEM_PROCESSOR} with CMAKE_GENERATOR_PLATFORM as ${GEN_PLATFORM}") - if (${GEN_PLATFORM} STREQUAL "arm64") - set(QNN_ARCH_ABI aarch64-windows-msvc) - else() - set(QNN_ARCH_ABI x86_64-windows-msvc) - endif() + message(STATUS "Building MSVC for architecture ${CMAKE_SYSTEM_PROCESSOR} with CMAKE_GENERATOR_PLATFORM as ${GEN_PLATFORM}") + + if(${GEN_PLATFORM} STREQUAL "arm64") + set(QNN_ARCH_ABI aarch64-windows-msvc) + else() + set(QNN_ARCH_ABI x86_64-windows-msvc) + endif() else() - if (${CMAKE_SYSTEM_NAME} STREQUAL "Android") - set(QNN_ARCH_ABI aarch64-android-clang6.0) - elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - if (${GEN_PLATFORM} STREQUAL "x86_64") - set(QNN_ARCH_ABI x86_64-linux-clang) - else() - set(QNN_ARCH_ABI aarch64-android) - endif() + if(${CMAKE_SYSTEM_NAME} STREQUAL "Android") + set(QNN_ARCH_ABI aarch64-android-clang6.0) + elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + if(${GEN_PLATFORM} STREQUAL "x86_64") + set(QNN_ARCH_ABI x86_64-linux-clang) + else() + set(QNN_ARCH_ABI aarch64-android) endif() + endif() endif() endif() - if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") - file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.so" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.dll" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.dll") - if (${QNN_ARCH_ABI} STREQUAL "aarch64-windows-msvc") - file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/hexagon-v68/unsigned/libQnnHtpV68Skel.so" "${onnxruntime_QNN_HOME}/target/hexagon-v68/lib/unsigned/libQnnHtpV68Skel.so") - list(APPEND QNN_LIB_FILES ${EXTRA_HTP_LIB}) - endif() - message(STATUS "QNN lib files: " ${QNN_LIB_FILES}) - add_custom_command( - TARGET ${test_data_target} POST_BUILD - COMMAND ${CMAKE_COMMAND} -E copy ${QNN_LIB_FILES} $ - ) + if(MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux") + file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.so" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.dll" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.dll") + + if(${QNN_ARCH_ABI} STREQUAL "aarch64-windows-msvc") + file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/hexagon-v68/unsigned/libQnnHtpV68Skel.so" "${onnxruntime_QNN_HOME}/target/hexagon-v68/lib/unsigned/libQnnHtpV68Skel.so") + list(APPEND QNN_LIB_FILES ${EXTRA_HTP_LIB}) + endif() + + message(STATUS "QNN lib files: " ${QNN_LIB_FILES}) + add_custom_command( + TARGET ${test_data_target} POST_BUILD + COMMAND ${CMAKE_COMMAND} -E copy ${QNN_LIB_FILES} $ + ) endif() endif() - if (onnxruntime_USE_DNNL) + if(onnxruntime_USE_DNNL) if(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND onnxruntime_DNNL_OPENCL_ROOT STREQUAL "") message(FATAL_ERROR "--dnnl_opencl_root required") - elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "" AND NOT (onnxruntime_DNNL_OPENCL_ROOT STREQUAL "")) + elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "" AND NOT(onnxruntime_DNNL_OPENCL_ROOT STREQUAL "")) message(FATAL_ERROR "--dnnl_gpu_runtime required") - elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND NOT (onnxruntime_DNNL_OPENCL_ROOT STREQUAL "")) - #file(TO_CMAKE_PATH ${onnxruntime_DNNL_OPENCL_ROOT} onnxruntime_DNNL_OPENCL_ROOT) - #set(DNNL_OCL_INCLUDE_DIR ${onnxruntime_DNNL_OPENCL_ROOT}/include) - #set(DNNL_GPU_CMAKE_ARGS "-DDNNL_GPU_RUNTIME=OCL " "-DOPENCLROOT=${onnxruntime_DNNL_OPENCL_ROOT}") + elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND NOT(onnxruntime_DNNL_OPENCL_ROOT STREQUAL "")) + # file(TO_CMAKE_PATH ${onnxruntime_DNNL_OPENCL_ROOT} onnxruntime_DNNL_OPENCL_ROOT) + # set(DNNL_OCL_INCLUDE_DIR ${onnxruntime_DNNL_OPENCL_ROOT}/include) + # set(DNNL_GPU_CMAKE_ARGS "-DDNNL_GPU_RUNTIME=OCL " "-DOPENCLROOT=${onnxruntime_DNNL_OPENCL_ROOT}") target_compile_definitions(onnxruntime_test_all PUBLIC -DDNNL_GPU_RUNTIME=OCL) endif() + list(APPEND onnx_test_libs dnnl) add_custom_command( TARGET ${test_data_target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy ${DNNL_DLL_PATH} $ - ) + ) endif() + if(WIN32) - if (onnxruntime_USE_TVM) + if(onnxruntime_USE_TVM) add_custom_command( TARGET ${test_data_target} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy $ $ - ) + ) endif() endif() @@ -1015,29 +1058,31 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() endif() - set(onnx_test_libs onnxruntime_test_utils ${ONNXRUNTIME_TEST_LIBS} onnx_test_data_proto ${onnxruntime_EXTERNAL_LIBRARIES}) -if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) +if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) list(APPEND onnx_test_libs onnxruntime_language_interop onnxruntime_pyop) endif() onnxruntime_add_executable(onnx_test_runner ${onnx_test_runner_src_dir}/main.cc) + if(MSVC) target_compile_options(onnx_test_runner PRIVATE "$<$:SHELL:--compiler-options /utf-8>" - "$<$>:/utf-8>") + "$<$>:/utf-8>") endif() + if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") set_target_properties(onnx_test_runner PROPERTIES XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO" ) endif() -if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS) + +if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS) set_target_properties(onnx_test_runner PROPERTIES LINK_FLAGS "-s NODERAWFS=1 -s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1") else() set_target_properties(onnx_test_runner PROPERTIES LINK_FLAGS "-s NODERAWFS=1 -s ALLOW_MEMORY_GROWTH=1") @@ -1046,27 +1091,30 @@ endif() target_link_libraries(onnx_test_runner PRIVATE onnx_test_runner_common ${GETOPT_LIB_WIDE} ${onnx_test_libs} nlohmann_json::nlohmann_json) target_include_directories(onnx_test_runner PRIVATE ${ONNXRUNTIME_ROOT}) -if (onnxruntime_USE_ROCM) + +if(onnxruntime_USE_ROCM) target_include_directories(onnx_test_runner PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining) endif() -if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) + +if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) target_link_libraries(onnx_test_runner PRIVATE Python::Python) endif() + set_target_properties(onnx_test_runner PROPERTIES FOLDER "ONNXRuntimeTest") -if (onnxruntime_USE_TVM) - if (WIN32) +if(onnxruntime_USE_TVM) + if(WIN32) target_link_options(onnx_test_runner PRIVATE "/STACK:4000000") endif() endif() install(TARGETS onnx_test_runner - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - BUNDLE DESTINATION ${CMAKE_INSTALL_LIBDIR} - RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + BUNDLE DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}) -if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) +if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) if(onnxruntime_BUILD_BENCHMARKS) SET(BENCHMARK_DIR ${TEST_SRC_DIR}/onnx/microbenchmark) onnxruntime_add_executable(onnxruntime_benchmark @@ -1085,23 +1133,26 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) ${BENCHMARK_DIR}/reduceminmax.cc) target_include_directories(onnxruntime_benchmark PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${ONNXRUNTIME_ROOT}/core/mlas/inc) target_compile_definitions(onnxruntime_benchmark PRIVATE BENCHMARK_STATIC_DEFINE) + if(WIN32) target_compile_options(onnxruntime_benchmark PRIVATE "$<$:-Xcompiler /wd4141>" - "$<$>:/wd4141>") + "$<$>:/wd4141>") + # Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak. target_compile_options(onnxruntime_benchmark PRIVATE "$<$:-Xcompiler /wd26409>" - "$<$>:/wd26409>") + "$<$>:/wd26409>") target_compile_options(onnxruntime_benchmark PRIVATE "$<$:-Xcompiler /wd26400>" - "$<$>:/wd26400>") + "$<$>:/wd26400>") target_compile_options(onnxruntime_benchmark PRIVATE "$<$:-Xcompiler /wd26814>" - "$<$>:/wd26814>") + "$<$>:/wd26814>") target_compile_options(onnxruntime_benchmark PRIVATE "$<$:-Xcompiler /wd26814>" - "$<$>:/wd26497>") + "$<$>:/wd26497>") target_compile_options(onnxruntime_benchmark PRIVATE "$<$:-Xcompiler /wd26426>" - "$<$>:/wd26426>") + "$<$>:/wd26426>") target_compile_options(onnxruntime_benchmark PRIVATE "$<$:SHELL:--compiler-options /utf-8>" - "$<$>:/utf-8>") + "$<$>:/utf-8>") endif() + target_link_libraries(onnxruntime_benchmark PRIVATE onnx_test_runner_common benchmark::benchmark ${onnx_test_libs}) add_dependencies(onnxruntime_benchmark ${onnxruntime_EXTERNAL_DEPENDENCIES}) set_target_properties(onnxruntime_benchmark PROPERTIES FOLDER "ONNXRuntimeTest") @@ -1112,18 +1163,23 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) target_include_directories(onnxruntime_mlas_benchmark PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc) target_link_libraries(onnxruntime_mlas_benchmark PRIVATE benchmark::benchmark onnxruntime_util onnxruntime_framework ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common ${CMAKE_DL_LIBS}) target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE BENCHMARK_STATIC_DEFINE) + if(WIN32) target_link_libraries(onnxruntime_mlas_benchmark PRIVATE debug Dbghelp) + # Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak. target_compile_options(onnxruntime_mlas_benchmark PRIVATE /wd26409) + # "Global initializer calls a non-constexpr function." BENCHMARK_CAPTURE macro needs this. target_compile_options(onnxruntime_mlas_benchmark PRIVATE /wd26426) else() target_link_libraries(onnxruntime_mlas_benchmark PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS}) endif() - if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + + if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") target_link_libraries(onnxruntime_mlas_benchmark PRIVATE cpuinfo) endif() + set_target_properties(onnxruntime_mlas_benchmark PROPERTIES FOLDER "ONNXRuntimeTest") endif() @@ -1131,83 +1187,94 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) target_compile_options(onnx_test_runner_common PRIVATE -D_CRT_SECURE_NO_WARNINGS) endif() - if (NOT onnxruntime_REDUCED_OPS_BUILD AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if(NOT onnxruntime_REDUCED_OPS_BUILD AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") add_test(NAME onnx_test_pytorch_converted COMMAND onnx_test_runner ${onnx_SOURCE_DIR}/onnx/backend/test/data/pytorch-converted) add_test(NAME onnx_test_pytorch_operator COMMAND onnx_test_runner ${onnx_SOURCE_DIR}/onnx/backend/test/data/pytorch-operator) endif() - if (CMAKE_SYSTEM_NAME STREQUAL "Android") - list(APPEND android_shared_libs log android) + if(CMAKE_SYSTEM_NAME STREQUAL "Android") + list(APPEND android_shared_libs log android) endif() endif() - -if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) - #perf test runner +if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) + # perf test runner set(onnxruntime_perf_test_src_dir ${TEST_SRC_DIR}/perftest) set(onnxruntime_perf_test_src_patterns - "${onnxruntime_perf_test_src_dir}/*.cc" - "${onnxruntime_perf_test_src_dir}/*.h") + "${onnxruntime_perf_test_src_dir}/*.cc" + "${onnxruntime_perf_test_src_dir}/*.h") if(WIN32) list(APPEND onnxruntime_perf_test_src_patterns "${onnxruntime_perf_test_src_dir}/windows/*.cc" - "${onnxruntime_perf_test_src_dir}/windows/*.h" ) - else () + "${onnxruntime_perf_test_src_dir}/windows/*.h") + else() list(APPEND onnxruntime_perf_test_src_patterns "${onnxruntime_perf_test_src_dir}/posix/*.cc" - "${onnxruntime_perf_test_src_dir}/posix/*.h" ) + "${onnxruntime_perf_test_src_dir}/posix/*.h") endif() file(GLOB onnxruntime_perf_test_src CONFIGURE_DEPENDS ${onnxruntime_perf_test_src_patterns} - ) + ) onnxruntime_add_executable(onnxruntime_perf_test ${onnxruntime_perf_test_src} ${ONNXRUNTIME_ROOT}/core/platform/path_lib.cc) + if(MSVC) target_compile_options(onnxruntime_perf_test PRIVATE "$<$:SHELL:--compiler-options /utf-8>" - "$<$>:/utf-8>") + "$<$>:/utf-8>") endif() + target_include_directories(onnxruntime_perf_test PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT} - ${eigen_INCLUDE_DIRS} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} - ${CMAKE_CURRENT_BINARY_DIR}) - if (onnxruntime_USE_ROCM) + ${eigen_INCLUDE_DIRS} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} + ${CMAKE_CURRENT_BINARY_DIR}) + + if(onnxruntime_USE_ROCM) target_include_directories(onnxruntime_perf_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining) endif() - if (WIN32) + + if(WIN32) target_compile_options(onnxruntime_perf_test PRIVATE ${disabled_warnings}) - if (NOT DEFINED SYS_PATH_LIB) + + if(NOT DEFINED SYS_PATH_LIB) set(SYS_PATH_LIB shlwapi) endif() endif() + if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") set_target_properties(onnxruntime_perf_test PROPERTIES XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO" ) endif() - if (onnxruntime_BUILD_SHARED_LIB) - #It will dynamically link to onnxruntime. So please don't add onxruntime_graph/onxruntime_framework/... here. - #onnxruntime_common is kind of ok because it is thin, tiny and totally stateless. + if(onnxruntime_BUILD_SHARED_LIB) + # It will dynamically link to onnxruntime. So please don't add onxruntime_graph/onxruntime_framework/... here. + # onnxruntime_common is kind of ok because it is thin, tiny and totally stateless. set(onnxruntime_perf_test_libs - onnx_test_runner_common onnxruntime_test_utils onnxruntime_common - onnxruntime onnxruntime_flatbuffers onnx_test_data_proto - ${onnxruntime_EXTERNAL_LIBRARIES} - ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) + onnx_test_runner_common onnxruntime_test_utils onnxruntime_common + onnxruntime onnxruntime_flatbuffers onnx_test_data_proto + ${onnxruntime_EXTERNAL_LIBRARIES} + ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS}) + if(NOT WIN32) list(APPEND onnxruntime_perf_test_libs nsync::nsync_cpp) + if(onnxruntime_USE_SNPE) list(APPEND onnxruntime_perf_test_libs onnxruntime_providers_snpe) endif() endif() - if (CMAKE_SYSTEM_NAME STREQUAL "Android") + + if(CMAKE_SYSTEM_NAME STREQUAL "Android") list(APPEND onnxruntime_perf_test_libs ${android_shared_libs}) endif() + target_link_libraries(onnxruntime_perf_test PRIVATE ${onnxruntime_perf_test_libs} Threads::Threads) + if(WIN32) target_link_libraries(onnxruntime_perf_test PRIVATE debug dbghelp advapi32) endif() + if(tensorflow_C_PACKAGE_PATH) target_include_directories(onnxruntime_perf_test PRIVATE ${tensorflow_C_PACKAGE_PATH}/include) target_link_directories(onnxruntime_perf_test PRIVATE ${tensorflow_C_PACKAGE_PATH}/lib) @@ -1217,55 +1284,62 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) else() target_link_libraries(onnxruntime_perf_test PRIVATE onnx_test_runner_common ${GETOPT_LIB_WIDE} ${onnx_test_libs}) endif() + set_target_properties(onnxruntime_perf_test PROPERTIES FOLDER "ONNXRuntimeTest") - if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS AND NOT onnxruntime_BUILD_SHARED_LIB) + if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS AND NOT onnxruntime_BUILD_SHARED_LIB) target_link_libraries(onnxruntime_perf_test PRIVATE onnxruntime_language_interop onnxruntime_pyop) endif() - if (onnxruntime_USE_TVM) - if (WIN32) + if(onnxruntime_USE_TVM) + if(WIN32) target_link_options(onnxruntime_perf_test PRIVATE "/STACK:4000000") endif() endif() # shared lib - if (onnxruntime_BUILD_SHARED_LIB) + if(onnxruntime_BUILD_SHARED_LIB) onnxruntime_add_static_library(onnxruntime_mocked_allocator ${TEST_SRC_DIR}/util/test_allocator.cc) target_include_directories(onnxruntime_mocked_allocator PUBLIC ${TEST_SRC_DIR}/util/include) target_link_libraries(onnxruntime_mocked_allocator PRIVATE ${GSL_TARGET}) set_target_properties(onnxruntime_mocked_allocator PROPERTIES FOLDER "ONNXRuntimeTest") - ################################################################# + # ################################################################ # test inference using shared lib set(onnxruntime_shared_lib_test_LIBS onnxruntime_mocked_allocator onnxruntime_test_utils onnxruntime_common onnx_proto) + if(NOT WIN32) list(APPEND onnxruntime_shared_lib_test_LIBS nsync::nsync_cpp) + if(onnxruntime_USE_SNPE) list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_providers_snpe) endif() endif() - if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + + if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") list(APPEND onnxruntime_shared_lib_test_LIBS cpuinfo) endif() - if (onnxruntime_USE_CUDA) + + if(onnxruntime_USE_CUDA) list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_test_cuda_ops_lib cudart) endif() - if (onnxruntime_USE_TENSORRT) + + if(onnxruntime_USE_TENSORRT) list(APPEND onnxruntime_shared_lib_test_LIBS ${TENSORRT_LIBRARY_INFER}) endif() - if (CMAKE_SYSTEM_NAME STREQUAL "Android") + + if(CMAKE_SYSTEM_NAME STREQUAL "Android") list(APPEND onnxruntime_shared_lib_test_LIBS ${android_shared_libs}) endif() AddTest(DYN - TARGET onnxruntime_shared_lib_test - SOURCES ${onnxruntime_shared_lib_test_SRC} ${onnxruntime_unittest_main_src} - LIBS ${onnxruntime_shared_lib_test_LIBS} - DEPENDS ${all_dependencies} + TARGET onnxruntime_shared_lib_test + SOURCES ${onnxruntime_shared_lib_test_SRC} ${onnxruntime_unittest_main_src} + LIBS ${onnxruntime_shared_lib_test_LIBS} + DEPENDS ${all_dependencies} ) - if (CMAKE_SYSTEM_NAME STREQUAL "Android") + if(CMAKE_SYSTEM_NAME STREQUAL "Android") target_sources(onnxruntime_shared_lib_test PRIVATE "${ONNXRUNTIME_ROOT}/core/platform/android/cxa_demangle.cc" "${TEST_SRC_DIR}/platform/android/cxa_demangle_test.cc" @@ -1273,7 +1347,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) target_compile_definitions(onnxruntime_shared_lib_test PRIVATE USE_DUMMY_EXA_DEMANGLE=1) endif() - if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + if(CMAKE_SYSTEM_NAME STREQUAL "iOS") add_custom_command( TARGET onnxruntime_shared_lib_test POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory @@ -1281,19 +1355,19 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) $/testdata) endif() - if (UNIX AND onnxruntime_USE_TENSORRT) - # The test_main.cc includes NvInfer.h where it has many deprecated declarations - # simply ignore them for TensorRT EP build - set_property(TARGET onnxruntime_shared_lib_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations") + if(UNIX AND onnxruntime_USE_TENSORRT) + # The test_main.cc includes NvInfer.h where it has many deprecated declarations + # simply ignore them for TensorRT EP build + set_property(TARGET onnxruntime_shared_lib_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations") endif() # test inference using global threadpools - if (NOT CMAKE_SYSTEM_NAME MATCHES "Android|iOS" AND NOT onnxruntime_MINIMAL_BUILD) + if(NOT CMAKE_SYSTEM_NAME MATCHES "Android|iOS" AND NOT onnxruntime_MINIMAL_BUILD) AddTest(DYN - TARGET onnxruntime_global_thread_pools_test - SOURCES ${onnxruntime_global_thread_pools_test_SRC} - LIBS ${onnxruntime_shared_lib_test_LIBS} - DEPENDS ${all_dependencies} + TARGET onnxruntime_global_thread_pools_test + SOURCES ${onnxruntime_global_thread_pools_test_SRC} + LIBS ${onnxruntime_shared_lib_test_LIBS} + DEPENDS ${all_dependencies} ) endif() endif() @@ -1304,23 +1378,23 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) AddTest( TARGET onnxruntime_test_debug_node_inputs_outputs SOURCES - "${TEST_SRC_DIR}/debug_node_inputs_outputs/debug_node_inputs_outputs_utils_test.cc" - "${TEST_SRC_DIR}/framework/TestAllocatorManager.cc" - "${TEST_SRC_DIR}/framework/test_utils.cc" - "${TEST_SRC_DIR}/providers/base_tester.h" - "${TEST_SRC_DIR}/providers/base_tester.cc" - "${TEST_SRC_DIR}/providers/checkers.h" - "${TEST_SRC_DIR}/providers/checkers.cc" - "${TEST_SRC_DIR}/providers/op_tester.h" - "${TEST_SRC_DIR}/providers/op_tester.cc" - "${TEST_SRC_DIR}/providers/provider_test_utils.h" - "${TEST_SRC_DIR}/providers/tester_types.h" - ${onnxruntime_unittest_main_src} + "${TEST_SRC_DIR}/debug_node_inputs_outputs/debug_node_inputs_outputs_utils_test.cc" + "${TEST_SRC_DIR}/framework/TestAllocatorManager.cc" + "${TEST_SRC_DIR}/framework/test_utils.cc" + "${TEST_SRC_DIR}/providers/base_tester.h" + "${TEST_SRC_DIR}/providers/base_tester.cc" + "${TEST_SRC_DIR}/providers/checkers.h" + "${TEST_SRC_DIR}/providers/checkers.cc" + "${TEST_SRC_DIR}/providers/op_tester.h" + "${TEST_SRC_DIR}/providers/op_tester.cc" + "${TEST_SRC_DIR}/providers/provider_test_utils.h" + "${TEST_SRC_DIR}/providers/tester_types.h" + ${onnxruntime_unittest_main_src} LIBS ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs} DEPENDS ${all_dependencies} ) - if (onnxruntime_USE_ROCM) + if(onnxruntime_USE_ROCM) target_include_directories(onnxruntime_test_debug_node_inputs_outputs PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hipcub/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include) target_include_directories(onnxruntime_test_debug_node_inputs_outputs PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime) endif(onnxruntime_USE_ROCM) @@ -1329,17 +1403,17 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) PRIVATE DEBUG_NODE_INPUTS_OUTPUTS) endif(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS) - #some ETW tools + # some ETW tools if(WIN32 AND onnxruntime_ENABLE_INSTRUMENT) onnxruntime_add_executable(generate_perf_report_from_etl ${ONNXRUNTIME_ROOT}/tool/etw/main.cc - ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc - ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc) + ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc + ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc) target_compile_definitions(generate_perf_report_from_etl PRIVATE "_CONSOLE" "_UNICODE" "UNICODE") target_link_libraries(generate_perf_report_from_etl PRIVATE tdh Advapi32) onnxruntime_add_executable(compare_two_sessions ${ONNXRUNTIME_ROOT}/tool/etw/compare_two_sessions.cc - ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc - ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc) + ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc + ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc) target_compile_definitions(compare_two_sessions PRIVATE "_CONSOLE" "_UNICODE" "UNICODE") target_link_libraries(compare_two_sessions PRIVATE ${GETOPT_LIB_WIDE} tdh Advapi32) endif() @@ -1349,45 +1423,54 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) "${TEST_SRC_DIR}/mlas/unittest/*.cpp" ) onnxruntime_add_executable(onnxruntime_mlas_test ${onnxruntime_mlas_test_src}) + if(MSVC) target_compile_options(onnxruntime_mlas_test PRIVATE "$<$:-Xcompiler /wd26409>" - "$<$>:/wd26409>") + "$<$>:/wd26409>") target_compile_options(onnxruntime_mlas_test PRIVATE "$<$:SHELL:--compiler-options /utf-8>" - "$<$>:/utf-8>") + "$<$>:/utf-8>") target_compile_options(onnxruntime_mlas_test PRIVATE "$<$:-Xcompiler /wd6326>" - "$<$>:/wd6326>") + "$<$>:/wd6326>") target_compile_options(onnxruntime_mlas_test PRIVATE "$<$:-Xcompiler /wd26426>" - "$<$>:/wd26426>") + "$<$>:/wd26426>") endif() + if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS") set_target_properties(onnxruntime_mlas_test PROPERTIES XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO" ) endif() + target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT} - ${CMAKE_CURRENT_BINARY_DIR}) + ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(onnxruntime_mlas_test PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common) - if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + + if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") target_link_libraries(onnxruntime_mlas_test PRIVATE cpuinfo) endif() + if(NOT WIN32) target_link_libraries(onnxruntime_mlas_test PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS}) endif() - if (CMAKE_SYSTEM_NAME STREQUAL "Android") + + if(CMAKE_SYSTEM_NAME STREQUAL "Android") target_link_libraries(onnxruntime_mlas_test PRIVATE ${android_shared_libs}) endif() if(WIN32) target_link_libraries(onnxruntime_mlas_test PRIVATE debug Dbghelp Advapi32) endif() - if (onnxruntime_LINK_LIBATOMIC) + + if(onnxruntime_LINK_LIBATOMIC) target_link_libraries(onnxruntime_mlas_test PRIVATE atomic) endif() + target_link_libraries(onnxruntime_mlas_test PRIVATE Threads::Threads) set_target_properties(onnxruntime_mlas_test PROPERTIES FOLDER "ONNXRuntimeTest") - if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS) + + if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS) set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1") else() set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1") @@ -1399,8 +1482,8 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) # TODO(askhade): Fix the warnings. # This has no impact on the release as the release package and the pipeline, both do not use this. # This is used by devs for testing training apis. - #if (onnxruntime_ENABLE_TRAINING_APIS) - if (0) + # if (onnxruntime_ENABLE_TRAINING_APIS) + if(0) # Only files in the trainer and common folder will be compiled into test trainer. file(GLOB training_api_test_trainer_src "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.cc" @@ -1428,6 +1511,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) set(ONNXRUNTIME_TEST_LIBS onnxruntime_session ${onnxruntime_libs} + # CUDA is dynamically loaded at runtime onnxruntime_optimizer onnxruntime_providers @@ -1440,7 +1524,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) onnxruntime_flatbuffers ) - if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) + if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS) list(APPEND ONNXRUNTIME_TEST_LIBS onnxruntime_language_interop onnxruntime_pyop) endif() @@ -1452,8 +1536,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) endif() endif() -if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") - +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(custom_op_src_patterns "${TEST_SRC_DIR}/testdata/custom_op_library/*.h" "${TEST_SRC_DIR}/testdata/custom_op_library/*.cc" @@ -1464,20 +1547,21 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") set(custom_op_lib_option) set(custom_op_lib_link ${GSL_TARGET}) - if (onnxruntime_USE_CUDA) + if(onnxruntime_USE_CUDA) list(APPEND custom_op_src_patterns - "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu" - "${TEST_SRC_DIR}/testdata/custom_op_library/cuda/cuda_ops.*") + "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu" + "${TEST_SRC_DIR}/testdata/custom_op_library/cuda/cuda_ops.*") list(APPEND custom_op_lib_include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${onnxruntime_CUDNN_HOME}/include) - if (HAS_QSPECTRE) + + if(HAS_QSPECTRE) list(APPEND custom_op_lib_option "$<$:SHELL:-Xcompiler /Qspectre>") endif() endif() - if (onnxruntime_USE_ROCM) + if(onnxruntime_USE_ROCM) list(APPEND custom_op_src_patterns - "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/rocm_ops.hip" - "${TEST_SRC_DIR}/testdata/custom_op_library/rocm/rocm_ops.*") + "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/rocm_ops.hip" + "${TEST_SRC_DIR}/testdata/custom_op_library/rocm/rocm_ops.*") list(APPEND custom_op_lib_include ${onnxruntime_ROCM_HOME}/include) list(APPEND custom_op_lib_option "-D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1") endif() @@ -1489,80 +1573,90 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") target_link_libraries(custom_op_library PRIVATE ${GSL_TARGET} ${custom_op_lib_link}) if(UNIX) - if (APPLE) + if(APPLE) set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-Xlinker -dead_strip") else() set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.lds -Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") endif() else() set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-DEF:${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.def") - if (NOT onnxruntime_USE_CUDA) + + if(NOT onnxruntime_USE_CUDA) target_compile_options(custom_op_library PRIVATE "$<$:-Xcompiler /wd26409>" - "$<$>:/wd26409>") + "$<$>:/wd26409>") endif() endif() + set_property(TARGET custom_op_library APPEND_STRING PROPERTY LINK_FLAGS ${ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG}) - if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) - if (onnxruntime_BUILD_JAVA AND NOT onnxruntime_ENABLE_STATIC_ANALYSIS) - message(STATUS "Running Java tests") - # native-test is added to resources so custom_op_lib can be loaded - # and we want to symlink it there - set(JAVA_NATIVE_TEST_DIR ${JAVA_OUTPUT_DIR}/native-test) - file(MAKE_DIRECTORY ${JAVA_NATIVE_TEST_DIR}) - - # delegate to gradle's test runner - if(WIN32) - add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ - ${JAVA_NATIVE_TEST_DIR}/$) - # On windows ctest requires a test to be an .exe(.com) file - # With gradle wrapper we get gradlew.bat. We delegate execution to a separate .cmake file - # That can handle both .exe and .bat - add_test(NAME onnxruntime4j_test COMMAND ${CMAKE_COMMAND} - -DGRADLE_EXECUTABLE=${GRADLE_EXECUTABLE} - -DBIN_DIR=${CMAKE_CURRENT_BINARY_DIR} - -DREPO_ROOT=${REPO_ROOT} - ${ORT_PROVIDER_FLAGS} - -P ${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime_java_unittests.cmake) + if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) + if(onnxruntime_BUILD_JAVA AND NOT onnxruntime_ENABLE_STATIC_ANALYSIS) + message(STATUS "Running Java tests") + + # native-test is added to resources so custom_op_lib can be loaded + # and we want to symlink it there + set(JAVA_NATIVE_TEST_DIR ${JAVA_OUTPUT_DIR}/native-test) + file(MAKE_DIRECTORY ${JAVA_NATIVE_TEST_DIR}) + + # delegate to gradle's test runner + if(WIN32) + add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ + ${JAVA_NATIVE_TEST_DIR}/$) + + # On windows ctest requires a test to be an .exe(.com) file + # With gradle wrapper we get gradlew.bat. We delegate execution to a separate .cmake file + # That can handle both .exe and .bat + add_test(NAME onnxruntime4j_test COMMAND ${CMAKE_COMMAND} + -DGRADLE_EXECUTABLE=${GRADLE_EXECUTABLE} + -DBIN_DIR=${CMAKE_CURRENT_BINARY_DIR} + -DREPO_ROOT=${REPO_ROOT} + ${ORT_PROVIDER_FLAGS} + -P ${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime_java_unittests.cmake) + else() + add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ + ${JAVA_NATIVE_TEST_DIR}/$) + + if(onnxruntime_ENABLE_TRAINING_APIS) + message(STATUS "Running Java inference and training tests") + add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS} -DENABLE_TRAINING_APIS=1 + WORKING_DIRECTORY ${REPO_ROOT}/java) else() - add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $ - ${JAVA_NATIVE_TEST_DIR}/$) - if (onnxruntime_ENABLE_TRAINING_APIS) - message(STATUS "Running Java inference and training tests") - add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS} -DENABLE_TRAINING_APIS=1 - WORKING_DIRECTORY ${REPO_ROOT}/java) - else() - message(STATUS "Running Java inference tests only") - add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS} - WORKING_DIRECTORY ${REPO_ROOT}/java) - endif() + message(STATUS "Running Java inference tests only") + add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS} + WORKING_DIRECTORY ${REPO_ROOT}/java) endif() - set_property(TEST onnxruntime4j_test APPEND PROPERTY DEPENDS onnxruntime4j_jni) + endif() + + set_property(TEST onnxruntime4j_test APPEND PROPERTY DEPENDS onnxruntime4j_jni) endif() endif() - if (onnxruntime_BUILD_SHARED_LIB AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) - set (onnxruntime_customopregistration_test_SRC - ${ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR}/test_registercustomops.cc) + if(onnxruntime_BUILD_SHARED_LIB AND(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) + set(onnxruntime_customopregistration_test_SRC + ${ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR}/test_registercustomops.cc) set(onnxruntime_customopregistration_test_LIBS custom_op_library onnxruntime_common onnxruntime_test_utils) - if (NOT WIN32) + + if(NOT WIN32) list(APPEND onnxruntime_customopregistration_test_LIBS nsync::nsync_cpp) endif() - if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") + + if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") list(APPEND onnxruntime_customopregistration_test_LIBS cpuinfo) endif() - if (onnxruntime_USE_TENSORRT) + + if(onnxruntime_USE_TENSORRT) list(APPEND onnxruntime_customopregistration_test_LIBS ${TENSORRT_LIBRARY_INFER}) endif() + AddTest(DYN - TARGET onnxruntime_customopregistration_test - SOURCES ${onnxruntime_customopregistration_test_SRC} ${onnxruntime_unittest_main_src} - LIBS ${onnxruntime_customopregistration_test_LIBS} - DEPENDS ${all_dependencies} + TARGET onnxruntime_customopregistration_test + SOURCES ${onnxruntime_customopregistration_test_SRC} ${onnxruntime_unittest_main_src} + LIBS ${onnxruntime_customopregistration_test_LIBS} + DEPENDS ${all_dependencies} ) - if (CMAKE_SYSTEM_NAME STREQUAL "iOS") + if(CMAKE_SYSTEM_NAME STREQUAL "iOS") add_custom_command( TARGET onnxruntime_customopregistration_test POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_directory @@ -1570,73 +1664,71 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten") $/testdata) endif() - if (UNIX AND onnxruntime_USE_TENSORRT) - # The test_main.cc includes NvInfer.h where it has many deprecated declarations - # simply ignore them for TensorRT EP build - set_property(TARGET onnxruntime_customopregistration_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations") + if(UNIX AND onnxruntime_USE_TENSORRT) + # The test_main.cc includes NvInfer.h where it has many deprecated declarations + # simply ignore them for TensorRT EP build + set_property(TARGET onnxruntime_customopregistration_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations") endif() - endif() endif() # Build custom op library that returns an error OrtStatus when the exported RegisterCustomOps function is called. -if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) onnxruntime_add_shared_library_module(custom_op_invalid_library - ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.h - ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.cc) + ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.h + ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.cc) target_include_directories(custom_op_invalid_library PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session) if(UNIX) - if (APPLE) + if(APPLE) set(ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG "-Xlinker -dead_strip") else() string(CONCAT ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG - "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.lds " - "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") + "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.lds " + "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") endif() else() set(ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG - "-DEF:${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.def") + "-DEF:${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.def") endif() set_property(TARGET custom_op_invalid_library APPEND_STRING PROPERTY LINK_FLAGS - ${ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG}) + ${ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG}) endif() -if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) - +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) file(GLOB_RECURSE custom_op_get_const_input_test_library_src - "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.cc" - "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.h" - "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.cc" + "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.cc" + "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.h" + "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.cc" ) onnxruntime_add_shared_library_module(custom_op_get_const_input_test_library ${custom_op_get_const_input_test_library_src}) onnxruntime_add_include_to_target(custom_op_get_const_input_test_library onnxruntime_common GTest::gtest GTest::gmock) target_include_directories(custom_op_get_const_input_test_library PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session - ${REPO_ROOT}/include/onnxruntime/core/common) + ${REPO_ROOT}/include/onnxruntime/core/common) if(UNIX) - if (APPLE) + if(APPLE) set(ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG "-Xlinker -dead_strip") else() string(CONCAT ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG - "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.lds " - "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") + "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.lds " + "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") endif() else() set(ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG - "-DEF:${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.def") + "-DEF:${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.def") endif() set_property(TARGET custom_op_get_const_input_test_library APPEND_STRING PROPERTY LINK_FLAGS - ${ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG}) + ${ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG}) endif() -if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT onnxruntime_MINIMAL_BUILD) - set (onnxruntime_logging_apis_test_SRC - ${ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR}/test_logging_apis.cc) +if(onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT onnxruntime_MINIMAL_BUILD) + set(onnxruntime_logging_apis_test_SRC + ${ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR}/test_logging_apis.cc) set(onnxruntime_logging_apis_test_LIBS onnxruntime_common onnxruntime_test_utils) @@ -1645,44 +1737,44 @@ if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" endif() AddTest(DYN - TARGET onnxruntime_logging_apis_test - SOURCES ${onnxruntime_logging_apis_test_SRC} - LIBS ${onnxruntime_logging_apis_test_LIBS} - DEPENDS ${all_dependencies} + TARGET onnxruntime_logging_apis_test + SOURCES ${onnxruntime_logging_apis_test_SRC} + LIBS ${onnxruntime_logging_apis_test_LIBS} + DEPENDS ${all_dependencies} ) endif() -if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_USE_OPENVINO AND (NOT onnxruntime_MINIMAL_BUILD OR - onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) +if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_USE_OPENVINO AND(NOT onnxruntime_MINIMAL_BUILD OR + onnxruntime_MINIMAL_BUILD_CUSTOM_OPS)) onnxruntime_add_shared_library_module(custom_op_openvino_wrapper_library - ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.cc - ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc) + ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.cc + ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc) target_include_directories(custom_op_openvino_wrapper_library PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session) target_link_libraries(custom_op_openvino_wrapper_library PRIVATE openvino::runtime) if(UNIX) - if (APPLE) + if(APPLE) set(ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG "-Xlinker -dead_strip") else() string(CONCAT ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG - "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.lds " - "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") + "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.lds " + "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack") endif() else() set(ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG - "-DEF:${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.def") + "-DEF:${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.def") endif() set_property(TARGET custom_op_openvino_wrapper_library APPEND_STRING PROPERTY LINK_FLAGS - ${ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG}) + ${ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG}) endif() # limit to only test on windows first, due to a runtime path issue on linux -if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD - AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS" - AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android" - AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" - AND NOT onnxruntime_USE_ROCM) +if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD + AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS" + AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android" + AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" + AND NOT onnxruntime_USE_ROCM) file(GLOB_RECURSE test_execution_provider_srcs "${REPO_ROOT}/onnxruntime/test/testdata/custom_execution_provider_library/*.h" "${REPO_ROOT}/onnxruntime/test/testdata/custom_execution_provider_library/*.cc" @@ -1696,9 +1788,11 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD target_include_directories(test_execution_provider PRIVATE $) target_include_directories(test_execution_provider PRIVATE $) target_include_directories(test_execution_provider PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${ORTTRAINING_ROOT}) - if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) + + if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP) target_link_libraries(test_execution_provider PRIVATE Python::Python) endif() + if(APPLE) set_property(TARGET test_execution_provider APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${REPO_ROOT}/onnxruntime/test/testdata/custom_execution_provider_library/exported_symbols.lst") elseif(UNIX) diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index ef2d7e31654ba..c47bb37500337 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -942,6 +942,14 @@ ::std::vector<::std::basic_string> GetParameterStrings() { #ifdef USE_DML provider_names[provider_name_dml] = {opset7, opset8, opset9, opset10, opset11, opset12, opset13, opset14, opset15, opset16, opset17, opset18}; #endif + +// These tests are already run in the CPU builds, therefore removing the CPU EP tests from CUDA build for training. +// Note: These are inference tests, we run these in training builds as an extra check. Therefore reducing +// the number of times these are run will reduce the CI time. +#if defined(ENABLE_TRAINING_CORE) && defined(USE_CUDA) + provider_names.erase(provider_name_cpu); +#endif + std::vector> v; // Permanently exclude following tests because ORT support only opset starting from 7, // Please make no more changes to the list diff --git a/orttraining/orttraining/python/orttraining_pybind_state.cc b/orttraining/orttraining/python/orttraining_pybind_state.cc index 3f3aa396e6ca0..08fd1a660d6b6 100644 --- a/orttraining/orttraining/python/orttraining_pybind_state.cc +++ b/orttraining/orttraining/python/orttraining_pybind_state.cc @@ -18,7 +18,6 @@ #include "core/session/environment.h" #include "core/session/custom_ops.h" #include "core/dlpack/dlpack_converter.h" -#include "orttraining/core/session/training_session.h" #include "orttraining/core/agent/training_agent.h" #include "orttraining/core/graph/gradient_config.h" #include "orttraining/core/graph/optimizer_config.h" @@ -108,65 +107,6 @@ GetExecutionProvidersForTrainingApis(OrtDevice device) { } } // namespace #endif -struct TrainingParameters { - std::string loss_output_name; - std::unordered_set weights_to_train; - std::unordered_set weights_not_to_train; - - onnxruntime::training::TrainingSession::ImmutableWeights immutable_weights; - - // optimizer - std::string training_optimizer_name; - std::string lr_params_feed_name = "Learning_Rate"; - std::unordered_map> optimizer_attributes_map; - std::unordered_map> optimizer_int_attributes_map; - onnxruntime::training::TrainingSession::OptimizerState optimizer_initial_state; - std::unordered_map> sliced_schema; - std::unordered_map sliced_axes; - std::vector sliced_tensor_names; - bool use_fp16_moments = false; - - bool use_mixed_precision = false; - bool allreduce_post_accumulation = false; - float loss_scale = 0.0f; - int world_rank = 0; - int world_size = 1; - int local_rank = 0; - int local_size = 1; - int gradient_accumulation_steps = 1; - int data_parallel_size = 1; - int horizontal_parallel_size = 1; - int pipeline_parallel_size = 1; - int num_pipeline_micro_batches = 1; - int deepspeed_zero_stage = 0; - bool enable_grad_norm_clip = true; - bool set_gradients_as_graph_outputs = false; - bool use_memory_efficient_gradient = false; - - std::string pipeline_cut_info_string = {}; - - // recompute - bool attn_dropout_recompute = false; - bool gelu_recompute = false; - bool transformer_layer_recompute = false; - int number_recompute_layers = 0; - bool enable_adasum = false; - - // transformation - int propagate_cast_ops_level = 1; - std::vector propagate_cast_ops_allow; - GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy propagate_cast_ops_strategy = - GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::FloodFill; - - // graph dumping - std::string model_after_graph_transforms_path; - std::string model_with_gradient_graph_path; - std::string model_with_training_graph_path; -}; - -struct TrainingConfigurationResult { - optional loss_scale_input_name; -}; #ifdef ENABLE_TRAINING_APIS // Thin wrapper over internal C++ Optimizer @@ -206,185 +146,6 @@ struct PyGradientGraphBuilderContext { local_registries_(local_registries) {} }; -// TODO: this method does not handle parallel optimization. -TrainingConfigurationResult ConfigureSessionForTraining( - training::PipelineTrainingSession* sess, TrainingParameters& parameters) { - // TODO tix, refactor the mpi related code to populate all fields correctly by default. - ORT_ENFORCE(parameters.data_parallel_size <= parameters.world_size, "data_parallel_size: ", parameters.data_parallel_size, ", world_size: ", parameters.world_size); - ORT_ENFORCE(parameters.horizontal_parallel_size <= parameters.world_size, "horizontal_parallel_size: ", parameters.horizontal_parallel_size, ", world_size: ", parameters.world_size); - ORT_ENFORCE(parameters.pipeline_parallel_size <= parameters.world_size, "pipeline_parallel_size: ", parameters.pipeline_parallel_size, ", world_size: ", parameters.world_size); - - // When DxHxP != the total number of ranks, we try adjusting D so that DxHxP == the total number of ranks. - if (parameters.world_size != parameters.data_parallel_size * parameters.horizontal_parallel_size * parameters.pipeline_parallel_size) { - ORT_ENFORCE(parameters.world_size % parameters.horizontal_parallel_size * parameters.pipeline_parallel_size == 0, - "D, H, P sizes are incorrect. To enable automatic correction, total number of ranks must be a divisible by HxP."); - - const auto new_data_parallel_size = parameters.world_size / (parameters.horizontal_parallel_size * parameters.pipeline_parallel_size); - parameters.data_parallel_size = new_data_parallel_size; - - const std::string msg = "Cannot distribute " + std::to_string(parameters.world_size) + " ranks for distributed computation with D=" + std::to_string(parameters.data_parallel_size) + - ", H=" + std::to_string(parameters.horizontal_parallel_size) + ", P=" + std::to_string(parameters.pipeline_parallel_size) + ", so D is automatically changed to " + std::to_string(new_data_parallel_size); - LOGS(*(sess->GetLogger()), WARNING) << msg; - } - - training::PipelineTrainingSession::TrainingConfiguration config{}; - config.weight_names_to_train = parameters.weights_to_train; - config.weight_names_to_not_train = parameters.weights_not_to_train; - config.immutable_weights = parameters.immutable_weights; - config.gradient_accumulation_steps = parameters.gradient_accumulation_steps; - - config.distributed_config.world_rank = parameters.world_rank; - config.distributed_config.world_size = parameters.world_size; - config.distributed_config.local_rank = parameters.local_rank; - config.distributed_config.local_size = parameters.local_size; - config.distributed_config.data_parallel_size = parameters.data_parallel_size; - config.distributed_config.horizontal_parallel_size = parameters.horizontal_parallel_size; - config.distributed_config.pipeline_parallel_size = parameters.pipeline_parallel_size; - config.distributed_config.num_pipeline_micro_batches = parameters.num_pipeline_micro_batches; - config.distributed_config.sliced_schema = parameters.sliced_schema; - config.distributed_config.sliced_axes = parameters.sliced_axes; - config.distributed_config.sliced_tensor_names = parameters.sliced_tensor_names; - - if (parameters.use_mixed_precision) { - training::PipelineTrainingSession::TrainingConfiguration::MixedPrecisionConfiguration mp{}; - mp.use_mixed_precision_initializers = true; - - config.mixed_precision_config = mp; - } - - if (config.distributed_config.pipeline_parallel_size > 1) { - training::PipelineTrainingSession::TrainingConfiguration::PipelineConfiguration pipeline_config; - - // Currently don't support auto-partition. User needs to pass in cut information for pipeline - pipeline_config.do_partition = true; - assert(!parameters.pipeline_cut_info_string.empty()); - - auto process_with_delimiter = [](std::string& input_str, const std::string& delimiter) { - std::vector result; - size_t pos = 0; - while ((pos = input_str.find(delimiter)) != std::string::npos) { - std::string token = input_str.substr(0, pos); - result.emplace_back(token); - input_str.erase(0, pos + delimiter.length()); - } - // push the last split of substring into result. - result.emplace_back(input_str); - return result; - }; - - auto process_cut_info = [&](std::string& cut_info_string) { - std::vector cut_list; - const std::string group_delimiter = ","; - const std::string edge_delimiter = ":"; - const std::string consumer_delimiter = "/"; - const std::string producer_consumer_delimiter = "-"; - - auto cut_info_groups = process_with_delimiter(cut_info_string, group_delimiter); - for (auto& cut_info_group : cut_info_groups) { - PipelineTrainingSession::TrainingConfiguration::CutInfo cut_info; - auto cut_edges = process_with_delimiter(cut_info_group, edge_delimiter); - for (auto& cut_edge : cut_edges) { - auto process_edge = process_with_delimiter(cut_edge, producer_consumer_delimiter); - if (process_edge.size() == 1) { - PipelineTrainingSession::TrainingConfiguration::CutEdge edge{process_edge[0]}; - cut_info.emplace_back(edge); - } else { - ORT_ENFORCE(process_edge.size() == 2); - auto consumer_list = process_with_delimiter(process_edge[1], consumer_delimiter); - - PipelineTrainingSession::TrainingConfiguration::CutEdge edge{process_edge[0], consumer_list}; - cut_info.emplace_back(edge); - } - } - cut_list.emplace_back(cut_info); - } - return cut_list; - }; - - pipeline_config.cut_list = process_cut_info(parameters.pipeline_cut_info_string); - config.pipeline_config = pipeline_config; - } - config.loss_name = parameters.loss_output_name; - - if (!parameters.training_optimizer_name.empty()) { - training::PipelineTrainingSession::TrainingConfiguration::OptimizerConfiguration opt{}; - opt.name = parameters.training_optimizer_name; - opt.learning_rate_input_name = parameters.lr_params_feed_name; - opt.weight_attributes_generator = [¶meters](const std::string& weight_name) { - const auto it = parameters.optimizer_attributes_map.find(weight_name); - ORT_ENFORCE( - it != parameters.optimizer_attributes_map.end(), - "Failed to find attribute map for weight ", weight_name); - return it->second; - }; - opt.weight_int_attributes_generator = [¶meters](const std::string& weight_name) { - const auto it = parameters.optimizer_int_attributes_map.find(weight_name); - ORT_ENFORCE( - it != parameters.optimizer_int_attributes_map.end(), - "Failed to find int attribute map for weight ", weight_name); - return it->second; - }; - opt.use_mixed_precision_moments = parameters.use_fp16_moments; - opt.do_all_reduce_in_mixed_precision_type = true; - // TODO: this mapping is temporary. - // For now, nccl allreduce kernel only implements for allreduce_post_accumulation - // hovorod allreduce kernel only implements for not allreduce_post_accumulation. - // eventually we will have one all reduce kernel and let opt to have - // an allreduce_post_accumulation option and remove the use_nccl option. - opt.use_nccl = parameters.allreduce_post_accumulation; - opt.deepspeed_zero = onnxruntime::training::ZeROConfig(parameters.deepspeed_zero_stage); - opt.enable_grad_norm_clip = parameters.enable_grad_norm_clip; - - // TODO reduction types - if (parameters.enable_adasum) { -#ifdef USE_CUDA - opt.adasum_reduction_type = training::AdasumReductionType::GpuHierarchicalReduction; -#else - opt.adasum_reduction_type = training::AdasumReductionType::CpuReduction; -#endif - } - - config.optimizer_config = opt; - } - - if (!parameters.optimizer_initial_state.empty()) { - config.init_optimizer_states = parameters.optimizer_initial_state; - } - - config.gradient_graph_config.use_memory_efficient_gradient = parameters.use_memory_efficient_gradient; - config.gradient_graph_config.set_gradients_as_graph_outputs = parameters.set_gradients_as_graph_outputs; - - config.graph_transformer_config.attn_dropout_recompute = parameters.attn_dropout_recompute; - config.graph_transformer_config.gelu_recompute = parameters.gelu_recompute; - config.graph_transformer_config.transformer_layer_recompute = parameters.transformer_layer_recompute; - config.graph_transformer_config.number_recompute_layers = parameters.number_recompute_layers; - config.graph_transformer_config.propagate_cast_ops_config.strategy = parameters.propagate_cast_ops_strategy; - config.graph_transformer_config.propagate_cast_ops_config.level = parameters.propagate_cast_ops_level; - config.graph_transformer_config.propagate_cast_ops_config.allow = parameters.propagate_cast_ops_allow; - - if (!parameters.model_after_graph_transforms_path.empty()) { - config.model_after_graph_transforms_path = ToPathString(parameters.model_after_graph_transforms_path); - } - if (!parameters.model_with_gradient_graph_path.empty()) { - config.model_with_gradient_graph_path = ToPathString(parameters.model_with_gradient_graph_path); - } - if (!parameters.model_with_training_graph_path.empty()) { - config.model_with_training_graph_path = ToPathString(parameters.model_with_training_graph_path); - } - - training::PipelineTrainingSession::TrainingConfigurationResult config_result{}; - - OrtPybindThrowIfError(sess->ConfigureForTraining(config, config_result)); - - TrainingConfigurationResult python_config_result{}; - if (config_result.mixed_precision_config_result.has_value()) { - const auto& mp_config_result = config_result.mixed_precision_config_result.value(); - python_config_result.loss_scale_input_name = mp_config_result.loss_scale_input_name; - } - - return python_config_result; -} - #if defined(USE_MPI) void CopyMPIContextToTrainingParameters(TrainingParameters& parameters, const logging::Logger* logger) { LOGS(*logger, INFO) << "MPIContext::GetInstance().GetWorldRank(): " << MPIContext::GetInstance().GetWorldRank(); @@ -448,68 +209,6 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn ORT_ENFORCE(num_entries_erased == 1, "NodeArg not found in cache: ", node_arg_name); }); - py::class_ parameters(m, "TrainingParameters", R"pbdoc(Configuration information for training.)pbdoc"); - parameters.def(py::init()) - .def_readwrite("loss_output_name", &TrainingParameters::loss_output_name) - .def_readwrite("immutable_weights", &TrainingParameters::immutable_weights) - .def_readwrite("weights_not_to_train", &TrainingParameters::weights_not_to_train) - .def_readwrite("weights_to_train", &TrainingParameters::weights_to_train) - .def_readwrite("sliced_tensor_names", &TrainingParameters::sliced_tensor_names) - .def_readwrite("training_optimizer_name", &TrainingParameters::training_optimizer_name) - .def_readwrite("lr_params_feed_name", &TrainingParameters::lr_params_feed_name) - .def_readwrite("optimizer_attributes_map", &TrainingParameters::optimizer_attributes_map) - .def_readwrite("optimizer_int_attributes_map", &TrainingParameters::optimizer_int_attributes_map) - .def_readwrite("sliced_schema", &TrainingParameters::sliced_schema) - .def_readwrite("sliced_axes", &TrainingParameters::sliced_axes) - .def_readwrite("use_fp16_moments", &TrainingParameters::use_fp16_moments) - .def_readwrite("use_mixed_precision", &TrainingParameters::use_mixed_precision) - .def_readwrite("allreduce_post_accumulation", &TrainingParameters::allreduce_post_accumulation) - .def_readwrite("loss_scale", &TrainingParameters::loss_scale) - .def_readwrite("world_rank", &TrainingParameters::world_rank) - .def_readwrite("world_size", &TrainingParameters::world_size) - .def_readwrite("data_parallel_size", &TrainingParameters::data_parallel_size) - .def_readwrite("horizontal_parallel_size", &TrainingParameters::horizontal_parallel_size) - .def_readwrite("pipeline_parallel_size", &TrainingParameters::pipeline_parallel_size) - .def_readwrite("pipeline_cut_info_string", &TrainingParameters::pipeline_cut_info_string) - .def_readwrite("num_pipeline_micro_batches", &TrainingParameters::num_pipeline_micro_batches) - .def_readwrite("gradient_accumulation_steps", &TrainingParameters::gradient_accumulation_steps) - .def_readwrite("deepspeed_zero_stage", &TrainingParameters::deepspeed_zero_stage) - .def_readwrite("enable_grad_norm_clip", &TrainingParameters::enable_grad_norm_clip) - .def_readwrite("set_gradients_as_graph_outputs", &TrainingParameters::set_gradients_as_graph_outputs) - .def_readwrite("use_memory_efficient_gradient", &TrainingParameters::use_memory_efficient_gradient) - .def_readwrite("attn_dropout_recompute", &TrainingParameters::attn_dropout_recompute) - .def_readwrite("gelu_recompute", &TrainingParameters::gelu_recompute) - .def_readwrite("transformer_layer_recompute", &TrainingParameters::transformer_layer_recompute) - .def_readwrite("number_recompute_layers", &TrainingParameters::number_recompute_layers) - .def_readwrite("data_parallel_size", &TrainingParameters::data_parallel_size) - .def_readwrite("horizontal_parallel_size", &TrainingParameters::horizontal_parallel_size) - .def_readwrite("pipeline_parallel_size", &TrainingParameters::pipeline_parallel_size) - .def("set_optimizer_initial_state", - [](TrainingParameters& parameters, const std::unordered_map>& py_state) -> void { - onnxruntime::training::TrainingSession::OptimizerState optim_state; - for (const auto& weight_it : py_state) { - auto state = weight_it.second; - NameMLValMap state_tensors; - for (auto& initializer : state) { - OrtValue ml_value; - - // InputDeflist is null because parameters havent been tied to session yet - // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) - CreateGenericMLValue(nullptr, GetAllocator(), "", initializer.second, &ml_value, true); - ThrowIfPyErrOccured(); - state_tensors.emplace(initializer.first, ml_value); - } - optim_state.emplace(weight_it.first, state_tensors); - } - parameters.optimizer_initial_state = optim_state; - }) - .def_readwrite("model_after_graph_transforms_path", &TrainingParameters::model_after_graph_transforms_path) - .def_readwrite("model_with_gradient_graph_path", &TrainingParameters::model_with_gradient_graph_path) - .def_readwrite("model_with_training_graph_path", &TrainingParameters::model_with_training_graph_path) - .def_readwrite("enable_adasum", &TrainingParameters::enable_adasum) - .def_readwrite("propagate_cast_ops_level", &TrainingParameters::propagate_cast_ops_level) - .def_readwrite("propagate_cast_ops_allow", &TrainingParameters::propagate_cast_ops_allow); - #if defined(USE_MPI) m.def("get_mpi_context_local_rank", []() -> int { return MPIContext::GetInstance().GetLocalRank(); }); m.def("get_mpi_context_local_size", []() -> int { return MPIContext::GetInstance().GetLocalSize(); }); @@ -579,130 +278,6 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn }); #endif - py::class_ config_result(m, "TrainingConfigurationResult", "pbdoc(Configuration result for training.)pbdoc"); - config_result.def(py::init()) - .def_property_readonly("loss_scale_input_name", [](const TrainingConfigurationResult& result) -> py::object { - if (result.loss_scale_input_name.has_value()) { - return py::str{result.loss_scale_input_name.value()}; - } - return py::none(); - }); - - // Thin wrapper over internal C++ InferenceSession to accommodate custom op library management for the Python user - struct PyTrainingSession : public PyInferenceSession { - PyTrainingSession(std::shared_ptr env, const PySessionOptions& so) - : PyInferenceSession(env, std::make_unique(so.value, *env)) { - } - ~PyTrainingSession() = default; - }; - - py::class_ training_session(m, "TrainingSession"); - training_session - .def(py::init([](const PySessionOptions& so) { - auto& training_env = GetTrainingEnv(); - return std::make_unique(training_env.GetORTEnv(), so); - })) - .def(py::init([]() { - auto& training_env = GetTrainingEnv(); - return std::make_unique(training_env.GetORTEnv(), GetDefaultCPUSessionOptions()); - })) - .def("finalize", [](py::object) { -#if defined(USE_MPI) -#ifdef _WIN32 - // https://docs.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-best-practices - // shutdown_mpi() is not called within MPIContext destructor because of DllMain's restriction - // call shutdown_mpi() here instead. - MPIContext::shutdown_mpi(); -#endif -#endif - }) - .def("load_model", [ep_registration_fn](PyTrainingSession* sess, const std::string& path, TrainingParameters& parameters, const std::vector& provider_types, const ProviderOptionsVector& provider_options) { - OrtPybindThrowIfError(sess->GetSessionHandle()->Load(path)); - -#if defined(USE_MPI) - bool use_nccl = parameters.allreduce_post_accumulation; - if (!use_nccl && parameters.world_size > 1) - CopyMPIContextToTrainingParameters(parameters, sess->GetSessionHandle()->GetLogger()); -#endif - const auto config_result = ConfigureSessionForTraining(static_cast(sess->GetSessionHandle()), parameters); - - ProviderOptionsVector merged_options; - ResolveExtraProviderOptions(provider_types, provider_options, merged_options); - - InitializeSession(sess->GetSessionHandle(), ep_registration_fn, provider_types, merged_options); - - return config_result; - }) - .def("read_bytes", [ep_registration_fn](PyTrainingSession* sess, const py::bytes& serialized_model, TrainingParameters& parameters, const std::vector& provider_types, const ProviderOptionsVector& provider_options) { - std::istringstream buffer(serialized_model); - OrtPybindThrowIfError(sess->GetSessionHandle()->Load(buffer)); - -#if defined(USE_MPI) - bool use_nccl = parameters.allreduce_post_accumulation; - if (!use_nccl && parameters.world_size > 1) - CopyMPIContextToTrainingParameters(parameters, sess->GetSessionHandle()->GetLogger()); -#endif - const auto config_result = ConfigureSessionForTraining(static_cast(sess->GetSessionHandle()), parameters); - ProviderOptionsVector merged_options; - ResolveExtraProviderOptions(provider_types, provider_options, merged_options); - - InitializeSession(sess->GetSessionHandle(), ep_registration_fn, provider_types, merged_options); - - return config_result; - }) - .def("get_state", [](PyTrainingSession* sess) { - NameMLValMap state_tensors; - ORT_THROW_IF_ERROR(static_cast(sess->GetSessionHandle())->GetStateTensors(state_tensors)); - auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager(); - // convert to numpy array - std::map rmap; - for (auto& kv : state_tensors) { - if (kv.second.IsTensor()) { - py::object obj; - const Tensor& rtensor = kv.second.Get(); - GetPyObjFromTensor(rtensor, obj, &data_transfer_manager); - rmap.insert({kv.first, obj}); - } else { - throw std::runtime_error("Non tensor type in session state tensors is not expected."); - } - } - return rmap; - }) - .def("get_model_state", [](PyTrainingSession* sess, bool include_mixed_precision_weights) { - std::unordered_map model_state_tensors; - ORT_THROW_IF_ERROR(static_cast(sess->GetSessionHandle())->GetModelState(model_state_tensors, include_mixed_precision_weights)); - auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager(); - return ConvertORTTensorMapToNumpy(model_state_tensors, data_transfer_manager); - }) - .def("get_optimizer_state", [](PyTrainingSession* sess) { - std::unordered_map opt_state_tensors; - ORT_THROW_IF_ERROR(static_cast(sess->GetSessionHandle())->GetOptimizerState(opt_state_tensors)); - auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager(); - return ConvertORTTensorMapToNumpy(opt_state_tensors, data_transfer_manager); - }) - .def("get_partition_info_map", [](PyTrainingSession* sess) { - std::unordered_map>> part_info_map; - ORT_THROW_IF_ERROR(static_cast(sess->GetSessionHandle())->GetPartitionInfoMap(part_info_map)); - return part_info_map; - }) - .def("load_state", [](PyTrainingSession* sess, std::unordered_map& state, bool strict) { - NameMLValMap state_tensors; - for (auto initializer : state) { - OrtValue ml_value; - auto px = sess->GetSessionHandle()->GetModelInputs(); - if (!px.first.IsOK() || !px.second) { - throw std::runtime_error("Either failed to get model inputs from the session object or the input def list was null"); - } - CreateGenericMLValue(px.second, GetAllocator(), initializer.first, initializer.second, &ml_value); - ThrowIfPyErrOccured(); - state_tensors.insert(std::make_pair(initializer.first, ml_value)); - } - ORT_THROW_IF_ERROR(static_cast(sess->GetSessionHandle())->SetStateTensors(state_tensors, strict)); - }) - .def("is_output_fp32_node", [](PyTrainingSession* sess, const std::string& output_name) { - return static_cast(sess->GetSessionHandle())->IsGraphOutputFp32Node(output_name); - }); - py::class_(m, "PartialGraphExecutionState") .def(py::init([]() { return std::make_unique(); diff --git a/orttraining/orttraining/test/gradient/gradient_checker.h b/orttraining/orttraining/test/gradient/gradient_checker.h index e2820da5682c6..d1a7c5efb94b4 100644 --- a/orttraining/orttraining/test/gradient/gradient_checker.h +++ b/orttraining/orttraining/test/gradient/gradient_checker.h @@ -17,11 +17,11 @@ limitations under the License. #pragma once #include "test/providers/provider_test_utils.h" -#include "orttraining/core/session/training_session.h" #include "orttraining/test/gradient/gradient_op_test_utils.h" namespace onnxruntime { namespace test { +using training::OpDef; // TODO: This class currently assumes the inputs share types and the outputs share a type. // However there are cases like MaxPool and Gather where this is not true. diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc index d4e18dbfd2290..7bbc856d65bea 100644 --- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc +++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc @@ -15,7 +15,7 @@ #include "test/providers/provider_test_utils.h" #include "test/util/include/test_random_seed.h" #include "orttraining/test/gradient/gradient_checker.h" -#include "orttraining/test/gradient/gradient_op_test_utils.h" +// #include "orttraining/test/gradient/gradient_op_test_utils.h" #include "test/util/include/default_providers.h" #include "test/common/cuda_op_test_utils.h" diff --git a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py b/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py deleted file mode 100644 index d5298cf8e860e..0000000000000 --- a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py +++ /dev/null @@ -1,325 +0,0 @@ -import os -import unittest - -import torch -import torch.nn as nn -from orttraining_test_bert_postprocess import postprocess_model -from orttraining_test_data_loader import create_ort_test_dataloader -from orttraining_test_transformers import BertForPreTraining, BertModelTest -from orttraining_test_utils import map_optimizer_attributes - -import onnxruntime -from onnxruntime.capi.ort_trainer import ( # noqa: F401 - IODescription, - LossScaler, - ModelDescription, - ORTTrainer, - generate_sample, -) - -torch.manual_seed(1) -onnxruntime.set_seed(1) - - -class Test_PostPasses(unittest.TestCase): # noqa: N801 - def get_onnx_model( - self, model, model_desc, inputs, device, _enable_internal_postprocess=True, _extra_postprocess=None - ): - lr_desc = IODescription( - "Learning_Rate", - [ - 1, - ], - torch.float32, - ) - model = ORTTrainer( - model, - None, - model_desc, - "LambOptimizer", - map_optimizer_attributes, - lr_desc, - device, - world_rank=0, - world_size=1, - _opset_version=14, - _enable_internal_postprocess=_enable_internal_postprocess, - _extra_postprocess=_extra_postprocess, - ) - - model.train_step(*inputs) - return model.onnx_model_ - - def count_all_nodes(self, model): - return len(model.graph.node) - - def count_nodes(self, model, node_type): - count = 0 - for node in model.graph.node: - if node.op_type == node_type: - count += 1 - return count - - def find_nodes(self, model, node_type): - nodes = [] - for node in model.graph.node: - if node.op_type == node_type: - nodes.append(node) - return nodes - - def get_name(self, name): - if os.path.exists(name): - return name - rel = os.path.join("testdata", name) - if os.path.exists(rel): - return rel - this = os.path.dirname(__file__) - data = os.path.join(this, "..", "..", "..", "..", "onnxruntime", "test", "testdata") - res = os.path.join(data, name) - if os.path.exists(res): - return res - raise FileNotFoundError(f"Unable to find '{name}' or '{rel}' or '{res}'") - - def test_layer_norm(self): - class LayerNormNet(nn.Module): - def __init__(self, target): - super().__init__() - self.ln_1 = nn.LayerNorm(10) - self.loss = nn.CrossEntropyLoss() - self.target = target - - def forward(self, x): - output1 = self.ln_1(x) - loss = self.loss(output1, self.target) - return loss, output1 - - device = torch.device("cpu") - target = torch.ones(20, 10, 10, dtype=torch.int64).to(device) - model = LayerNormNet(target) - input = torch.randn(20, 5, 10, 10, dtype=torch.float32).to(device) - - input_desc = IODescription("input", [], "float32") - output0_desc = IODescription("output0", [], "float32") - output1_desc = IODescription("output1", [20, 5, 10, 10], "float32") - model_desc = ModelDescription([input_desc], [output0_desc, output1_desc]) - - learning_rate = torch.tensor([1.0000000e00]).to(device) - input_args = [input, learning_rate] - - onnx_model = self.get_onnx_model(model, model_desc, input_args, device) - - count_layer_norm = self.count_nodes(onnx_model, "LayerNormalization") - count_nodes = self.count_all_nodes(onnx_model) - - assert count_layer_norm == 0 - assert count_nodes == 3 - - def test_expand(self): - class ExpandNet(nn.Module): - def __init__(self, target): - super().__init__() - self.loss = nn.CrossEntropyLoss() - self.target = target - self.linear = torch.nn.Linear(2, 2) - - def forward(self, x, x1): - output = x.expand_as(x1) - output = self.linear(output) - output = output + output - loss = self.loss(output, self.target) - return loss, output - - device = torch.device("cpu") - target = torch.ones(5, 5, 2, dtype=torch.int64).to(device) - model = ExpandNet(target).to(device) - - x = torch.randn(5, 3, 1, 2, dtype=torch.float32).to(device) - x1 = torch.randn(5, 3, 5, 2, dtype=torch.float32).to(device) - - input0_desc = IODescription("x", [5, 3, 1, 2], "float32") - input1_desc = IODescription("x1", [5, 3, 5, 2], "float32") - output0_desc = IODescription("output0", [], "float32") - output1_desc = IODescription("output1", [5, 3, 5, 2], "float32") - model_desc = ModelDescription([input0_desc, input1_desc], [output0_desc, output1_desc]) - - learning_rate = torch.tensor([1.0000000e00]).to(device) - input_args = [x, x1, learning_rate] - - onnx_model = self.get_onnx_model(model, model_desc, input_args, device) - - # check that expand output has shape - expand_nodes = self.find_nodes(onnx_model, "Expand") - assert len(expand_nodes) == 1 - - model_info = onnx_model.graph.value_info - assert model_info[0].name == expand_nodes[0].output[0] - assert model_info[0].type == onnx_model.graph.input[1].type - - def test_bert(self): - device = torch.device("cpu") - - model_tester = BertModelTest.BertModelTester(self) - ( - config, - input_ids, - token_type_ids, - input_mask, - sequence_labels, - token_labels, - choice_labels, - ) = model_tester.prepare_config_and_inputs() - - model = BertForPreTraining(config=config) - model.eval() - - loss, prediction_scores, seq_relationship_score = model( - input_ids, - attention_mask=input_mask, - token_type_ids=token_type_ids, - masked_lm_labels=token_labels, - next_sentence_label=sequence_labels, - ) - - model_desc = ModelDescription( - [ - model_tester.input_ids_desc, - model_tester.attention_mask_desc, - model_tester.token_type_ids_desc, - model_tester.masked_lm_labels_desc, - model_tester.next_sentence_label_desc, - ], - [model_tester.loss_desc, model_tester.prediction_scores_desc, model_tester.seq_relationship_scores_desc], - ) - - from collections import namedtuple - - MyArgs = namedtuple( - "MyArgs", "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len" - ) - args = MyArgs( - local_rank=0, - world_size=1, - max_steps=100, - learning_rate=0.00001, - warmup_proportion=0.01, - batch_size=13, - seq_len=7, - ) - - dataset_len = 100 - dataloader = create_ort_test_dataloader(model_desc.inputs_, args.batch_size, args.seq_len, dataset_len, device) - learning_rate = torch.tensor(1.0e0, dtype=torch.float32).to(device) - for b in dataloader: - batch = b - break - learning_rate = torch.tensor([1.00e00]).to(device) - inputs = [*batch, learning_rate] - - onnx_model = self.get_onnx_model(model, model_desc, inputs, device, _extra_postprocess=postprocess_model) - - self._bert_helper(onnx_model) - - def _bert_helper(self, onnx_model): - # count layer_norm - count_layer_norm = self.count_nodes(onnx_model, "LayerNormalization") - assert count_layer_norm == 0 - - # get expand node and check output shape - expand_nodes = self.find_nodes(onnx_model, "Expand") - assert len(expand_nodes) == 1 - - model_info = onnx_model.graph.value_info - assert model_info[0].name == expand_nodes[0].output[0] - assert model_info[0].type == onnx_model.graph.input[0].type - - def test_extra_postpass(self): - def postpass_replace_first_add_with_sub(model): - # this post pass replaces the first Add node with Sub in the model. - # Previous graph - # (subgraph 1) (subgraph 2) - # | | - # | | - # |________ ________| - # | | - # Add - # | - # (subgraph 3) - # - # Post graph - # (subgraph 1) (subgraph 2) - # | | - # | | - # |________ ________| - # | | - # Sub - # | - # (subgraph 3) - add_nodes = [n for n in model.graph.node if n.op_type == "Add"] - add_nodes[0].op_type = "Sub" - - class MultiAdd(nn.Module): - def __init__(self, target): - super().__init__() - self.loss = nn.CrossEntropyLoss() - self.target = target - self.linear = torch.nn.Linear(2, 2, bias=False) - - def forward(self, x, x1): - output = x + x1 - output = output + x - output = output + x1 - output = self.linear(output) - loss = self.loss(output, self.target) - return loss, output - - device = torch.device("cpu") - target = torch.ones(5, 2, dtype=torch.int64).to(device) - model = MultiAdd(target).to(device) - - x = torch.randn(5, 5, 2, dtype=torch.float32).to(device) - x1 = torch.randn(5, 5, 2, dtype=torch.float32).to(device) - - input0_desc = IODescription("x", [5, 5, 2], "float32") - input1_desc = IODescription("x1", [5, 5, 2], "float32") - output0_desc = IODescription("output0", [], "float32") - output1_desc = IODescription("output1", [5, 5, 2], "float32") - model_desc = ModelDescription([input0_desc, input1_desc], [output0_desc, output1_desc]) - - learning_rate = torch.tensor([1.0000000e00]).to(device) - input_args = [x, x1, learning_rate] - - onnx_model = self.get_onnx_model( - model, model_desc, input_args, device, _extra_postprocess=postpass_replace_first_add_with_sub - ) - - # check that extra postpass is called, and called only once. - add_nodes = self.find_nodes(onnx_model, "Add") - sub_nodes = self.find_nodes(onnx_model, "Sub") - assert len(add_nodes) == 2 - assert len(sub_nodes) == 1 - - unprocessed_onnx_model = self.get_onnx_model( - model, model_desc, input_args, device, _extra_postprocess=None, _enable_internal_postprocess=False - ) - # check that the model is unchanged. - add_nodes = self.find_nodes(unprocessed_onnx_model, "Add") - sub_nodes = self.find_nodes(unprocessed_onnx_model, "Sub") - assert len(add_nodes) == 3 - assert len(sub_nodes) == 0 - - processed_onnx_model = self.get_onnx_model( - unprocessed_onnx_model, - model_desc, - input_args, - device, - _extra_postprocess=postpass_replace_first_add_with_sub, - ) - # check that extra postpass is called, and called only once. - add_nodes = self.find_nodes(processed_onnx_model, "Add") - sub_nodes = self.find_nodes(processed_onnx_model, "Sub") - assert len(add_nodes) == 2 - assert len(sub_nodes) == 1 - - -if __name__ == "__main__": - unittest.main(module=__name__, buffer=True)