From 40645c56c89f87478d895b6041c6d8be65c10d7b Mon Sep 17 00:00:00 2001
From: Ashwini Khade <askhade@microsoft.com>
Date: Fri, 29 Sep 2023 15:22:13 -0700
Subject: [PATCH] remove non relevant tests from cmake

---
 cmake/onnxruntime_training.cmake              |  226 +---
 cmake/onnxruntime_unittests.cmake             | 1032 +++++++++--------
 onnxruntime/test/providers/cpu/model_tests.cc |    8 +
 .../python/orttraining_pybind_state.cc        |  425 -------
 .../test/gradient/gradient_checker.h          |    2 +-
 .../test/gradient/gradient_ops_test.cc        |    2 +-
 .../python/onnxruntime_test_postprocess.py    |  325 ------
 7 files changed, 606 insertions(+), 1414 deletions(-)
 delete mode 100644 orttraining/orttraining/test/python/onnxruntime_test_postprocess.py

diff --git a/cmake/onnxruntime_training.cmake b/cmake/onnxruntime_training.cmake
index f9ba2b341f741..1aba00b96774d 100644
--- a/cmake/onnxruntime_training.cmake
+++ b/cmake/onnxruntime_training.cmake
@@ -1,29 +1,26 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
 
-set (CXXOPTS ${cxxopts_SOURCE_DIR}/include)
+set(CXXOPTS ${cxxopts_SOURCE_DIR}/include)
 
 # training lib
 file(GLOB_RECURSE onnxruntime_training_srcs
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*"
-    "${ORTTRAINING_SOURCE_DIR}/core/session/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/session/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/agent/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/agent/*.cc"
-    )
-
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/*.h"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/*.cc"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.h"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/tensorboard/*.cc"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/adasum/*"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/communication/*"
+  "${ORTTRAINING_SOURCE_DIR}/core/agent/*.h"
+  "${ORTTRAINING_SOURCE_DIR}/core/agent/*.cc"
+)
 
 # This needs to be built in framework.cmake
 file(GLOB_RECURSE onnxruntime_training_framework_excluded_srcs CONFIGURE_DEPENDS
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
-    "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.h"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/torch/*.cc"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.h"
+  "${ORTTRAINING_SOURCE_DIR}/core/framework/triton/*.cc"
 )
 
 list(REMOVE_ITEM onnxruntime_training_srcs ${onnxruntime_training_framework_excluded_srcs})
@@ -39,199 +36,42 @@ endif()
 
 target_include_directories(onnxruntime_training PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} PUBLIC ${onnxruntime_graph_header} ${MPI_CXX_INCLUDE_DIRS})
 
-if (onnxruntime_USE_CUDA)
+if(onnxruntime_USE_CUDA)
   target_include_directories(onnxruntime_training PRIVATE ${onnxruntime_CUDNN_HOME}/include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
 endif()
 
-if (onnxruntime_USE_NCCL)
+if(onnxruntime_USE_NCCL)
   target_include_directories(onnxruntime_training PRIVATE ${NCCL_INCLUDE_DIRS})
 endif()
 
-if (onnxruntime_BUILD_UNIT_TESTS)
+if(onnxruntime_BUILD_UNIT_TESTS)
   set_target_properties(onnxruntime_training PROPERTIES FOLDER "ONNXRuntime")
   source_group(TREE ${ORTTRAINING_ROOT} FILES ${onnxruntime_training_srcs})
 
-  # training runner lib
-  file(GLOB_RECURSE onnxruntime_training_runner_srcs
-      "${ORTTRAINING_SOURCE_DIR}/models/runner/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/models/runner/*.cc"
-  )
-
-  # perf test utils
-  set(onnxruntime_perf_test_src_dir ${TEST_SRC_DIR}/perftest)
-  set(onnxruntime_perf_test_src
-  "${onnxruntime_perf_test_src_dir}/utils.h")
-
-  if(WIN32)
-    list(APPEND onnxruntime_perf_test_src
-      "${onnxruntime_perf_test_src_dir}/windows/utils.cc")
-  else ()
-    list(APPEND onnxruntime_perf_test_src
-      "${onnxruntime_perf_test_src_dir}/posix/utils.cc")
-  endif()
-
-  onnxruntime_add_static_library(onnxruntime_training_runner ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src})
-  add_dependencies(onnxruntime_training_runner ${onnxruntime_EXTERNAL_DEPENDENCIES} onnx onnxruntime_providers)
-
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-    target_link_libraries(onnxruntime_training_runner PRIVATE Python::Python)
-  endif()
-
-  onnxruntime_add_include_to_target(onnxruntime_training_runner onnxruntime_training onnxruntime_framework onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-
-  target_include_directories(onnxruntime_training_runner PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} PUBLIC ${onnxruntime_graph_header})
-  target_link_libraries(onnxruntime_training_runner PRIVATE nlohmann_json::nlohmann_json)
-  if (onnxruntime_USE_CUDA)
-    target_include_directories(onnxruntime_training_runner PUBLIC ${onnxruntime_CUDNN_HOME}/include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
-  endif()
-
-  if (onnxruntime_USE_NCCL)
-    target_include_directories(onnxruntime_training_runner PRIVATE ${NCCL_INCLUDE_DIRS})
-  endif()
-
-  if (onnxruntime_USE_ROCM)
-    add_definitions(-DUSE_ROCM=1)
-    target_include_directories(onnxruntime_training_runner PUBLIC ${onnxruntime_ROCM_HOME}/include)
-  endif()
-
-  check_cxx_compiler_flag(-Wno-maybe-uninitialized HAS_NO_MAYBE_UNINITIALIZED)
-  if(UNIX AND NOT APPLE)
-    if (HAS_NO_MAYBE_UNINITIALIZED)
-      target_compile_options(onnxruntime_training_runner PUBLIC "-Wno-maybe-uninitialized")
-    endif()
-  endif()
-
-  if (onnxruntime_USE_ROCM)
-    target_compile_options(onnxruntime_training_runner PUBLIC -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1)
-  endif()
-
-  set_target_properties(onnxruntime_training_runner PROPERTIES FOLDER "ONNXRuntimeTest")
-  source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_training_runner_srcs} ${onnxruntime_perf_test_src})
-
-  # MNIST
-  file(GLOB_RECURSE training_mnist_src
-      "${ORTTRAINING_SOURCE_DIR}/models/mnist/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/models/mnist/mnist_data_provider.cc"
-      "${ORTTRAINING_SOURCE_DIR}/models/mnist/main.cc"
-  )
-  onnxruntime_add_executable(onnxruntime_training_mnist ${training_mnist_src})
-  onnxruntime_add_include_to_target(onnxruntime_training_mnist onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  target_include_directories(onnxruntime_training_mnist PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
-
   set(ONNXRUNTIME_LIBS
-      onnxruntime_session
-      ${onnxruntime_libs}
-      ${PROVIDERS_MKLDNN}
-      ${PROVIDERS_DML}
-      onnxruntime_optimizer
-      onnxruntime_providers
-      onnxruntime_util
-      onnxruntime_framework
+    onnxruntime_session
+    ${onnxruntime_libs}
+    ${PROVIDERS_MKLDNN}
+    ${PROVIDERS_DML}
+    onnxruntime_optimizer
+    onnxruntime_providers
+    onnxruntime_util
+    onnxruntime_framework
   )
 
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+  if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     list(APPEND ONNXRUNTIME_LIBS Python::Python)
   endif()
 
   list(APPEND ONNXRUNTIME_LIBS
-      onnxruntime_graph
-      ${ONNXRUNTIME_MLAS_LIBS}
-      onnxruntime_common
-      onnxruntime_flatbuffers
-      Boost::mp11 safeint_interface
+    onnxruntime_graph
+    ${ONNXRUNTIME_MLAS_LIBS}
+    onnxruntime_common
+    onnxruntime_flatbuffers
+    Boost::mp11 safeint_interface
   )
 
-  if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
-      list(APPEND ONNXRUNTIME_LIBS onnxruntime_language_interop onnxruntime_pyop)
-  endif()
-
-  if(UNIX AND NOT APPLE)
-    if (HAS_NO_MAYBE_UNINITIALIZED)
-      target_compile_options(onnxruntime_training_mnist PUBLIC "-Wno-maybe-uninitialized")
-    endif()
+  if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
+    list(APPEND ONNXRUNTIME_LIBS onnxruntime_language_interop onnxruntime_pyop)
   endif()
-  target_link_libraries(onnxruntime_training_mnist PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
-  set_target_properties(onnxruntime_training_mnist PROPERTIES FOLDER "ONNXRuntimeTest")
-
-  # squeezenet
-  # Disabling build for squeezenet, as no one is using this
-  #[[
-  file(GLOB_RECURSE training_squeezene_src
-      "${ORTTRAINING_SOURCE_DIR}/models/squeezenet/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/models/squeezenet/*.cc"
-  )
-  onnxruntime_add_executable(onnxruntime_training_squeezenet ${training_squeezene_src})
-  onnxruntime_add_include_to_target(onnxruntime_training_squeezenet onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  target_include_directories(onnxruntime_training_squeezenet PUBLIC ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${eigen_INCLUDE_DIRS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
-
-  if(UNIX AND NOT APPLE)
-    target_compile_options(onnxruntime_training_squeezenet PUBLIC "-Wno-maybe-uninitialized")
-  endif()
-  target_link_libraries(onnxruntime_training_squeezenet PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
-  set_target_properties(onnxruntime_training_squeezenet PROPERTIES FOLDER "ONNXRuntimeTest")
-  ]]
-
-  # BERT
-  file(GLOB_RECURSE training_bert_src
-      "${ORTTRAINING_SOURCE_DIR}/models/bert/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/models/bert/*.cc"
-  )
-  onnxruntime_add_executable(onnxruntime_training_bert ${training_bert_src})
-
-  if(UNIX AND NOT APPLE)
-    if (HAS_NO_MAYBE_UNINITIALIZED)
-      target_compile_options(onnxruntime_training_bert PUBLIC "-Wno-maybe-uninitialized")
-    endif()
-  endif()
-
-  onnxruntime_add_include_to_target(onnxruntime_training_bert onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  target_include_directories(onnxruntime_training_bert PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
-
-  # ROCM provider sources are generated, need to add include directory for generated headers
-  if (onnxruntime_USE_ROCM)
-    target_include_directories(onnxruntime_training_bert PUBLIC ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime)
-  endif()
-
-  target_link_libraries(onnxruntime_training_bert PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
-  set_target_properties(onnxruntime_training_bert PROPERTIES FOLDER "ONNXRuntimeTest")
-
-  # Pipeline
-  file(GLOB_RECURSE training_pipeline_poc_src
-      "${ORTTRAINING_SOURCE_DIR}/models/pipeline_poc/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/models/pipeline_poc/*.cc"
-  )
-  onnxruntime_add_executable(onnxruntime_training_pipeline_poc ${training_pipeline_poc_src})
-
-  if(UNIX AND NOT APPLE)
-    if (HAS_NO_MAYBE_UNINITIALIZED)
-      target_compile_options(onnxruntime_training_pipeline_poc PUBLIC "-Wno-maybe-uninitialized")
-    endif()
-  endif()
-
-  onnxruntime_add_include_to_target(onnxruntime_training_pipeline_poc onnxruntime_common onnx onnx_proto ${PROTOBUF_LIB} onnxruntime_training flatbuffers::flatbuffers Boost::mp11 safeint_interface)
-  target_include_directories(onnxruntime_training_pipeline_poc PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
-  if (onnxruntime_USE_NCCL)
-    target_include_directories(onnxruntime_training_pipeline_poc PRIVATE ${NCCL_INCLUDE_DIRS})
-  endif()
-
-  target_link_libraries(onnxruntime_training_pipeline_poc PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
-  set_target_properties(onnxruntime_training_pipeline_poc PROPERTIES FOLDER "ONNXRuntimeTest")
-
-  # GPT-2
-  file(GLOB_RECURSE training_gpt2_src
-      "${ORTTRAINING_SOURCE_DIR}/models/gpt2/*.h"
-      "${ORTTRAINING_SOURCE_DIR}/models/gpt2/*.cc"
-  )
-  onnxruntime_add_executable(onnxruntime_training_gpt2 ${training_gpt2_src})
-  if(UNIX AND NOT APPLE)
-    if (HAS_NO_MAYBE_UNINITIALIZED)
-      target_compile_options(onnxruntime_training_gpt2 PUBLIC "-Wno-maybe-uninitialized")
-    endif()
-  endif()
-
-  target_include_directories(onnxruntime_training_gpt2 PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT} ${ORTTRAINING_ROOT} ${MPI_CXX_INCLUDE_DIRS} ${eigen_INCLUDE_DIRS} ${CXXOPTS} ${extra_includes} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir} ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CURRENT_BINARY_DIR}/onnx onnxruntime_training_runner)
-
-  target_link_libraries(onnxruntime_training_gpt2 PRIVATE onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_LIBS} ${onnxruntime_EXTERNAL_LIBRARIES})
-  set_target_properties(onnxruntime_training_gpt2 PROPERTIES FOLDER "ONNXRuntimeTest")
-
 endif()
diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake
index 3b9727ec08970..dfdf52bc98059 100644
--- a/cmake/onnxruntime_unittests.cmake
+++ b/cmake/onnxruntime_unittests.cmake
@@ -1,30 +1,33 @@
 # Copyright (c) Microsoft Corporation. All rights reserved.
 # Licensed under the MIT License.
-if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
+if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
   find_package(XCTest REQUIRED)
 endif()
 
 set(TEST_SRC_DIR ${ONNXRUNTIME_ROOT}/test)
 set(TEST_INC_DIR ${ONNXRUNTIME_ROOT})
-if (onnxruntime_ENABLE_TRAINING)
+
+if(onnxruntime_ENABLE_TRAINING)
   list(APPEND TEST_INC_DIR ${ORTTRAINING_ROOT})
 endif()
-if (onnxruntime_USE_TVM)
+
+if(onnxruntime_USE_TVM)
   list(APPEND TEST_INC_DIR ${TVM_INCLUDES})
 endif()
 
 set(disabled_warnings)
+
 function(AddTest)
   cmake_parse_arguments(_UT "DYN" "TARGET" "LIBS;SOURCES;DEPENDS;TEST_ARGS" ${ARGN})
   list(REMOVE_DUPLICATES _UT_SOURCES)
 
-  if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
+  if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
     onnxruntime_add_executable(${_UT_TARGET} ${TEST_SRC_DIR}/xctest/orttestmain.m)
   else()
     onnxruntime_add_executable(${_UT_TARGET} ${_UT_SOURCES})
   endif()
 
-  if (_UT_DEPENDS)
+  if(_UT_DEPENDS)
     list(REMOVE_DUPLICATES _UT_DEPENDS)
   endif(_UT_DEPENDS)
 
@@ -34,28 +37,29 @@ function(AddTest)
 
   source_group(TREE ${REPO_ROOT} FILES ${_UT_SOURCES})
 
-  if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
-    #TODO: fix the warnings, they are dangerous
+  if(MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+    # TODO: fix the warnings, they are dangerous
     target_compile_options(${_UT_TARGET} PRIVATE "/wd4244")
   endif()
-  if (MSVC)
+
+  if(MSVC)
     target_compile_options(${_UT_TARGET} PRIVATE "/wd6330")
   endif()
 
   set_target_properties(${_UT_TARGET} PROPERTIES FOLDER "ONNXRuntimeTest")
 
-  if (MSVC)
+  if(MSVC)
     # set VS debugger working directory to the test program's directory
     set_target_properties(${_UT_TARGET} PROPERTIES VS_DEBUGGER_WORKING_DIRECTORY $<TARGET_FILE_DIR:${_UT_TARGET}>)
   endif()
 
-  if (_UT_DEPENDS)
+  if(_UT_DEPENDS)
     add_dependencies(${_UT_TARGET} ${_UT_DEPENDS})
   endif(_UT_DEPENDS)
 
   if(_UT_DYN)
     target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock onnxruntime ${CMAKE_DL_LIBS}
-            Threads::Threads)
+      Threads::Threads)
     target_compile_definitions(${_UT_TARGET} PRIVATE -DUSE_ONNXRUNTIME_DLL)
   else()
     target_link_libraries(${_UT_TARGET} PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES})
@@ -63,57 +67,65 @@ function(AddTest)
 
   onnxruntime_add_include_to_target(${_UT_TARGET} date_interface flatbuffers::flatbuffers)
   target_include_directories(${_UT_TARGET} PRIVATE ${TEST_INC_DIR})
-  if (onnxruntime_USE_CUDA)
+
+  if(onnxruntime_USE_CUDA)
     target_include_directories(${_UT_TARGET} PRIVATE ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${onnxruntime_CUDNN_HOME}/include)
-    if (onnxruntime_USE_NCCL)
+
+    if(onnxruntime_USE_NCCL)
       target_include_directories(${_UT_TARGET} PRIVATE ${NCCL_INCLUDE_DIRS})
     endif()
   endif()
-  if (onnxruntime_USE_TENSORRT)
+
+  if(onnxruntime_USE_TENSORRT)
     # used for instantiating placeholder TRT builder to mitigate TRT library load/unload overhead
     target_include_directories(${_UT_TARGET} PRIVATE ${TENSORRT_INCLUDE_DIR})
   endif()
 
   if(MSVC)
     target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
-            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
+      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
   endif()
 
-  if (WIN32)
+  if(WIN32)
     # include dbghelp in case tests throw an ORT exception, as that exception includes a stacktrace, which requires dbghelp.
     target_link_libraries(${_UT_TARGET} PRIVATE debug dbghelp)
 
-    if (onnxruntime_USE_CUDA)
+    if(onnxruntime_USE_CUDA)
       # disable a warning from the CUDA headers about unreferenced local functions
-      if (MSVC)
+      if(MSVC)
         target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd4505>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4505>")
+          "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4505>")
       endif()
     endif()
-    if (MSVC)
+
+    if(MSVC)
       # warning C6326: Potential comparison of a constant with another constant.
       # Lot of such things came from gtest
       target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
+
       # Raw new and delete. A lot of such things came from googletest.
       target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
+
       # "Global initializer calls a non-constexpr function."
       target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26426>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
     endif()
+
     target_compile_options(${_UT_TARGET} PRIVATE ${disabled_warnings})
   else()
     target_compile_options(${_UT_TARGET} PRIVATE ${DISABLED_WARNINGS_FOR_TVM})
     target_compile_options(${_UT_TARGET} PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler -Wno-error=sign-compare>"
-            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-error=sign-compare>")
+      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:-Wno-error=sign-compare>")
     target_compile_options(${_UT_TARGET} PRIVATE "-Wno-error=uninitialized")
   endif()
 
   set(TEST_ARGS ${_UT_TEST_ARGS})
-  if (onnxruntime_GENERATE_TEST_REPORTS)
+
+  if(onnxruntime_GENERATE_TEST_REPORTS)
     # generate a report file next to the test program
-    if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
       # WebAssembly use a memory file system, so we do not use full path
       list(APPEND TEST_ARGS
         "--gtest_output=xml:$<TARGET_FILE_NAME:${_UT_TARGET}>.$<CONFIG>.results.xml")
@@ -123,7 +135,7 @@ function(AddTest)
     endif()
   endif(onnxruntime_GENERATE_TEST_REPORTS)
 
-  if (${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
+  if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
     # target_sources(${_UT_TARGET} PRIVATE ${TEST_SRC_DIR}/xctest/orttestmain.m)
     set_target_properties(${_UT_TARGET} PROPERTIES FOLDER "ONNXRuntimeTest"
       MACOSX_BUNDLE_BUNDLE_NAME ${_UT_TARGET}
@@ -140,13 +152,15 @@ function(AddTest)
       ${TEST_SRC_DIR}/xctest/xcgtest.mm
       ${_UT_SOURCES})
     onnxruntime_configure_target(${_UT_TARGET}_xc)
+
     if(_UT_DYN)
       target_link_libraries(${_UT_TARGET}_xc PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock onnxruntime ${CMAKE_DL_LIBS}
-              Threads::Threads)
+        Threads::Threads)
       target_compile_definitions(${_UT_TARGET}_xc PRIVATE USE_ONNXRUNTIME_DLL)
     else()
       target_link_libraries(${_UT_TARGET}_xc PRIVATE ${_UT_LIBS} GTest::gtest GTest::gmock ${onnxruntime_EXTERNAL_LIBRARIES})
     endif()
+
     onnxruntime_add_include_to_target(${_UT_TARGET}_xc date_interface flatbuffers::flatbuffers)
     target_include_directories(${_UT_TARGET}_xc PRIVATE ${TEST_INC_DIR})
     get_target_property(${_UT_TARGET}_DEFS ${_UT_TARGET} COMPILE_DEFINITIONS)
@@ -162,16 +176,16 @@ function(AddTest)
 
     xctest_add_test(xctest.${_UT_TARGET} ${_UT_TARGET}_xc)
   else()
-    if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
       # We might have already executed the following "find_program" code when we build ORT nodejs binding.
       # Then the program is found the result is stored in the variable and the search will not be repeated.
       find_program(NPM_CLI
-         NAMES "npm.cmd" "npm"
-         DOC "NPM command line client"
-         REQUIRED
+        NAMES "npm.cmd" "npm"
+        DOC "NPM command line client"
+        REQUIRED
       )
 
-      if (onnxruntime_WEBASSEMBLY_RUN_TESTS_IN_BROWSER)
+      if(onnxruntime_WEBASSEMBLY_RUN_TESTS_IN_BROWSER)
         add_custom_command(TARGET ${_UT_TARGET} POST_BUILD
           COMMAND ${CMAKE_COMMAND} -E copy_if_different ${TEST_SRC_DIR}/wasm/package.json $<TARGET_FILE_DIR:${_UT_TARGET}>
           COMMAND ${CMAKE_COMMAND} -E copy_if_different ${TEST_SRC_DIR}/wasm/package-lock.json $<TARGET_FILE_DIR:${_UT_TARGET}>
@@ -181,19 +195,23 @@ function(AddTest)
         )
 
         set(TEST_NPM_FLAGS)
-        if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+
+        if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
           list(APPEND TEST_NPM_FLAGS "--wasm-threads")
         endif()
+
         add_test(NAME ${_UT_TARGET}
           COMMAND ${NPM_CLI} test -- ${TEST_NPM_FLAGS} --entry=${_UT_TARGET} ${TEST_ARGS}
           WORKING_DIRECTORY $<TARGET_FILE_DIR:${_UT_TARGET}>
         )
       else()
         set(TEST_NODE_FLAGS)
-        if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+
+        if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
           list(APPEND TEST_NODE_FLAGS "--experimental-wasm-threads")
         endif()
-        if (onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
+
+        if(onnxruntime_ENABLE_WEBASSEMBLY_SIMD)
           list(APPEND TEST_NODE_FLAGS "--experimental-wasm-simd")
         endif()
 
@@ -214,10 +232,9 @@ endfunction(AddTest)
 # general program entrypoint for C++ unit tests
 set(onnxruntime_unittest_main_src "${TEST_SRC_DIR}/unittest_main/test_main.cc")
 
-#Do not add '${TEST_SRC_DIR}/util/include' to your include directories directly
-#Use onnxruntime_add_include_to_target or target_link_libraries, so that compile definitions
-#can propagate correctly.
-
+# Do not add '${TEST_SRC_DIR}/util/include' to your include directories directly
+# Use onnxruntime_add_include_to_target or target_link_libraries, so that compile definitions
+# can propagate correctly.
 file(GLOB onnxruntime_test_utils_src CONFIGURE_DEPENDS
   "${TEST_SRC_DIR}/util/include/*.h"
   "${TEST_SRC_DIR}/util/*.cc"
@@ -236,89 +253,86 @@ file(GLOB onnxruntime_test_quantiztion_src CONFIGURE_DEPENDS
 )
 
 if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
-
   file(GLOB onnxruntime_test_ir_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/ir/*.cc"
     "${TEST_SRC_DIR}/ir/*.h"
-    )
+  )
 
   file(GLOB onnxruntime_test_optimizer_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/optimizer/*.cc"
     "${TEST_SRC_DIR}/optimizer/*.h"
-    )
+  )
 
   set(onnxruntime_test_framework_src_patterns
     "${TEST_SRC_DIR}/framework/*.cc"
     "${TEST_SRC_DIR}/framework/*.h"
     "${TEST_SRC_DIR}/platform/*.cc"
-    )
-
-else()  # minimal and/or reduced ops build
+  )
 
+else() # minimal and/or reduced ops build
   set(onnxruntime_test_framework_src_patterns
     "${TEST_SRC_DIR}/platform/*.cc"
-    )
+  )
 
-  if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
+  if(onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
     list(APPEND onnxruntime_test_framework_src_patterns
       "${TEST_SRC_DIR}/framework/ort_model_only_test.cc"
     )
   endif()
 
-  if (NOT onnxruntime_MINIMAL_BUILD)
+  if(NOT onnxruntime_MINIMAL_BUILD)
     file(GLOB onnxruntime_test_ir_src CONFIGURE_DEPENDS
       "${TEST_SRC_DIR}/ir/*.cc"
       "${TEST_SRC_DIR}/ir/*.h"
-      )
+    )
   endif()
 endif()
 
 if((NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_EXTENDED_MINIMAL_BUILD)
-   AND NOT onnxruntime_REDUCED_OPS_BUILD)
+  AND NOT onnxruntime_REDUCED_OPS_BUILD)
   list(APPEND onnxruntime_test_optimizer_src
-       "${TEST_SRC_DIR}/optimizer/runtime_optimization/graph_runtime_optimization_test.cc")
+    "${TEST_SRC_DIR}/optimizer/runtime_optimization/graph_runtime_optimization_test.cc")
 endif()
 
 file(GLOB onnxruntime_test_training_src
-  "${ORTTRAINING_SOURCE_DIR}/test/model/*.h"
-  "${ORTTRAINING_SOURCE_DIR}/test/model/*.cc"
   "${ORTTRAINING_SOURCE_DIR}/test/gradient/*.h"
   "${ORTTRAINING_SOURCE_DIR}/test/gradient/*.cc"
-  "${ORTTRAINING_SOURCE_DIR}/test/graph/*.h"
-  "${ORTTRAINING_SOURCE_DIR}/test/graph/*.cc"
-  "${ORTTRAINING_SOURCE_DIR}/test/session/*.h"
-  "${ORTTRAINING_SOURCE_DIR}/test/session/*.cc"
   "${ORTTRAINING_SOURCE_DIR}/test/optimizer/*.h"
   "${ORTTRAINING_SOURCE_DIR}/test/optimizer/*.cc"
-  "${ORTTRAINING_SOURCE_DIR}/test/framework/*.cc"
-  "${ORTTRAINING_SOURCE_DIR}/test/distributed/*.h"
-  "${ORTTRAINING_SOURCE_DIR}/test/distributed/*.cc"
-  )
+  "${ORTTRAINING_SOURCE_DIR}/test/framework/crc32c_test.cc"
+)
+
+list(REMOVE_ITEM onnxruntime_test_training_src
+  "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_op_test_utils.cc"
+  "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_op_test_utils.h"
+  "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_ops_test.cc"
+  "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_checker.h"
+  "${ORTTRAINING_SOURCE_DIR}/test/gradient/gradient_checker.cc"
+)
 
 # TODO (baijumeswani): Remove the minimal build check here.
-#                      The training api tests should be runnable even on a minimal build.
-#                      This requires converting all the *.onnx files to ort format.
-if (NOT onnxruntime_MINIMAL_BUILD)
-  if (onnxruntime_ENABLE_TRAINING_APIS)
+# The training api tests should be runnable even on a minimal build.
+# This requires converting all the *.onnx files to ort format.
+if(NOT onnxruntime_MINIMAL_BUILD)
+  if(onnxruntime_ENABLE_TRAINING_APIS)
     file(GLOB onnxruntime_test_training_api_src
       "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.cc"
       "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.h"
       "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.cc"
       "${ORTTRAINING_SOURCE_DIR}/test/training_api/core/*.h"
-      )
+    )
   endif()
 endif()
 
 if(WIN32)
   list(APPEND onnxruntime_test_framework_src_patterns
     "${TEST_SRC_DIR}/platform/windows/*.cc"
-    "${TEST_SRC_DIR}/platform/windows/logging/*.cc" )
+    "${TEST_SRC_DIR}/platform/windows/logging/*.cc")
 endif()
 
 if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
-
   if(onnxruntime_USE_CUDA)
-    list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/framework/cuda/*)
+    list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/framework/cuda/*)
   endif()
 
   set(onnxruntime_test_providers_src_patterns
@@ -343,6 +357,7 @@ else()
   set(onnxruntime_test_providers_src_patterns
     "${TEST_SRC_DIR}/framework/test_utils.cc"
     "${TEST_SRC_DIR}/framework/test_utils.h"
+
     # TODO: Add anything that is needed for testing a minimal build
   )
 endif()
@@ -352,7 +367,7 @@ file(GLOB onnxruntime_test_providers_src CONFIGURE_DEPENDS ${onnxruntime_test_pr
 if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
   file(GLOB_RECURSE onnxruntime_test_providers_cpu_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/cpu/*"
-    )
+  )
 endif()
 
 if(onnxruntime_DISABLE_ML_OPS)
@@ -361,93 +376,93 @@ endif()
 
 list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cpu_src})
 
-if (onnxruntime_USE_CUDA AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
+if(onnxruntime_USE_CUDA AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
   file(GLOB onnxruntime_test_providers_cuda_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/cuda/*"
-    )
+  )
   list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cuda_src})
 endif()
 
-if (onnxruntime_USE_CANN)
+if(onnxruntime_USE_CANN)
   file(GLOB_RECURSE onnxruntime_test_providers_cann_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/cann/*"
-    )
+  )
   list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_cann_src})
 endif()
 
 # Disable training ops test for minimal build as a lot of these depend on loading an onnx model.
-if (NOT onnxruntime_MINIMAL_BUILD)
-  if (onnxruntime_ENABLE_TRAINING_OPS)
+if(NOT onnxruntime_MINIMAL_BUILD)
+  if(onnxruntime_ENABLE_TRAINING_OPS)
     file(GLOB_RECURSE orttraining_test_trainingops_cpu_src CONFIGURE_DEPENDS
       "${ORTTRAINING_SOURCE_DIR}/test/training_ops/compare_provider_test_utils.cc"
       "${ORTTRAINING_SOURCE_DIR}/test/training_ops/function_op_test_utils.cc"
       "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/*"
-      )
+    )
 
-    if (NOT onnxruntime_ENABLE_TRAINING)
+    if(NOT onnxruntime_ENABLE_TRAINING)
       list(REMOVE_ITEM orttraining_test_trainingops_cpu_src
         "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cpu/tensorboard/summary_op_test.cc"
-        )
+      )
     endif()
 
     list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cpu_src})
 
-    if (onnxruntime_USE_CUDA OR onnxruntime_USE_ROCM)
+    if(onnxruntime_USE_CUDA OR onnxruntime_USE_ROCM)
       file(GLOB_RECURSE orttraining_test_trainingops_cuda_src CONFIGURE_DEPENDS
         "${ORTTRAINING_SOURCE_DIR}/test/training_ops/cuda/*"
-        )
+      )
       list(APPEND onnxruntime_test_providers_src ${orttraining_test_trainingops_cuda_src})
     endif()
   endif()
 endif()
 
-if (onnxruntime_USE_DNNL)
+if(onnxruntime_USE_DNNL)
   file(GLOB_RECURSE onnxruntime_test_providers_dnnl_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/dnnl/*"
-    )
+  )
   list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_dnnl_src})
 endif()
 
-if (onnxruntime_USE_NNAPI_BUILTIN)
+if(onnxruntime_USE_NNAPI_BUILTIN)
   file(GLOB_RECURSE onnxruntime_test_providers_nnapi_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/nnapi/*"
-    )
+  )
   list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_nnapi_src})
 endif()
 
-if (onnxruntime_USE_RKNPU)
+if(onnxruntime_USE_RKNPU)
   file(GLOB_RECURSE onnxruntime_test_providers_rknpu_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/rknpu/*"
-    )
+  )
   list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_rknpu_src})
 endif()
 
-if (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_EXTENDED_MINIMAL_BUILD)
+if(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_EXTENDED_MINIMAL_BUILD)
   file(GLOB_RECURSE onnxruntime_test_providers_internal_testing_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/internal_testing/*"
-    )
+  )
   list(APPEND onnxruntime_test_providers_src ${onnxruntime_test_providers_internal_testing_src})
 endif()
 
-set (ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR "${TEST_SRC_DIR}/shared_lib")
-set (ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR "${TEST_SRC_DIR}/global_thread_pools")
-set (ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR "${TEST_SRC_DIR}/custom_op_registration")
-set (ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR "${TEST_SRC_DIR}/logging_apis")
-
-set (onnxruntime_shared_lib_test_SRC
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_session_options.cc
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_run_options.cc
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_allocator.cc
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_nontensor_types.cc
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_loading.cc
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_ort_format_models.cc
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.h
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.cc
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.h
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc)
-
-if (NOT onnxruntime_MINIMAL_BUILD)
+set(ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR "${TEST_SRC_DIR}/shared_lib")
+set(ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR "${TEST_SRC_DIR}/global_thread_pools")
+set(ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR "${TEST_SRC_DIR}/custom_op_registration")
+set(ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR "${TEST_SRC_DIR}/logging_apis")
+
+set(onnxruntime_shared_lib_test_SRC
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_session_options.cc
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_run_options.cc
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_allocator.cc
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_nontensor_types.cc
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_model_loading.cc
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_ort_format_models.cc
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.h
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/utils.cc
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.h
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/custom_op_utils.cc)
+
+if(NOT onnxruntime_MINIMAL_BUILD)
   list(APPEND onnxruntime_shared_lib_test_SRC ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_inference.cc)
 endif()
 
@@ -455,14 +470,13 @@ if(onnxruntime_RUN_ONNX_TESTS)
   list(APPEND onnxruntime_shared_lib_test_SRC ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_io_types.cc)
 endif()
 
-set (onnxruntime_global_thread_pools_test_SRC
-          ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
-          ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_main.cc
-          ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_inference.cc)
+set(onnxruntime_global_thread_pools_test_SRC
+  ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/test_fixture.h
+  ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_main.cc
+  ${ONNXRUNTIME_GLOBAL_THREAD_POOLS_TEST_SRC_DIR}/test_inference.cc)
 
 # tests from lowest level library up.
 # the order of libraries should be maintained, with higher libraries being added first in the list
-
 set(onnxruntime_test_common_libs
   onnxruntime_test_utils
   onnxruntime_common
@@ -489,7 +503,7 @@ set(onnxruntime_test_framework_libs
   onnxruntime_graph
   ${ONNXRUNTIME_MLAS_LIBS}
   onnxruntime_common
-  )
+)
 
 set(onnxruntime_test_server_libs
   onnxruntime_test_utils
@@ -497,10 +511,10 @@ set(onnxruntime_test_server_libs
 )
 
 if(WIN32)
-    list(APPEND onnxruntime_test_framework_libs Advapi32)
+  list(APPEND onnxruntime_test_framework_libs Advapi32)
 endif()
 
-set (onnxruntime_test_providers_dependencies ${onnxruntime_EXTERNAL_DEPENDENCIES})
+set(onnxruntime_test_providers_dependencies ${onnxruntime_EXTERNAL_DEPENDENCIES})
 
 if(onnxruntime_USE_CUDA)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_cuda)
@@ -540,7 +554,7 @@ if(onnxruntime_USE_ROCM)
 endif()
 
 if(onnxruntime_USE_COREML)
-  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+  if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
     list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml onnxruntime_coreml_proto)
   else()
     list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml)
@@ -555,73 +569,75 @@ if(onnxruntime_USE_ARMNN)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_armnn)
 endif()
 
-if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
+if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
   set(ONNXRUNTIME_INTEROP_TEST_LIBS PRIVATE onnxruntime_language_interop onnxruntime_pyop)
 endif()
 
 set(ONNXRUNTIME_TEST_LIBS
-    onnxruntime_session
-    ${ONNXRUNTIME_INTEROP_TEST_LIBS}
-    ${onnxruntime_libs}
-    # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime
-    ${PROVIDERS_NNAPI}
-    ${PROVIDERS_JS}
-    ${PROVIDERS_VITISAI}
-    ${PROVIDERS_QNN}
-    ${PROVIDERS_SNPE}
-    ${PROVIDERS_RKNPU}
-    ${PROVIDERS_DML}
-    ${PROVIDERS_ACL}
-    ${PROVIDERS_ARMNN}
-    ${PROVIDERS_COREML}
-    # ${PROVIDERS_TVM}
-    ${PROVIDERS_XNNPACK}
-    ${PROVIDERS_AZURE}
-    onnxruntime_optimizer
-    onnxruntime_providers
-    onnxruntime_util
-    ${onnxruntime_tvm_libs}
-    onnxruntime_framework
-    onnxruntime_util
-    onnxruntime_graph
-    ${ONNXRUNTIME_MLAS_LIBS}
-    onnxruntime_common
-    onnxruntime_flatbuffers
+  onnxruntime_session
+  ${ONNXRUNTIME_INTEROP_TEST_LIBS}
+  ${onnxruntime_libs}
+
+  # CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime
+  ${PROVIDERS_NNAPI}
+  ${PROVIDERS_JS}
+  ${PROVIDERS_VITISAI}
+  ${PROVIDERS_QNN}
+  ${PROVIDERS_SNPE}
+  ${PROVIDERS_RKNPU}
+  ${PROVIDERS_DML}
+  ${PROVIDERS_ACL}
+  ${PROVIDERS_ARMNN}
+  ${PROVIDERS_COREML}
+
+  # ${PROVIDERS_TVM}
+  ${PROVIDERS_XNNPACK}
+  ${PROVIDERS_AZURE}
+  onnxruntime_optimizer
+  onnxruntime_providers
+  onnxruntime_util
+  ${onnxruntime_tvm_libs}
+  onnxruntime_framework
+  onnxruntime_util
+  onnxruntime_graph
+  ${ONNXRUNTIME_MLAS_LIBS}
+  onnxruntime_common
+  onnxruntime_flatbuffers
 )
 
-if (onnxruntime_ENABLE_TRAINING)
-  set(ONNXRUNTIME_TEST_LIBS onnxruntime_training_runner onnxruntime_training ${ONNXRUNTIME_TEST_LIBS})
+if(onnxruntime_ENABLE_TRAINING)
+  set(ONNXRUNTIME_TEST_LIBS onnxruntime_training ${ONNXRUNTIME_TEST_LIBS})
 endif()
 
 set(onnxruntime_test_providers_libs
-    onnxruntime_test_utils
-    ${ONNXRUNTIME_TEST_LIBS}
-  )
+  onnxruntime_test_utils
+  ${ONNXRUNTIME_TEST_LIBS}
+)
 
 if(onnxruntime_USE_TENSORRT)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/tensorrt/*)
-  list(APPEND onnxruntime_test_framework_src_patterns  "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/tensorrt_execution_provider_utils.h")
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/tensorrt/*)
+  list(APPEND onnxruntime_test_framework_src_patterns "${ONNXRUNTIME_ROOT}/core/providers/tensorrt/tensorrt_execution_provider_utils.h")
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_tensorrt)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_tensorrt onnxruntime_providers_shared)
   list(APPEND onnxruntime_test_providers_libs ${TENSORRT_LIBRARY_INFER})
 endif()
 
 if(onnxruntime_USE_MIGRAPHX)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/migraphx/*)
-  list(APPEND onnxruntime_test_framework_src_patterns  "${ONNXRUNTIME_ROOT}/core/providers/migraphx/migraphx_execution_provider_utils.h")
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/migraphx/*)
+  list(APPEND onnxruntime_test_framework_src_patterns "${ONNXRUNTIME_ROOT}/core/providers/migraphx/migraphx_execution_provider_utils.h")
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_migraphx)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_migraphx onnxruntime_providers_shared)
 endif()
 
 if(onnxruntime_USE_NNAPI_BUILTIN)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/nnapi/*)
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/nnapi/*)
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_nnapi)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_nnapi)
 endif()
 
 if(onnxruntime_USE_JSEP)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/js/*)
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/js/*)
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_js)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js)
@@ -635,22 +651,23 @@ if(onnxruntime_USE_QNN)
 endif()
 
 if(onnxruntime_USE_SNPE)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/snpe/*)
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/snpe/*)
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_snpe)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_snpe)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_snpe)
 endif()
 
 if(onnxruntime_USE_RKNPU)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/rknpu/*)
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/rknpu/*)
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_rknpu)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_rknpu)
 endif()
 
 if(onnxruntime_USE_COREML)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/coreml/*)
-  if (CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/coreml/*)
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "Darwin" OR CMAKE_SYSTEM_NAME STREQUAL "iOS")
     list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_coreml onnxruntime_coreml_proto)
     list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_coreml onnxruntime_coreml_proto)
     list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_coreml onnxruntime_coreml_proto)
@@ -662,103 +679,111 @@ if(onnxruntime_USE_COREML)
 endif()
 
 if(onnxruntime_USE_XNNPACK)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/xnnpack/*)
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/xnnpack/*)
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_xnnpack)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_xnnpack)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_xnnpack)
 endif()
 
 if(onnxruntime_USE_AZURE)
-  list(APPEND onnxruntime_test_framework_src_patterns  ${TEST_SRC_DIR}/providers/azure/*)
+  list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/azure/*)
   list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_azure)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_azure)
   list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_azure)
 endif()
 
 if(WIN32)
-  if (onnxruntime_USE_TVM)
+  if(onnxruntime_USE_TVM)
     list(APPEND disabled_warnings ${DISABLED_WARNINGS_FOR_TVM})
   endif()
 endif()
 
 file(GLOB onnxruntime_test_framework_src CONFIGURE_DEPENDS
   ${onnxruntime_test_framework_src_patterns}
-  )
+)
 
-#This is a small wrapper library that shouldn't use any onnxruntime internal symbols(except onnxruntime_common).
-#Because it could dynamically link to onnxruntime. Otherwise you will have two copies of onnxruntime in the same
-#process and you won't know which one you are testing.
+# This is a small wrapper library that shouldn't use any onnxruntime internal symbols(except onnxruntime_common).
+# Because it could dynamically link to onnxruntime. Otherwise you will have two copies of onnxruntime in the same
+# process and you won't know which one you are testing.
 onnxruntime_add_static_library(onnxruntime_test_utils ${onnxruntime_test_utils_src})
+
 if(MSVC)
   target_compile_options(onnxruntime_test_utils PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
-          "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
+    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
   target_compile_options(onnxruntime_test_utils PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
+    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
 else()
   target_compile_definitions(onnxruntime_test_utils PUBLIC -DNSYNC_ATOMIC_CPP11)
   target_include_directories(onnxruntime_test_utils PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT})
   onnxruntime_add_include_to_target(onnxruntime_test_utils nsync::nsync_cpp)
 endif()
-if (onnxruntime_USE_NCCL)
+
+if(onnxruntime_USE_NCCL)
   target_include_directories(onnxruntime_test_utils PRIVATE ${NCCL_INCLUDE_DIRS})
 endif()
-if (onnxruntime_USE_ROCM)
+
+if(onnxruntime_USE_ROCM)
   target_include_directories(onnxruntime_test_utils PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
 endif()
-onnxruntime_add_include_to_target(onnxruntime_test_utils onnxruntime_common onnxruntime_framework onnxruntime_session GTest::gtest GTest::gmock onnx onnx_proto flatbuffers::flatbuffers nlohmann_json::nlohmann_json Boost::mp11 safeint_interface)
-
 
+onnxruntime_add_include_to_target(onnxruntime_test_utils onnxruntime_common onnxruntime_framework onnxruntime_session GTest::gtest GTest::gmock onnx onnx_proto flatbuffers::flatbuffers nlohmann_json::nlohmann_json Boost::mp11 safeint_interface)
 
-if (onnxruntime_USE_DML)
+if(onnxruntime_USE_DML)
   target_add_dml(onnxruntime_test_utils)
 endif()
+
 add_dependencies(onnxruntime_test_utils ${onnxruntime_EXTERNAL_DEPENDENCIES})
 target_include_directories(onnxruntime_test_utils PUBLIC "${TEST_SRC_DIR}/util/include" PRIVATE
-        ${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT})
+  ${eigen_INCLUDE_DIRS} ${ONNXRUNTIME_ROOT})
 set_target_properties(onnxruntime_test_utils PROPERTIES FOLDER "ONNXRuntimeTest")
 source_group(TREE ${TEST_SRC_DIR} FILES ${onnxruntime_test_utils_src})
 
 set(onnx_test_runner_src_dir ${TEST_SRC_DIR}/onnx)
 file(GLOB onnx_test_runner_common_srcs CONFIGURE_DEPENDS
-    ${onnx_test_runner_src_dir}/*.h
-    ${onnx_test_runner_src_dir}/*.cc)
+  ${onnx_test_runner_src_dir}/*.h
+  ${onnx_test_runner_src_dir}/*.cc)
 
 list(REMOVE_ITEM onnx_test_runner_common_srcs ${onnx_test_runner_src_dir}/main.cc)
 
 onnxruntime_add_static_library(onnx_test_runner_common ${onnx_test_runner_common_srcs})
+
 if(MSVC)
   target_compile_options(onnx_test_runner_common PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
-          "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
+    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
 else()
   target_compile_definitions(onnx_test_runner_common PUBLIC -DNSYNC_ATOMIC_CPP11)
   target_include_directories(onnx_test_runner_common PRIVATE ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT})
   onnxruntime_add_include_to_target(onnx_test_runner_common nsync::nsync_cpp)
 endif()
-if (MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
-  #TODO: fix the warnings, they are dangerous
+
+if(MSVC AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+  # TODO: fix the warnings, they are dangerous
   target_compile_options(onnx_test_runner_common PRIVATE "/wd4244")
 endif()
+
 onnxruntime_add_include_to_target(onnx_test_runner_common onnxruntime_common onnxruntime_framework
-        onnxruntime_test_utils onnx onnx_proto re2::re2 flatbuffers::flatbuffers Boost::mp11 safeint_interface)
+  onnxruntime_test_utils onnx onnx_proto re2::re2 flatbuffers::flatbuffers Boost::mp11 safeint_interface)
 
 add_dependencies(onnx_test_runner_common onnx_test_data_proto ${onnxruntime_EXTERNAL_DEPENDENCIES})
 target_include_directories(onnx_test_runner_common PRIVATE ${eigen_INCLUDE_DIRS}
-        ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT})
+  ${CMAKE_CURRENT_BINARY_DIR} ${ONNXRUNTIME_ROOT})
 
 set_target_properties(onnx_test_runner_common PROPERTIES FOLDER "ONNXRuntimeTest")
 
 set(all_tests ${onnxruntime_test_common_src} ${onnxruntime_test_ir_src} ${onnxruntime_test_optimizer_src}
-        ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantiztion_src})
+  ${onnxruntime_test_framework_src} ${onnxruntime_test_providers_src} ${onnxruntime_test_quantiztion_src})
+
 if(NOT TARGET onnxruntime AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   list(APPEND all_tests ${onnxruntime_shared_lib_test_SRC})
 endif()
 
-if (onnxruntime_USE_CUDA)
+if(onnxruntime_USE_CUDA)
   onnxruntime_add_static_library(onnxruntime_test_cuda_ops_lib ${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu)
   list(APPEND onnxruntime_test_common_libs onnxruntime_test_cuda_ops_lib)
   file(GLOB onnxruntime_test_providers_cuda_ut_src CONFIGURE_DEPENDS
     "${TEST_SRC_DIR}/providers/cuda/test_cases/*"
   )
+
   # onnxruntime_providers_cuda_ut is only for unittests.
   onnxruntime_add_shared_library_module(onnxruntime_providers_cuda_ut ${onnxruntime_test_providers_cuda_ut_src} $<TARGET_OBJECTS:onnxruntime_providers_cuda_obj>)
   config_cuda_provider_shared_module(onnxruntime_providers_cuda_ut)
@@ -767,21 +792,21 @@ if (onnxruntime_USE_CUDA)
   list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_cuda_ut)
 endif()
 
-set(all_dependencies ${onnxruntime_test_providers_dependencies} )
+set(all_dependencies ${onnxruntime_test_providers_dependencies})
 
-if (onnxruntime_ENABLE_TRAINING)
+if(onnxruntime_ENABLE_TRAINING)
   list(APPEND all_tests ${onnxruntime_test_training_src})
 endif()
 
-if (onnxruntime_ENABLE_TRAINING_APIS)
-    list(APPEND all_tests ${onnxruntime_test_training_api_src})
+if(onnxruntime_ENABLE_TRAINING_APIS)
+  list(APPEND all_tests ${onnxruntime_test_training_api_src})
 endif()
 
-if (onnxruntime_USE_TVM)
-    list(APPEND all_tests ${onnxruntime_test_tvm_src})
+if(onnxruntime_USE_TVM)
+  list(APPEND all_tests ${onnxruntime_test_tvm_src})
 endif()
 
-if (onnxruntime_USE_OPENVINO)
+if(onnxruntime_USE_OPENVINO)
   list(APPEND all_tests ${onnxruntime_test_openvino_src})
 endif()
 
@@ -792,8 +817,8 @@ if(WIN32)
   list(APPEND onnxruntime_test_providers_libs Advapi32)
 endif()
 
-if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-  if (NOT onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  if(NOT onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
     list(REMOVE_ITEM all_tests
       "${TEST_SRC_DIR}/framework/execution_frame_test.cc"
       "${TEST_SRC_DIR}/framework/inference_session_test.cc"
@@ -807,26 +832,28 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
 endif()
 
 set(test_all_args)
-if (onnxruntime_USE_TENSORRT)
+
+if(onnxruntime_USE_TENSORRT)
   # TRT EP CI takes much longer time when updating to TRT 8.2
   # So, we only run trt ep and exclude other eps to reduce CI test time.
   #
   # The test names of model tests were using sequential number in the past.
   # This PR https://github.com/microsoft/onnxruntime/pull/10220 (Please see ExpandModelName function in model_tests.cc for more details)
   # made test name contain the "ep" and "model path" information, so we can easily filter the tests using cuda ep or other ep with *cpu_* or *xxx_*.
-  list(APPEND test_all_args "--gtest_filter=-*cpu_*:*cuda_*" )
-endif ()
+  list(APPEND test_all_args "--gtest_filter=-*cpu_*:*cuda_*")
+endif()
 
 AddTest(
   TARGET onnxruntime_test_all
   SOURCES ${all_tests} ${onnxruntime_unittest_main_src}
   LIBS
-    onnx_test_runner_common ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs}
-    onnx_test_data_proto
+  onnx_test_runner_common ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs}
+  onnx_test_data_proto
   DEPENDS ${all_dependencies}
   TEST_ARGS ${test_all_args}
 )
-if (MSVC)
+
+if(MSVC)
   # The warning means the type of two integral values around a binary operator is narrow than their result.
   # If we promote the two input values first, it could be more tolerant to integer overflow.
   # However, this is test code. We are less concerned.
@@ -837,66 +864,75 @@ endif()
 
 # TODO fix shorten-64-to-32 warnings
 # there are some in builds where sizeof(size_t) != sizeof(int64_t), e.g., in 'ONNX Runtime Web CI Pipeline'
-if (HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
+if(HAS_SHORTEN_64_TO_32 AND NOT CMAKE_SIZEOF_VOID_P EQUAL 8)
   target_compile_options(onnxruntime_test_all PRIVATE -Wno-error=shorten-64-to-32)
 endif()
 
-if (UNIX AND onnxruntime_USE_TENSORRT)
-    # The test_main.cc includes NvInfer.h where it has many deprecated declarations  
-    # simply ignore them for TensorRT EP build
-    set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+if(UNIX AND onnxruntime_USE_TENSORRT)
+  # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+  # simply ignore them for TensorRT EP build
+  set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
 endif()
 
-if (MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS)
-# attention_op_test.cc: Function uses '49152' bytes of stack:  exceeds /analyze:stacksize '16384'..
-target_compile_options(onnxruntime_test_all PRIVATE  "/analyze:stacksize 131072")
+if(MSVC AND onnxruntime_ENABLE_STATIC_ANALYSIS)
+  # attention_op_test.cc: Function uses '49152' bytes of stack:  exceeds /analyze:stacksize '16384'..
+  target_compile_options(onnxruntime_test_all PRIVATE "/analyze:stacksize 131072")
 endif()
 
 # the default logger tests conflict with the need to have an overall default logger
 # so skip in this type of
 target_compile_definitions(onnxruntime_test_all PUBLIC -DSKIP_DEFAULT_LOGGER_TESTS)
-if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
+
+if(CMAKE_SYSTEM_NAME STREQUAL "iOS")
   target_compile_definitions(onnxruntime_test_all_xc PUBLIC -DSKIP_DEFAULT_LOGGER_TESTS)
 endif()
+
 if(onnxruntime_RUN_MODELTEST_IN_DEBUG_MODE)
   target_compile_definitions(onnxruntime_test_all PUBLIC -DRUN_MODELTEST_IN_DEBUG_MODE)
 endif()
-if (onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS)
+
+if(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS)
   target_compile_definitions(onnxruntime_test_all PRIVATE DEBUG_NODE_INPUTS_OUTPUTS)
 endif()
 
-if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
+if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
   target_link_libraries(onnxruntime_test_all PRIVATE onnxruntime_language_interop onnxruntime_pyop)
 endif()
-if (onnxruntime_USE_ROCM)
-  if (onnxruntime_USE_COMPOSABLE_KERNEL)
+
+if(onnxruntime_USE_ROCM)
+  if(onnxruntime_USE_COMPOSABLE_KERNEL)
     target_compile_definitions(onnxruntime_test_all PRIVATE USE_COMPOSABLE_KERNEL)
   endif()
+
   target_compile_options(onnxruntime_test_all PRIVATE -D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1)
-  target_include_directories(onnxruntime_test_all PRIVATE  ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
+  target_include_directories(onnxruntime_test_all PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
 endif()
-if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+
+if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   target_link_libraries(onnxruntime_test_all PRIVATE Python::Python)
 endif()
-if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js)
   set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
-  if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+
+  if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
     set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
   endif()
-  if (onnxruntime_USE_JSEP)
+
+  if(onnxruntime_USE_JSEP)
     set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " --pre-js \"${ONNXRUNTIME_ROOT}/wasm/js_internal_api.js\"")
   endif()
 
-  ###
-  ### if you want to investigate or debug a test failure in onnxruntime_test_all, replace the following line.
-  ### those flags slow down the CI test significantly, so we don't use them by default.
-  ###
-  #   set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2")
+  # ##
+  # ## if you want to investigate or debug a test failure in onnxruntime_test_all, replace the following line.
+  # ## those flags slow down the CI test significantly, so we don't use them by default.
+  # ##
+  # set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=2 -s SAFE_HEAP=1 -s STACK_OVERFLOW_CHECK=2")
   set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s ASSERTIONS=0 -s SAFE_HEAP=0 -s STACK_OVERFLOW_CHECK=1")
 endif()
 
-if (onnxruntime_ENABLE_ATEN)
+if(onnxruntime_ENABLE_ATEN)
   target_compile_definitions(onnxruntime_test_all PRIVATE ENABLE_ATEN)
 endif()
 
@@ -904,7 +940,8 @@ set(test_data_target onnxruntime_test_all)
 
 onnxruntime_add_static_library(onnx_test_data_proto ${TEST_SRC_DIR}/proto/tml.proto)
 add_dependencies(onnx_test_data_proto onnx_proto ${onnxruntime_EXTERNAL_DEPENDENCIES})
-#onnx_proto target should mark this definition as public, instead of private
+
+# onnx_proto target should mark this definition as public, instead of private
 target_compile_definitions(onnx_test_data_proto PRIVATE "-DONNX_API=")
 onnxruntime_add_include_to_target(onnx_test_data_proto onnx_proto)
 target_include_directories(onnx_test_data_proto PRIVATE ${CMAKE_CURRENT_BINARY_DIR})
@@ -934,74 +971,80 @@ add_custom_command(
   ${TEST_SAMPLES_SRC}
   ${TEST_SAMPLES_DES})
 
-if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-  if (onnxruntime_USE_SNPE)
+if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+  if(onnxruntime_USE_SNPE)
     add_custom_command(
       TARGET ${test_data_target} POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E copy ${SNPE_SO_FILES} $<TARGET_FILE_DIR:${test_data_target}>
-      )
+    )
   endif()
 
-  if (onnxruntime_USE_QNN)
-    if (NOT QNN_ARCH_ABI)
+  if(onnxruntime_USE_QNN)
+    if(NOT QNN_ARCH_ABI)
       string(TOLOWER ${onnxruntime_target_platform} GEN_PLATFORM)
+
       if(MSVC)
-          message(STATUS "Building MSVC for architecture ${CMAKE_SYSTEM_PROCESSOR} with CMAKE_GENERATOR_PLATFORM as ${GEN_PLATFORM}")
-          if (${GEN_PLATFORM} STREQUAL "arm64")
-            set(QNN_ARCH_ABI aarch64-windows-msvc)
-          else()
-            set(QNN_ARCH_ABI x86_64-windows-msvc)
-          endif()
+        message(STATUS "Building MSVC for architecture ${CMAKE_SYSTEM_PROCESSOR} with CMAKE_GENERATOR_PLATFORM as ${GEN_PLATFORM}")
+
+        if(${GEN_PLATFORM} STREQUAL "arm64")
+          set(QNN_ARCH_ABI aarch64-windows-msvc)
+        else()
+          set(QNN_ARCH_ABI x86_64-windows-msvc)
+        endif()
       else()
-          if (${CMAKE_SYSTEM_NAME} STREQUAL "Android")
-            set(QNN_ARCH_ABI aarch64-android-clang6.0)
-          elseif (${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
-            if (${GEN_PLATFORM} STREQUAL "x86_64")
-              set(QNN_ARCH_ABI x86_64-linux-clang)
-            else()
-              set(QNN_ARCH_ABI aarch64-android)
-            endif()
+        if(${CMAKE_SYSTEM_NAME} STREQUAL "Android")
+          set(QNN_ARCH_ABI aarch64-android-clang6.0)
+        elseif(${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+          if(${GEN_PLATFORM} STREQUAL "x86_64")
+            set(QNN_ARCH_ABI x86_64-linux-clang)
+          else()
+            set(QNN_ARCH_ABI aarch64-android)
           endif()
+        endif()
       endif()
     endif()
 
-    if (MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
-        file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.so" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.dll" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.dll")
-        if (${QNN_ARCH_ABI} STREQUAL "aarch64-windows-msvc")
-          file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/hexagon-v68/unsigned/libQnnHtpV68Skel.so" "${onnxruntime_QNN_HOME}/target/hexagon-v68/lib/unsigned/libQnnHtpV68Skel.so")
-          list(APPEND QNN_LIB_FILES ${EXTRA_HTP_LIB})
-        endif()
-        message(STATUS "QNN lib files: " ${QNN_LIB_FILES})
-        add_custom_command(
-          TARGET ${test_data_target} POST_BUILD
-          COMMAND ${CMAKE_COMMAND} -E copy ${QNN_LIB_FILES} $<TARGET_FILE_DIR:${test_data_target}>
-          )
+    if(MSVC OR ${CMAKE_SYSTEM_NAME} STREQUAL "Linux")
+      file(GLOB QNN_LIB_FILES LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.so" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.so" "${onnxruntime_QNN_HOME}/lib/${QNN_ARCH_ABI}/*.dll" "${onnxruntime_QNN_HOME}/target/${QNN_ARCH_ABI}/lib/*.dll")
+
+      if(${QNN_ARCH_ABI} STREQUAL "aarch64-windows-msvc")
+        file(GLOB EXTRA_HTP_LIB LIST_DIRECTORIES false "${onnxruntime_QNN_HOME}/lib/hexagon-v68/unsigned/libQnnHtpV68Skel.so" "${onnxruntime_QNN_HOME}/target/hexagon-v68/lib/unsigned/libQnnHtpV68Skel.so")
+        list(APPEND QNN_LIB_FILES ${EXTRA_HTP_LIB})
+      endif()
+
+      message(STATUS "QNN lib files: " ${QNN_LIB_FILES})
+      add_custom_command(
+        TARGET ${test_data_target} POST_BUILD
+        COMMAND ${CMAKE_COMMAND} -E copy ${QNN_LIB_FILES} $<TARGET_FILE_DIR:${test_data_target}>
+      )
     endif()
   endif()
 
-  if (onnxruntime_USE_DNNL)
+  if(onnxruntime_USE_DNNL)
     if(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND onnxruntime_DNNL_OPENCL_ROOT STREQUAL "")
       message(FATAL_ERROR "--dnnl_opencl_root required")
-    elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "" AND NOT (onnxruntime_DNNL_OPENCL_ROOT STREQUAL ""))
+    elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "" AND NOT(onnxruntime_DNNL_OPENCL_ROOT STREQUAL ""))
       message(FATAL_ERROR "--dnnl_gpu_runtime required")
-    elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND NOT (onnxruntime_DNNL_OPENCL_ROOT STREQUAL ""))
-      #file(TO_CMAKE_PATH ${onnxruntime_DNNL_OPENCL_ROOT} onnxruntime_DNNL_OPENCL_ROOT)
-      #set(DNNL_OCL_INCLUDE_DIR ${onnxruntime_DNNL_OPENCL_ROOT}/include)
-      #set(DNNL_GPU_CMAKE_ARGS "-DDNNL_GPU_RUNTIME=OCL " "-DOPENCLROOT=${onnxruntime_DNNL_OPENCL_ROOT}")
+    elseif(onnxruntime_DNNL_GPU_RUNTIME STREQUAL "ocl" AND NOT(onnxruntime_DNNL_OPENCL_ROOT STREQUAL ""))
+      # file(TO_CMAKE_PATH ${onnxruntime_DNNL_OPENCL_ROOT} onnxruntime_DNNL_OPENCL_ROOT)
+      # set(DNNL_OCL_INCLUDE_DIR ${onnxruntime_DNNL_OPENCL_ROOT}/include)
+      # set(DNNL_GPU_CMAKE_ARGS "-DDNNL_GPU_RUNTIME=OCL " "-DOPENCLROOT=${onnxruntime_DNNL_OPENCL_ROOT}")
       target_compile_definitions(onnxruntime_test_all PUBLIC -DDNNL_GPU_RUNTIME=OCL)
     endif()
+
     list(APPEND onnx_test_libs dnnl)
     add_custom_command(
       TARGET ${test_data_target} POST_BUILD
       COMMAND ${CMAKE_COMMAND} -E copy ${DNNL_DLL_PATH} $<TARGET_FILE_DIR:${test_data_target}>
-      )
+    )
   endif()
+
   if(WIN32)
-    if (onnxruntime_USE_TVM)
+    if(onnxruntime_USE_TVM)
       add_custom_command(
         TARGET ${test_data_target} POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E copy $<TARGET_FILE:tvm> $<TARGET_FILE_DIR:${test_data_target}>
-        )
+      )
     endif()
   endif()
 
@@ -1015,29 +1058,31 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   endif()
 endif()
 
-
 set(onnx_test_libs
   onnxruntime_test_utils
   ${ONNXRUNTIME_TEST_LIBS}
   onnx_test_data_proto
   ${onnxruntime_EXTERNAL_LIBRARIES})
 
-if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
+if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
   list(APPEND onnx_test_libs onnxruntime_language_interop onnxruntime_pyop)
 endif()
 
 onnxruntime_add_executable(onnx_test_runner ${onnx_test_runner_src_dir}/main.cc)
+
 if(MSVC)
   target_compile_options(onnx_test_runner PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
-          "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
+    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
 endif()
+
 if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
   set_target_properties(onnx_test_runner PROPERTIES
     XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
   )
 endif()
-if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-  if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+
+if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
     set_target_properties(onnx_test_runner PROPERTIES LINK_FLAGS "-s NODERAWFS=1 -s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
   else()
     set_target_properties(onnx_test_runner PROPERTIES LINK_FLAGS "-s NODERAWFS=1 -s ALLOW_MEMORY_GROWTH=1")
@@ -1046,27 +1091,30 @@ endif()
 
 target_link_libraries(onnx_test_runner PRIVATE onnx_test_runner_common ${GETOPT_LIB_WIDE} ${onnx_test_libs} nlohmann_json::nlohmann_json)
 target_include_directories(onnx_test_runner PRIVATE ${ONNXRUNTIME_ROOT})
-if (onnxruntime_USE_ROCM)
+
+if(onnxruntime_USE_ROCM)
   target_include_directories(onnx_test_runner PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
 endif()
-if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+
+if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   target_link_libraries(onnx_test_runner PRIVATE Python::Python)
 endif()
+
 set_target_properties(onnx_test_runner PROPERTIES FOLDER "ONNXRuntimeTest")
 
-if (onnxruntime_USE_TVM)
-  if (WIN32)
+if(onnxruntime_USE_TVM)
+  if(WIN32)
     target_link_options(onnx_test_runner PRIVATE "/STACK:4000000")
   endif()
 endif()
 
 install(TARGETS onnx_test_runner
-        ARCHIVE  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-        LIBRARY  DESTINATION ${CMAKE_INSTALL_LIBDIR}
-        BUNDLE   DESTINATION ${CMAKE_INSTALL_LIBDIR}
-        RUNTIME  DESTINATION ${CMAKE_INSTALL_BINDIR})
+  ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  BUNDLE DESTINATION ${CMAKE_INSTALL_LIBDIR}
+  RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
 
-if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   if(onnxruntime_BUILD_BENCHMARKS)
     SET(BENCHMARK_DIR ${TEST_SRC_DIR}/onnx/microbenchmark)
     onnxruntime_add_executable(onnxruntime_benchmark
@@ -1085,23 +1133,26 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
       ${BENCHMARK_DIR}/reduceminmax.cc)
     target_include_directories(onnxruntime_benchmark PRIVATE ${ONNXRUNTIME_ROOT} ${onnxruntime_graph_header} ${ONNXRUNTIME_ROOT}/core/mlas/inc)
     target_compile_definitions(onnxruntime_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
+
     if(WIN32)
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd4141>"
-                        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4141>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd4141>")
+
       # Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak.
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
-                        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26400>"
-                        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26400>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26400>")
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26814>"
-                        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26814>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26814>")
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26814>"
-                        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26497>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26497>")
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26426>"
-                        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
       target_compile_options(onnxruntime_benchmark PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
-              "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
     endif()
+
     target_link_libraries(onnxruntime_benchmark PRIVATE onnx_test_runner_common benchmark::benchmark ${onnx_test_libs})
     add_dependencies(onnxruntime_benchmark ${onnxruntime_EXTERNAL_DEPENDENCIES})
     set_target_properties(onnxruntime_benchmark PROPERTIES FOLDER "ONNXRuntimeTest")
@@ -1112,18 +1163,23 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     target_include_directories(onnxruntime_mlas_benchmark PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc)
     target_link_libraries(onnxruntime_mlas_benchmark PRIVATE benchmark::benchmark onnxruntime_util onnxruntime_framework ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common ${CMAKE_DL_LIBS})
     target_compile_definitions(onnxruntime_mlas_benchmark PRIVATE BENCHMARK_STATIC_DEFINE)
+
     if(WIN32)
       target_link_libraries(onnxruntime_mlas_benchmark PRIVATE debug Dbghelp)
+
       # Avoid using new and delete. But this is a benchmark program, it's ok if it has a chance to leak.
       target_compile_options(onnxruntime_mlas_benchmark PRIVATE /wd26409)
+
       # "Global initializer calls a non-constexpr function." BENCHMARK_CAPTURE macro needs this.
       target_compile_options(onnxruntime_mlas_benchmark PRIVATE /wd26426)
     else()
       target_link_libraries(onnxruntime_mlas_benchmark PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS})
     endif()
-    if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+
+    if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
       target_link_libraries(onnxruntime_mlas_benchmark PRIVATE cpuinfo)
     endif()
+
     set_target_properties(onnxruntime_mlas_benchmark PROPERTIES FOLDER "ONNXRuntimeTest")
   endif()
 
@@ -1131,83 +1187,94 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     target_compile_options(onnx_test_runner_common PRIVATE -D_CRT_SECURE_NO_WARNINGS)
   endif()
 
-  if (NOT onnxruntime_REDUCED_OPS_BUILD AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+  if(NOT onnxruntime_REDUCED_OPS_BUILD AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
     add_test(NAME onnx_test_pytorch_converted
       COMMAND onnx_test_runner ${onnx_SOURCE_DIR}/onnx/backend/test/data/pytorch-converted)
     add_test(NAME onnx_test_pytorch_operator
       COMMAND onnx_test_runner ${onnx_SOURCE_DIR}/onnx/backend/test/data/pytorch-operator)
   endif()
 
-  if (CMAKE_SYSTEM_NAME STREQUAL "Android")
-      list(APPEND android_shared_libs log android)
+  if(CMAKE_SYSTEM_NAME STREQUAL "Android")
+    list(APPEND android_shared_libs log android)
   endif()
 endif()
 
-
-if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-  #perf test runner
+if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+  # perf test runner
   set(onnxruntime_perf_test_src_dir ${TEST_SRC_DIR}/perftest)
   set(onnxruntime_perf_test_src_patterns
-  "${onnxruntime_perf_test_src_dir}/*.cc"
-  "${onnxruntime_perf_test_src_dir}/*.h")
+    "${onnxruntime_perf_test_src_dir}/*.cc"
+    "${onnxruntime_perf_test_src_dir}/*.h")
 
   if(WIN32)
     list(APPEND onnxruntime_perf_test_src_patterns
       "${onnxruntime_perf_test_src_dir}/windows/*.cc"
-      "${onnxruntime_perf_test_src_dir}/windows/*.h" )
-  else ()
+      "${onnxruntime_perf_test_src_dir}/windows/*.h")
+  else()
     list(APPEND onnxruntime_perf_test_src_patterns
       "${onnxruntime_perf_test_src_dir}/posix/*.cc"
-      "${onnxruntime_perf_test_src_dir}/posix/*.h" )
+      "${onnxruntime_perf_test_src_dir}/posix/*.h")
   endif()
 
   file(GLOB onnxruntime_perf_test_src CONFIGURE_DEPENDS
     ${onnxruntime_perf_test_src_patterns}
-    )
+  )
   onnxruntime_add_executable(onnxruntime_perf_test ${onnxruntime_perf_test_src} ${ONNXRUNTIME_ROOT}/core/platform/path_lib.cc)
+
   if(MSVC)
     target_compile_options(onnxruntime_perf_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
-            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
+      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
   endif()
+
   target_include_directories(onnxruntime_perf_test PRIVATE ${onnx_test_runner_src_dir} ${ONNXRUNTIME_ROOT}
-          ${eigen_INCLUDE_DIRS} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir}
-          ${CMAKE_CURRENT_BINARY_DIR})
-  if (onnxruntime_USE_ROCM)
+    ${eigen_INCLUDE_DIRS} ${onnxruntime_graph_header} ${onnxruntime_exec_src_dir}
+    ${CMAKE_CURRENT_BINARY_DIR})
+
+  if(onnxruntime_USE_ROCM)
     target_include_directories(onnxruntime_perf_test PRIVATE ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/orttraining)
   endif()
-  if (WIN32)
+
+  if(WIN32)
     target_compile_options(onnxruntime_perf_test PRIVATE ${disabled_warnings})
-    if (NOT DEFINED SYS_PATH_LIB)
+
+    if(NOT DEFINED SYS_PATH_LIB)
       set(SYS_PATH_LIB shlwapi)
     endif()
   endif()
+
   if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
     set_target_properties(onnxruntime_perf_test PROPERTIES
       XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
     )
   endif()
 
-  if (onnxruntime_BUILD_SHARED_LIB)
-    #It will dynamically link to onnxruntime. So please don't add onxruntime_graph/onxruntime_framework/... here.
-    #onnxruntime_common is kind of ok because it is thin, tiny and totally stateless.
+  if(onnxruntime_BUILD_SHARED_LIB)
+    # It will dynamically link to onnxruntime. So please don't add onxruntime_graph/onxruntime_framework/... here.
+    # onnxruntime_common is kind of ok because it is thin, tiny and totally stateless.
     set(onnxruntime_perf_test_libs
-            onnx_test_runner_common onnxruntime_test_utils onnxruntime_common
-            onnxruntime onnxruntime_flatbuffers onnx_test_data_proto
-            ${onnxruntime_EXTERNAL_LIBRARIES}
-            ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS})
+      onnx_test_runner_common onnxruntime_test_utils onnxruntime_common
+      onnxruntime onnxruntime_flatbuffers onnx_test_data_proto
+      ${onnxruntime_EXTERNAL_LIBRARIES}
+      ${GETOPT_LIB_WIDE} ${SYS_PATH_LIB} ${CMAKE_DL_LIBS})
+
     if(NOT WIN32)
       list(APPEND onnxruntime_perf_test_libs nsync::nsync_cpp)
+
       if(onnxruntime_USE_SNPE)
         list(APPEND onnxruntime_perf_test_libs onnxruntime_providers_snpe)
       endif()
     endif()
-    if (CMAKE_SYSTEM_NAME STREQUAL "Android")
+
+    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
       list(APPEND onnxruntime_perf_test_libs ${android_shared_libs})
     endif()
+
     target_link_libraries(onnxruntime_perf_test PRIVATE ${onnxruntime_perf_test_libs} Threads::Threads)
+
     if(WIN32)
       target_link_libraries(onnxruntime_perf_test PRIVATE debug dbghelp advapi32)
     endif()
+
     if(tensorflow_C_PACKAGE_PATH)
       target_include_directories(onnxruntime_perf_test PRIVATE ${tensorflow_C_PACKAGE_PATH}/include)
       target_link_directories(onnxruntime_perf_test PRIVATE ${tensorflow_C_PACKAGE_PATH}/lib)
@@ -1217,55 +1284,62 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   else()
     target_link_libraries(onnxruntime_perf_test PRIVATE onnx_test_runner_common ${GETOPT_LIB_WIDE} ${onnx_test_libs})
   endif()
+
   set_target_properties(onnxruntime_perf_test PROPERTIES FOLDER "ONNXRuntimeTest")
 
-  if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS AND NOT onnxruntime_BUILD_SHARED_LIB)
+  if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS AND NOT onnxruntime_BUILD_SHARED_LIB)
     target_link_libraries(onnxruntime_perf_test PRIVATE onnxruntime_language_interop onnxruntime_pyop)
   endif()
 
-  if (onnxruntime_USE_TVM)
-    if (WIN32)
+  if(onnxruntime_USE_TVM)
+    if(WIN32)
       target_link_options(onnxruntime_perf_test PRIVATE "/STACK:4000000")
     endif()
   endif()
 
   # shared lib
-  if (onnxruntime_BUILD_SHARED_LIB)
+  if(onnxruntime_BUILD_SHARED_LIB)
     onnxruntime_add_static_library(onnxruntime_mocked_allocator ${TEST_SRC_DIR}/util/test_allocator.cc)
     target_include_directories(onnxruntime_mocked_allocator PUBLIC ${TEST_SRC_DIR}/util/include)
     target_link_libraries(onnxruntime_mocked_allocator PRIVATE ${GSL_TARGET})
     set_target_properties(onnxruntime_mocked_allocator PROPERTIES FOLDER "ONNXRuntimeTest")
 
-    #################################################################
+    # ################################################################
     # test inference using shared lib
     set(onnxruntime_shared_lib_test_LIBS onnxruntime_mocked_allocator onnxruntime_test_utils onnxruntime_common onnx_proto)
+
     if(NOT WIN32)
       list(APPEND onnxruntime_shared_lib_test_LIBS nsync::nsync_cpp)
+
       if(onnxruntime_USE_SNPE)
         list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_providers_snpe)
       endif()
     endif()
-    if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+
+    if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
       list(APPEND onnxruntime_shared_lib_test_LIBS cpuinfo)
     endif()
-    if (onnxruntime_USE_CUDA)
+
+    if(onnxruntime_USE_CUDA)
       list(APPEND onnxruntime_shared_lib_test_LIBS onnxruntime_test_cuda_ops_lib cudart)
     endif()
-    if (onnxruntime_USE_TENSORRT)
+
+    if(onnxruntime_USE_TENSORRT)
       list(APPEND onnxruntime_shared_lib_test_LIBS ${TENSORRT_LIBRARY_INFER})
     endif()
-    if (CMAKE_SYSTEM_NAME STREQUAL "Android")
+
+    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
       list(APPEND onnxruntime_shared_lib_test_LIBS ${android_shared_libs})
     endif()
 
     AddTest(DYN
-            TARGET onnxruntime_shared_lib_test
-            SOURCES ${onnxruntime_shared_lib_test_SRC} ${onnxruntime_unittest_main_src}
-            LIBS ${onnxruntime_shared_lib_test_LIBS}
-            DEPENDS ${all_dependencies}
+      TARGET onnxruntime_shared_lib_test
+      SOURCES ${onnxruntime_shared_lib_test_SRC} ${onnxruntime_unittest_main_src}
+      LIBS ${onnxruntime_shared_lib_test_LIBS}
+      DEPENDS ${all_dependencies}
     )
 
-    if (CMAKE_SYSTEM_NAME STREQUAL "Android")
+    if(CMAKE_SYSTEM_NAME STREQUAL "Android")
       target_sources(onnxruntime_shared_lib_test PRIVATE
         "${ONNXRUNTIME_ROOT}/core/platform/android/cxa_demangle.cc"
         "${TEST_SRC_DIR}/platform/android/cxa_demangle_test.cc"
@@ -1273,7 +1347,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
       target_compile_definitions(onnxruntime_shared_lib_test PRIVATE USE_DUMMY_EXA_DEMANGLE=1)
     endif()
 
-    if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    if(CMAKE_SYSTEM_NAME STREQUAL "iOS")
       add_custom_command(
         TARGET onnxruntime_shared_lib_test POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E copy_directory
@@ -1281,19 +1355,19 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
         $<TARGET_FILE_DIR:onnxruntime_shared_lib_test>/testdata)
     endif()
 
-    if (UNIX AND onnxruntime_USE_TENSORRT)
-        # The test_main.cc includes NvInfer.h where it has many deprecated declarations  
-        # simply ignore them for TensorRT EP build
-        set_property(TARGET onnxruntime_shared_lib_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+    if(UNIX AND onnxruntime_USE_TENSORRT)
+      # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+      # simply ignore them for TensorRT EP build
+      set_property(TARGET onnxruntime_shared_lib_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
     endif()
 
     # test inference using global threadpools
-    if (NOT CMAKE_SYSTEM_NAME MATCHES "Android|iOS" AND NOT onnxruntime_MINIMAL_BUILD)
+    if(NOT CMAKE_SYSTEM_NAME MATCHES "Android|iOS" AND NOT onnxruntime_MINIMAL_BUILD)
       AddTest(DYN
-              TARGET onnxruntime_global_thread_pools_test
-              SOURCES ${onnxruntime_global_thread_pools_test_SRC}
-              LIBS ${onnxruntime_shared_lib_test_LIBS}
-              DEPENDS ${all_dependencies}
+        TARGET onnxruntime_global_thread_pools_test
+        SOURCES ${onnxruntime_global_thread_pools_test_SRC}
+        LIBS ${onnxruntime_shared_lib_test_LIBS}
+        DEPENDS ${all_dependencies}
       )
     endif()
   endif()
@@ -1304,23 +1378,23 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     AddTest(
       TARGET onnxruntime_test_debug_node_inputs_outputs
       SOURCES
-        "${TEST_SRC_DIR}/debug_node_inputs_outputs/debug_node_inputs_outputs_utils_test.cc"
-        "${TEST_SRC_DIR}/framework/TestAllocatorManager.cc"
-        "${TEST_SRC_DIR}/framework/test_utils.cc"
-        "${TEST_SRC_DIR}/providers/base_tester.h"
-        "${TEST_SRC_DIR}/providers/base_tester.cc"
-        "${TEST_SRC_DIR}/providers/checkers.h"
-        "${TEST_SRC_DIR}/providers/checkers.cc"
-        "${TEST_SRC_DIR}/providers/op_tester.h"
-        "${TEST_SRC_DIR}/providers/op_tester.cc"
-        "${TEST_SRC_DIR}/providers/provider_test_utils.h"
-        "${TEST_SRC_DIR}/providers/tester_types.h"
-        ${onnxruntime_unittest_main_src}
+      "${TEST_SRC_DIR}/debug_node_inputs_outputs/debug_node_inputs_outputs_utils_test.cc"
+      "${TEST_SRC_DIR}/framework/TestAllocatorManager.cc"
+      "${TEST_SRC_DIR}/framework/test_utils.cc"
+      "${TEST_SRC_DIR}/providers/base_tester.h"
+      "${TEST_SRC_DIR}/providers/base_tester.cc"
+      "${TEST_SRC_DIR}/providers/checkers.h"
+      "${TEST_SRC_DIR}/providers/checkers.cc"
+      "${TEST_SRC_DIR}/providers/op_tester.h"
+      "${TEST_SRC_DIR}/providers/op_tester.cc"
+      "${TEST_SRC_DIR}/providers/provider_test_utils.h"
+      "${TEST_SRC_DIR}/providers/tester_types.h"
+      ${onnxruntime_unittest_main_src}
       LIBS ${onnxruntime_test_providers_libs} ${onnxruntime_test_common_libs}
       DEPENDS ${all_dependencies}
     )
 
-    if (onnxruntime_USE_ROCM)
+    if(onnxruntime_USE_ROCM)
       target_include_directories(onnxruntime_test_debug_node_inputs_outputs PRIVATE ${onnxruntime_ROCM_HOME}/hipfft/include ${onnxruntime_ROCM_HOME}/include ${onnxruntime_ROCM_HOME}/hipcub/include ${onnxruntime_ROCM_HOME}/hiprand/include ${onnxruntime_ROCM_HOME}/rocrand/include)
       target_include_directories(onnxruntime_test_debug_node_inputs_outputs PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR}/amdgpu/onnxruntime)
     endif(onnxruntime_USE_ROCM)
@@ -1329,17 +1403,17 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
       PRIVATE DEBUG_NODE_INPUTS_OUTPUTS)
   endif(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS)
 
-  #some ETW tools
+  # some ETW tools
   if(WIN32 AND onnxruntime_ENABLE_INSTRUMENT)
     onnxruntime_add_executable(generate_perf_report_from_etl ${ONNXRUNTIME_ROOT}/tool/etw/main.cc
-            ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc
-            ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
+      ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc
+      ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
     target_compile_definitions(generate_perf_report_from_etl PRIVATE "_CONSOLE" "_UNICODE" "UNICODE")
     target_link_libraries(generate_perf_report_from_etl PRIVATE tdh Advapi32)
 
     onnxruntime_add_executable(compare_two_sessions ${ONNXRUNTIME_ROOT}/tool/etw/compare_two_sessions.cc
-            ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc
-            ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
+      ${ONNXRUNTIME_ROOT}/tool/etw/eparser.h ${ONNXRUNTIME_ROOT}/tool/etw/eparser.cc
+      ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.h ${ONNXRUNTIME_ROOT}/tool/etw/TraceSession.cc)
     target_compile_definitions(compare_two_sessions PRIVATE "_CONSOLE" "_UNICODE" "UNICODE")
     target_link_libraries(compare_two_sessions PRIVATE ${GETOPT_LIB_WIDE} tdh Advapi32)
   endif()
@@ -1349,45 +1423,54 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     "${TEST_SRC_DIR}/mlas/unittest/*.cpp"
   )
   onnxruntime_add_executable(onnxruntime_mlas_test ${onnxruntime_mlas_test_src})
+
   if(MSVC)
     target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
+      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
     target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:--compiler-options /utf-8>"
-            "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
+      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/utf-8>")
     target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd6326>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
+      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd6326>")
     target_compile_options(onnxruntime_mlas_test PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26426>"
-                "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
+      "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26426>")
   endif()
+
   if(${CMAKE_SYSTEM_NAME} STREQUAL "iOS")
     set_target_properties(onnxruntime_mlas_test PROPERTIES
       XCODE_ATTRIBUTE_CODE_SIGNING_ALLOWED "NO"
     )
   endif()
+
   target_include_directories(onnxruntime_mlas_test PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${ONNXRUNTIME_ROOT}
-          ${CMAKE_CURRENT_BINARY_DIR})
+    ${CMAKE_CURRENT_BINARY_DIR})
   target_link_libraries(onnxruntime_mlas_test PRIVATE GTest::gtest GTest::gmock ${ONNXRUNTIME_MLAS_LIBS} onnxruntime_common)
-  if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+
+  if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
     target_link_libraries(onnxruntime_mlas_test PRIVATE cpuinfo)
   endif()
+
   if(NOT WIN32)
     target_link_libraries(onnxruntime_mlas_test PRIVATE nsync::nsync_cpp ${CMAKE_DL_LIBS})
   endif()
-  if (CMAKE_SYSTEM_NAME STREQUAL "Android")
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "Android")
     target_link_libraries(onnxruntime_mlas_test PRIVATE ${android_shared_libs})
   endif()
 
   if(WIN32)
     target_link_libraries(onnxruntime_mlas_test PRIVATE debug Dbghelp Advapi32)
   endif()
-  if (onnxruntime_LINK_LIBATOMIC)
+
+  if(onnxruntime_LINK_LIBATOMIC)
     target_link_libraries(onnxruntime_mlas_test PRIVATE atomic)
   endif()
+
   target_link_libraries(onnxruntime_mlas_test PRIVATE Threads::Threads)
 
   set_target_properties(onnxruntime_mlas_test PROPERTIES FOLDER "ONNXRuntimeTest")
-  if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-    if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
+
+  if(CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+    if(onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
       set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1 -s PROXY_TO_PTHREAD=1 -s EXIT_RUNTIME=1")
     else()
       set_target_properties(onnxruntime_mlas_test PROPERTIES LINK_FLAGS "-s ALLOW_MEMORY_GROWTH=1")
@@ -1399,8 +1482,8 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   # TODO(askhade): Fix the warnings.
   # This has no impact on the release as the release package and the pipeline, both do not use this.
   # This is used by devs for testing training apis.
-  #if (onnxruntime_ENABLE_TRAINING_APIS)
-  if (0)
+  # if (onnxruntime_ENABLE_TRAINING_APIS)
+  if(0)
     # Only files in the trainer and common folder will be compiled into test trainer.
     file(GLOB training_api_test_trainer_src
       "${ORTTRAINING_SOURCE_DIR}/test/training_api/common/*.cc"
@@ -1428,6 +1511,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     set(ONNXRUNTIME_TEST_LIBS
       onnxruntime_session
       ${onnxruntime_libs}
+
       # CUDA is dynamically loaded at runtime
       onnxruntime_optimizer
       onnxruntime_providers
@@ -1440,7 +1524,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
       onnxruntime_flatbuffers
     )
 
-    if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
+    if(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
       list(APPEND ONNXRUNTIME_TEST_LIBS onnxruntime_language_interop onnxruntime_pyop)
     endif()
 
@@ -1452,8 +1536,7 @@ if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
   endif()
 endif()
 
-if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
-
+if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   set(custom_op_src_patterns
     "${TEST_SRC_DIR}/testdata/custom_op_library/*.h"
     "${TEST_SRC_DIR}/testdata/custom_op_library/*.cc"
@@ -1464,20 +1547,21 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   set(custom_op_lib_option)
   set(custom_op_lib_link ${GSL_TARGET})
 
-  if (onnxruntime_USE_CUDA)
+  if(onnxruntime_USE_CUDA)
     list(APPEND custom_op_src_patterns
-        "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu"
-        "${TEST_SRC_DIR}/testdata/custom_op_library/cuda/cuda_ops.*")
+      "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/cuda_ops.cu"
+      "${TEST_SRC_DIR}/testdata/custom_op_library/cuda/cuda_ops.*")
     list(APPEND custom_op_lib_include ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES} ${onnxruntime_CUDNN_HOME}/include)
-    if (HAS_QSPECTRE)
+
+    if(HAS_QSPECTRE)
       list(APPEND custom_op_lib_option "$<$<COMPILE_LANGUAGE:CUDA>:SHELL:-Xcompiler /Qspectre>")
     endif()
   endif()
 
-  if (onnxruntime_USE_ROCM)
+  if(onnxruntime_USE_ROCM)
     list(APPEND custom_op_src_patterns
-        "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/rocm_ops.hip"
-        "${TEST_SRC_DIR}/testdata/custom_op_library/rocm/rocm_ops.*")
+      "${ONNXRUNTIME_SHARED_LIB_TEST_SRC_DIR}/rocm_ops.hip"
+      "${TEST_SRC_DIR}/testdata/custom_op_library/rocm/rocm_ops.*")
     list(APPEND custom_op_lib_include ${onnxruntime_ROCM_HOME}/include)
     list(APPEND custom_op_lib_option "-D__HIP_PLATFORM_AMD__=1 -D__HIP_PLATFORM_HCC__=1")
   endif()
@@ -1489,80 +1573,90 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
   target_link_libraries(custom_op_library PRIVATE ${GSL_TARGET} ${custom_op_lib_link})
 
   if(UNIX)
-    if (APPLE)
+    if(APPLE)
       set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-Xlinker -dead_strip")
     else()
       set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.lds -Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack")
     endif()
   else()
     set(ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG "-DEF:${TEST_SRC_DIR}/testdata/custom_op_library/custom_op_library.def")
-    if (NOT onnxruntime_USE_CUDA)
+
+    if(NOT onnxruntime_USE_CUDA)
       target_compile_options(custom_op_library PRIVATE "$<$<COMPILE_LANGUAGE:CUDA>:-Xcompiler /wd26409>"
-                    "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
+        "$<$<NOT:$<COMPILE_LANGUAGE:CUDA>>:/wd26409>")
     endif()
   endif()
+
   set_property(TARGET custom_op_library APPEND_STRING PROPERTY LINK_FLAGS ${ONNXRUNTIME_CUSTOM_OP_LIB_LINK_FLAG})
 
-  if (NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
-    if (onnxruntime_BUILD_JAVA AND NOT onnxruntime_ENABLE_STATIC_ANALYSIS)
-        message(STATUS "Running Java tests")
-        # native-test is added to resources so custom_op_lib can be loaded
-        # and we want to symlink it there
-        set(JAVA_NATIVE_TEST_DIR ${JAVA_OUTPUT_DIR}/native-test)
-        file(MAKE_DIRECTORY ${JAVA_NATIVE_TEST_DIR})
-
-        # delegate to gradle's test runner
-        if(WIN32)
-          add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:custom_op_library>
-                          ${JAVA_NATIVE_TEST_DIR}/$<TARGET_FILE_NAME:custom_op_library>)
-          # On windows ctest requires a test to be an .exe(.com) file
-          # With gradle wrapper we get gradlew.bat. We delegate execution to a separate .cmake file
-          # That can handle both .exe and .bat
-          add_test(NAME onnxruntime4j_test COMMAND ${CMAKE_COMMAND}
-            -DGRADLE_EXECUTABLE=${GRADLE_EXECUTABLE}
-            -DBIN_DIR=${CMAKE_CURRENT_BINARY_DIR}
-            -DREPO_ROOT=${REPO_ROOT}
-            ${ORT_PROVIDER_FLAGS}
-            -P ${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime_java_unittests.cmake)
+  if(NOT onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+    if(onnxruntime_BUILD_JAVA AND NOT onnxruntime_ENABLE_STATIC_ANALYSIS)
+      message(STATUS "Running Java tests")
+
+      # native-test is added to resources so custom_op_lib can be loaded
+      # and we want to symlink it there
+      set(JAVA_NATIVE_TEST_DIR ${JAVA_OUTPUT_DIR}/native-test)
+      file(MAKE_DIRECTORY ${JAVA_NATIVE_TEST_DIR})
+
+      # delegate to gradle's test runner
+      if(WIN32)
+        add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:custom_op_library>
+          ${JAVA_NATIVE_TEST_DIR}/$<TARGET_FILE_NAME:custom_op_library>)
+
+        # On windows ctest requires a test to be an .exe(.com) file
+        # With gradle wrapper we get gradlew.bat. We delegate execution to a separate .cmake file
+        # That can handle both .exe and .bat
+        add_test(NAME onnxruntime4j_test COMMAND ${CMAKE_COMMAND}
+          -DGRADLE_EXECUTABLE=${GRADLE_EXECUTABLE}
+          -DBIN_DIR=${CMAKE_CURRENT_BINARY_DIR}
+          -DREPO_ROOT=${REPO_ROOT}
+          ${ORT_PROVIDER_FLAGS}
+          -P ${CMAKE_CURRENT_SOURCE_DIR}/onnxruntime_java_unittests.cmake)
+      else()
+        add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:custom_op_library>
+          ${JAVA_NATIVE_TEST_DIR}/$<TARGET_LINKER_FILE_NAME:custom_op_library>)
+
+        if(onnxruntime_ENABLE_TRAINING_APIS)
+          message(STATUS "Running Java inference and training tests")
+          add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS} -DENABLE_TRAINING_APIS=1
+            WORKING_DIRECTORY ${REPO_ROOT}/java)
         else()
-          add_custom_command(TARGET custom_op_library POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different $<TARGET_FILE:custom_op_library>
-                          ${JAVA_NATIVE_TEST_DIR}/$<TARGET_LINKER_FILE_NAME:custom_op_library>)
-          if (onnxruntime_ENABLE_TRAINING_APIS)
-            message(STATUS "Running Java inference and training tests")
-            add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS} -DENABLE_TRAINING_APIS=1
-                          WORKING_DIRECTORY ${REPO_ROOT}/java)
-          else()
-            message(STATUS "Running Java inference tests only")
-            add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS}
-                          WORKING_DIRECTORY ${REPO_ROOT}/java)
-          endif()
+          message(STATUS "Running Java inference tests only")
+          add_test(NAME onnxruntime4j_test COMMAND ${GRADLE_EXECUTABLE} cmakeCheck -DcmakeBuildDir=${CMAKE_CURRENT_BINARY_DIR} ${ORT_PROVIDER_FLAGS}
+            WORKING_DIRECTORY ${REPO_ROOT}/java)
         endif()
-        set_property(TEST onnxruntime4j_test APPEND PROPERTY DEPENDS onnxruntime4j_jni)
+      endif()
+
+      set_property(TEST onnxruntime4j_test APPEND PROPERTY DEPENDS onnxruntime4j_jni)
     endif()
   endif()
 
-  if (onnxruntime_BUILD_SHARED_LIB AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
-    set (onnxruntime_customopregistration_test_SRC
-            ${ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR}/test_registercustomops.cc)
+  if(onnxruntime_BUILD_SHARED_LIB AND(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
+    set(onnxruntime_customopregistration_test_SRC
+      ${ONNXRUNTIME_CUSTOM_OP_REGISTRATION_TEST_SRC_DIR}/test_registercustomops.cc)
 
     set(onnxruntime_customopregistration_test_LIBS custom_op_library onnxruntime_common onnxruntime_test_utils)
-    if (NOT WIN32)
+
+    if(NOT WIN32)
       list(APPEND onnxruntime_customopregistration_test_LIBS nsync::nsync_cpp)
     endif()
-    if (CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
+
+    if(CPUINFO_SUPPORTED AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
       list(APPEND onnxruntime_customopregistration_test_LIBS cpuinfo)
     endif()
-    if (onnxruntime_USE_TENSORRT)
+
+    if(onnxruntime_USE_TENSORRT)
       list(APPEND onnxruntime_customopregistration_test_LIBS ${TENSORRT_LIBRARY_INFER})
     endif()
+
     AddTest(DYN
-            TARGET onnxruntime_customopregistration_test
-            SOURCES ${onnxruntime_customopregistration_test_SRC} ${onnxruntime_unittest_main_src}
-            LIBS ${onnxruntime_customopregistration_test_LIBS}
-            DEPENDS ${all_dependencies}
+      TARGET onnxruntime_customopregistration_test
+      SOURCES ${onnxruntime_customopregistration_test_SRC} ${onnxruntime_unittest_main_src}
+      LIBS ${onnxruntime_customopregistration_test_LIBS}
+      DEPENDS ${all_dependencies}
     )
 
-    if (CMAKE_SYSTEM_NAME STREQUAL "iOS")
+    if(CMAKE_SYSTEM_NAME STREQUAL "iOS")
       add_custom_command(
         TARGET onnxruntime_customopregistration_test POST_BUILD
         COMMAND ${CMAKE_COMMAND} -E copy_directory
@@ -1570,73 +1664,71 @@ if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
         $<TARGET_FILE_DIR:onnxruntime_customopregistration_test>/testdata)
     endif()
 
-    if (UNIX AND onnxruntime_USE_TENSORRT)
-        # The test_main.cc includes NvInfer.h where it has many deprecated declarations  
-        # simply ignore them for TensorRT EP build
-        set_property(TARGET onnxruntime_customopregistration_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
+    if(UNIX AND onnxruntime_USE_TENSORRT)
+      # The test_main.cc includes NvInfer.h where it has many deprecated declarations
+      # simply ignore them for TensorRT EP build
+      set_property(TARGET onnxruntime_customopregistration_test APPEND_STRING PROPERTY COMPILE_FLAGS "-Wno-deprecated-declarations")
     endif()
-
   endif()
 endif()
 
 # Build custom op library that returns an error OrtStatus when the exported RegisterCustomOps function is called.
-if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
+if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
   onnxruntime_add_shared_library_module(custom_op_invalid_library
-                                        ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.h
-                                        ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.cc)
+    ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.h
+    ${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.cc)
   target_include_directories(custom_op_invalid_library PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session)
 
   if(UNIX)
-    if (APPLE)
+    if(APPLE)
       set(ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG "-Xlinker -dead_strip")
     else()
       string(CONCAT ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG
-             "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.lds "
-             "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack")
+        "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.lds "
+        "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack")
     endif()
   else()
     set(ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG
-        "-DEF:${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.def")
+      "-DEF:${TEST_SRC_DIR}/testdata/custom_op_invalid_library/custom_op_library.def")
   endif()
 
   set_property(TARGET custom_op_invalid_library APPEND_STRING PROPERTY LINK_FLAGS
-               ${ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG})
+    ${ONNXRUNTIME_CUSTOM_OP_INVALID_LIB_LINK_FLAG})
 endif()
 
-if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND (NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
-
+if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND(NOT onnxruntime_MINIMAL_BUILD OR onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
   file(GLOB_RECURSE custom_op_get_const_input_test_library_src
-        "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.cc"
-        "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.h"
-        "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.cc"
+    "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.cc"
+    "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.h"
+    "${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op.cc"
   )
 
   onnxruntime_add_shared_library_module(custom_op_get_const_input_test_library ${custom_op_get_const_input_test_library_src})
 
   onnxruntime_add_include_to_target(custom_op_get_const_input_test_library onnxruntime_common GTest::gtest GTest::gmock)
   target_include_directories(custom_op_get_const_input_test_library PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session
-                                                                            ${REPO_ROOT}/include/onnxruntime/core/common)
+    ${REPO_ROOT}/include/onnxruntime/core/common)
 
   if(UNIX)
-    if (APPLE)
+    if(APPLE)
       set(ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG "-Xlinker -dead_strip")
     else()
       string(CONCAT ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG
-             "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.lds "
-             "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack")
+        "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.lds "
+        "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack")
     endif()
   else()
     set(ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG
-        "-DEF:${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.def")
+      "-DEF:${TEST_SRC_DIR}/testdata/custom_op_get_const_input_test_library/custom_op_lib.def")
   endif()
 
   set_property(TARGET custom_op_get_const_input_test_library APPEND_STRING PROPERTY LINK_FLAGS
-               ${ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG})
+    ${ONNXRUNTIME_CUSTOM_OP_GET_CONST_INPUT_TEST_LIB_LINK_FLAG})
 endif()
 
-if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT onnxruntime_MINIMAL_BUILD)
-  set (onnxruntime_logging_apis_test_SRC
-       ${ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR}/test_logging_apis.cc)
+if(onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND NOT onnxruntime_MINIMAL_BUILD)
+  set(onnxruntime_logging_apis_test_SRC
+    ${ONNXRUNTIME_LOGGING_APIS_TEST_SRC_DIR}/test_logging_apis.cc)
 
   set(onnxruntime_logging_apis_test_LIBS onnxruntime_common onnxruntime_test_utils)
 
@@ -1645,44 +1737,44 @@ if (onnxruntime_BUILD_SHARED_LIB AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten"
   endif()
 
   AddTest(DYN
-          TARGET onnxruntime_logging_apis_test
-          SOURCES ${onnxruntime_logging_apis_test_SRC}
-          LIBS ${onnxruntime_logging_apis_test_LIBS}
-          DEPENDS ${all_dependencies}
+    TARGET onnxruntime_logging_apis_test
+    SOURCES ${onnxruntime_logging_apis_test_SRC}
+    LIBS ${onnxruntime_logging_apis_test_LIBS}
+    DEPENDS ${all_dependencies}
   )
 endif()
 
-if (NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_USE_OPENVINO AND (NOT onnxruntime_MINIMAL_BUILD OR
-                                                                        onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
+if(NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_USE_OPENVINO AND(NOT onnxruntime_MINIMAL_BUILD OR
+  onnxruntime_MINIMAL_BUILD_CUSTOM_OPS))
   onnxruntime_add_shared_library_module(custom_op_openvino_wrapper_library
-                                        ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.cc
-                                        ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc)
+    ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.cc
+    ${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/openvino_wrapper.cc)
   target_include_directories(custom_op_openvino_wrapper_library PRIVATE ${REPO_ROOT}/include/onnxruntime/core/session)
   target_link_libraries(custom_op_openvino_wrapper_library PRIVATE openvino::runtime)
 
   if(UNIX)
-    if (APPLE)
+    if(APPLE)
       set(ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG "-Xlinker -dead_strip")
     else()
       string(CONCAT ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG
-             "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.lds "
-             "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack")
+        "-Xlinker --version-script=${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.lds "
+        "-Xlinker --no-undefined -Xlinker --gc-sections -z noexecstack")
     endif()
   else()
     set(ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG
-        "-DEF:${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.def")
+      "-DEF:${TEST_SRC_DIR}/testdata/custom_op_openvino_wrapper_library/custom_op_lib.def")
   endif()
 
   set_property(TARGET custom_op_openvino_wrapper_library APPEND_STRING PROPERTY LINK_FLAGS
-               ${ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG})
+    ${ONNXRUNTIME_CUSTOM_OP_OPENVINO_WRAPPER_LIB_LINK_FLAG})
 endif()
 
 # limit to only test on windows first, due to a runtime path issue on linux
-if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
-                                  AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS"
-                                  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android"
-                                  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten"
-                                  AND NOT onnxruntime_USE_ROCM)
+if(NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
+  AND NOT ${CMAKE_SYSTEM_NAME} MATCHES "Darwin|iOS"
+  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Android"
+  AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten"
+  AND NOT onnxruntime_USE_ROCM)
   file(GLOB_RECURSE test_execution_provider_srcs
     "${REPO_ROOT}/onnxruntime/test/testdata/custom_execution_provider_library/*.h"
     "${REPO_ROOT}/onnxruntime/test/testdata/custom_execution_provider_library/*.cc"
@@ -1696,9 +1788,11 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
   target_include_directories(test_execution_provider PRIVATE $<TARGET_PROPERTY:onnx,INTERFACE_INCLUDE_DIRECTORIES>)
   target_include_directories(test_execution_provider PRIVATE $<TARGET_PROPERTY:onnxruntime_common,INTERFACE_INCLUDE_DIRECTORIES>)
   target_include_directories(test_execution_provider PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} ${ORTTRAINING_ROOT})
-  if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
+
+  if(onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
     target_link_libraries(test_execution_provider PRIVATE Python::Python)
   endif()
+
   if(APPLE)
     set_property(TARGET test_execution_provider APPEND_STRING PROPERTY LINK_FLAGS "-Xlinker -exported_symbols_list ${REPO_ROOT}/onnxruntime/test/testdata/custom_execution_provider_library/exported_symbols.lst")
   elseif(UNIX)
diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc
index ef2d7e31654ba..c47bb37500337 100644
--- a/onnxruntime/test/providers/cpu/model_tests.cc
+++ b/onnxruntime/test/providers/cpu/model_tests.cc
@@ -942,6 +942,14 @@ ::std::vector<::std::basic_string<ORTCHAR_T>> GetParameterStrings() {
 #ifdef USE_DML
   provider_names[provider_name_dml] = {opset7, opset8, opset9, opset10, opset11, opset12, opset13, opset14, opset15, opset16, opset17, opset18};
 #endif
+
+// These tests are already run in the CPU builds, therefore removing the CPU EP tests from CUDA build for training.
+// Note: These are inference tests, we run these in training builds as an extra check. Therefore reducing
+// the number of times these are run will reduce the CI time.
+#if defined(ENABLE_TRAINING_CORE) && defined(USE_CUDA)
+  provider_names.erase(provider_name_cpu);
+#endif
+
   std::vector<std::basic_string<ORTCHAR_T>> v;
   // Permanently exclude following tests because ORT support only opset starting from 7,
   // Please make no more changes to the list
diff --git a/orttraining/orttraining/python/orttraining_pybind_state.cc b/orttraining/orttraining/python/orttraining_pybind_state.cc
index 3f3aa396e6ca0..08fd1a660d6b6 100644
--- a/orttraining/orttraining/python/orttraining_pybind_state.cc
+++ b/orttraining/orttraining/python/orttraining_pybind_state.cc
@@ -18,7 +18,6 @@
 #include "core/session/environment.h"
 #include "core/session/custom_ops.h"
 #include "core/dlpack/dlpack_converter.h"
-#include "orttraining/core/session/training_session.h"
 #include "orttraining/core/agent/training_agent.h"
 #include "orttraining/core/graph/gradient_config.h"
 #include "orttraining/core/graph/optimizer_config.h"
@@ -108,65 +107,6 @@ GetExecutionProvidersForTrainingApis(OrtDevice device) {
 }
 }  // namespace
 #endif
-struct TrainingParameters {
-  std::string loss_output_name;
-  std::unordered_set<std::string> weights_to_train;
-  std::unordered_set<std::string> weights_not_to_train;
-
-  onnxruntime::training::TrainingSession::ImmutableWeights immutable_weights;
-
-  // optimizer
-  std::string training_optimizer_name;
-  std::string lr_params_feed_name = "Learning_Rate";
-  std::unordered_map<std::string, std::unordered_map<std::string, float>> optimizer_attributes_map;
-  std::unordered_map<std::string, std::unordered_map<std::string, int64_t>> optimizer_int_attributes_map;
-  onnxruntime::training::TrainingSession::OptimizerState optimizer_initial_state;
-  std::unordered_map<std::string, std::vector<int>> sliced_schema;
-  std::unordered_map<std::string, int> sliced_axes;
-  std::vector<std::string> sliced_tensor_names;
-  bool use_fp16_moments = false;
-
-  bool use_mixed_precision = false;
-  bool allreduce_post_accumulation = false;
-  float loss_scale = 0.0f;
-  int world_rank = 0;
-  int world_size = 1;
-  int local_rank = 0;
-  int local_size = 1;
-  int gradient_accumulation_steps = 1;
-  int data_parallel_size = 1;
-  int horizontal_parallel_size = 1;
-  int pipeline_parallel_size = 1;
-  int num_pipeline_micro_batches = 1;
-  int deepspeed_zero_stage = 0;
-  bool enable_grad_norm_clip = true;
-  bool set_gradients_as_graph_outputs = false;
-  bool use_memory_efficient_gradient = false;
-
-  std::string pipeline_cut_info_string = {};
-
-  // recompute
-  bool attn_dropout_recompute = false;
-  bool gelu_recompute = false;
-  bool transformer_layer_recompute = false;
-  int number_recompute_layers = 0;
-  bool enable_adasum = false;
-
-  // transformation
-  int propagate_cast_ops_level = 1;
-  std::vector<std::string> propagate_cast_ops_allow;
-  GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy propagate_cast_ops_strategy =
-      GraphTransformerConfiguration::PropagateCastOpsConfiguration::Strategy::FloodFill;
-
-  // graph dumping
-  std::string model_after_graph_transforms_path;
-  std::string model_with_gradient_graph_path;
-  std::string model_with_training_graph_path;
-};
-
-struct TrainingConfigurationResult {
-  optional<std::string> loss_scale_input_name;
-};
 
 #ifdef ENABLE_TRAINING_APIS
 // Thin wrapper over internal C++ Optimizer
@@ -206,185 +146,6 @@ struct PyGradientGraphBuilderContext {
         local_registries_(local_registries) {}
 };
 
-// TODO: this method does not handle parallel optimization.
-TrainingConfigurationResult ConfigureSessionForTraining(
-    training::PipelineTrainingSession* sess, TrainingParameters& parameters) {
-  // TODO tix, refactor the mpi related code to populate all fields correctly by default.
-  ORT_ENFORCE(parameters.data_parallel_size <= parameters.world_size, "data_parallel_size: ", parameters.data_parallel_size, ", world_size: ", parameters.world_size);
-  ORT_ENFORCE(parameters.horizontal_parallel_size <= parameters.world_size, "horizontal_parallel_size: ", parameters.horizontal_parallel_size, ", world_size: ", parameters.world_size);
-  ORT_ENFORCE(parameters.pipeline_parallel_size <= parameters.world_size, "pipeline_parallel_size: ", parameters.pipeline_parallel_size, ", world_size: ", parameters.world_size);
-
-  // When DxHxP != the total number of ranks, we try adjusting D so that DxHxP == the total number of ranks.
-  if (parameters.world_size != parameters.data_parallel_size * parameters.horizontal_parallel_size * parameters.pipeline_parallel_size) {
-    ORT_ENFORCE(parameters.world_size % parameters.horizontal_parallel_size * parameters.pipeline_parallel_size == 0,
-                "D, H, P sizes are incorrect. To enable automatic correction, total number of ranks must be a divisible by HxP.");
-
-    const auto new_data_parallel_size = parameters.world_size / (parameters.horizontal_parallel_size * parameters.pipeline_parallel_size);
-    parameters.data_parallel_size = new_data_parallel_size;
-
-    const std::string msg = "Cannot distribute " + std::to_string(parameters.world_size) + " ranks for distributed computation with D=" + std::to_string(parameters.data_parallel_size) +
-                            ", H=" + std::to_string(parameters.horizontal_parallel_size) + ", P=" + std::to_string(parameters.pipeline_parallel_size) + ", so D is automatically changed to " + std::to_string(new_data_parallel_size);
-    LOGS(*(sess->GetLogger()), WARNING) << msg;
-  }
-
-  training::PipelineTrainingSession::TrainingConfiguration config{};
-  config.weight_names_to_train = parameters.weights_to_train;
-  config.weight_names_to_not_train = parameters.weights_not_to_train;
-  config.immutable_weights = parameters.immutable_weights;
-  config.gradient_accumulation_steps = parameters.gradient_accumulation_steps;
-
-  config.distributed_config.world_rank = parameters.world_rank;
-  config.distributed_config.world_size = parameters.world_size;
-  config.distributed_config.local_rank = parameters.local_rank;
-  config.distributed_config.local_size = parameters.local_size;
-  config.distributed_config.data_parallel_size = parameters.data_parallel_size;
-  config.distributed_config.horizontal_parallel_size = parameters.horizontal_parallel_size;
-  config.distributed_config.pipeline_parallel_size = parameters.pipeline_parallel_size;
-  config.distributed_config.num_pipeline_micro_batches = parameters.num_pipeline_micro_batches;
-  config.distributed_config.sliced_schema = parameters.sliced_schema;
-  config.distributed_config.sliced_axes = parameters.sliced_axes;
-  config.distributed_config.sliced_tensor_names = parameters.sliced_tensor_names;
-
-  if (parameters.use_mixed_precision) {
-    training::PipelineTrainingSession::TrainingConfiguration::MixedPrecisionConfiguration mp{};
-    mp.use_mixed_precision_initializers = true;
-
-    config.mixed_precision_config = mp;
-  }
-
-  if (config.distributed_config.pipeline_parallel_size > 1) {
-    training::PipelineTrainingSession::TrainingConfiguration::PipelineConfiguration pipeline_config;
-
-    // Currently don't support auto-partition. User needs to pass in cut information for pipeline
-    pipeline_config.do_partition = true;
-    assert(!parameters.pipeline_cut_info_string.empty());
-
-    auto process_with_delimiter = [](std::string& input_str, const std::string& delimiter) {
-      std::vector<std::string> result;
-      size_t pos = 0;
-      while ((pos = input_str.find(delimiter)) != std::string::npos) {
-        std::string token = input_str.substr(0, pos);
-        result.emplace_back(token);
-        input_str.erase(0, pos + delimiter.length());
-      }
-      // push the last split of substring into result.
-      result.emplace_back(input_str);
-      return result;
-    };
-
-    auto process_cut_info = [&](std::string& cut_info_string) {
-      std::vector<PipelineTrainingSession::TrainingConfiguration::CutInfo> cut_list;
-      const std::string group_delimiter = ",";
-      const std::string edge_delimiter = ":";
-      const std::string consumer_delimiter = "/";
-      const std::string producer_consumer_delimiter = "-";
-
-      auto cut_info_groups = process_with_delimiter(cut_info_string, group_delimiter);
-      for (auto& cut_info_group : cut_info_groups) {
-        PipelineTrainingSession::TrainingConfiguration::CutInfo cut_info;
-        auto cut_edges = process_with_delimiter(cut_info_group, edge_delimiter);
-        for (auto& cut_edge : cut_edges) {
-          auto process_edge = process_with_delimiter(cut_edge, producer_consumer_delimiter);
-          if (process_edge.size() == 1) {
-            PipelineTrainingSession::TrainingConfiguration::CutEdge edge{process_edge[0]};
-            cut_info.emplace_back(edge);
-          } else {
-            ORT_ENFORCE(process_edge.size() == 2);
-            auto consumer_list = process_with_delimiter(process_edge[1], consumer_delimiter);
-
-            PipelineTrainingSession::TrainingConfiguration::CutEdge edge{process_edge[0], consumer_list};
-            cut_info.emplace_back(edge);
-          }
-        }
-        cut_list.emplace_back(cut_info);
-      }
-      return cut_list;
-    };
-
-    pipeline_config.cut_list = process_cut_info(parameters.pipeline_cut_info_string);
-    config.pipeline_config = pipeline_config;
-  }
-  config.loss_name = parameters.loss_output_name;
-
-  if (!parameters.training_optimizer_name.empty()) {
-    training::PipelineTrainingSession::TrainingConfiguration::OptimizerConfiguration opt{};
-    opt.name = parameters.training_optimizer_name;
-    opt.learning_rate_input_name = parameters.lr_params_feed_name;
-    opt.weight_attributes_generator = [&parameters](const std::string& weight_name) {
-      const auto it = parameters.optimizer_attributes_map.find(weight_name);
-      ORT_ENFORCE(
-          it != parameters.optimizer_attributes_map.end(),
-          "Failed to find attribute map for weight ", weight_name);
-      return it->second;
-    };
-    opt.weight_int_attributes_generator = [&parameters](const std::string& weight_name) {
-      const auto it = parameters.optimizer_int_attributes_map.find(weight_name);
-      ORT_ENFORCE(
-          it != parameters.optimizer_int_attributes_map.end(),
-          "Failed to find int attribute map for weight ", weight_name);
-      return it->second;
-    };
-    opt.use_mixed_precision_moments = parameters.use_fp16_moments;
-    opt.do_all_reduce_in_mixed_precision_type = true;
-    // TODO: this mapping is temporary.
-    // For now, nccl allreduce kernel only implements for allreduce_post_accumulation
-    // hovorod allreduce kernel only implements for not allreduce_post_accumulation.
-    // eventually we will have one all reduce kernel and let opt to have
-    // an allreduce_post_accumulation option and remove the use_nccl option.
-    opt.use_nccl = parameters.allreduce_post_accumulation;
-    opt.deepspeed_zero = onnxruntime::training::ZeROConfig(parameters.deepspeed_zero_stage);
-    opt.enable_grad_norm_clip = parameters.enable_grad_norm_clip;
-
-    // TODO reduction types
-    if (parameters.enable_adasum) {
-#ifdef USE_CUDA
-      opt.adasum_reduction_type = training::AdasumReductionType::GpuHierarchicalReduction;
-#else
-      opt.adasum_reduction_type = training::AdasumReductionType::CpuReduction;
-#endif
-    }
-
-    config.optimizer_config = opt;
-  }
-
-  if (!parameters.optimizer_initial_state.empty()) {
-    config.init_optimizer_states = parameters.optimizer_initial_state;
-  }
-
-  config.gradient_graph_config.use_memory_efficient_gradient = parameters.use_memory_efficient_gradient;
-  config.gradient_graph_config.set_gradients_as_graph_outputs = parameters.set_gradients_as_graph_outputs;
-
-  config.graph_transformer_config.attn_dropout_recompute = parameters.attn_dropout_recompute;
-  config.graph_transformer_config.gelu_recompute = parameters.gelu_recompute;
-  config.graph_transformer_config.transformer_layer_recompute = parameters.transformer_layer_recompute;
-  config.graph_transformer_config.number_recompute_layers = parameters.number_recompute_layers;
-  config.graph_transformer_config.propagate_cast_ops_config.strategy = parameters.propagate_cast_ops_strategy;
-  config.graph_transformer_config.propagate_cast_ops_config.level = parameters.propagate_cast_ops_level;
-  config.graph_transformer_config.propagate_cast_ops_config.allow = parameters.propagate_cast_ops_allow;
-
-  if (!parameters.model_after_graph_transforms_path.empty()) {
-    config.model_after_graph_transforms_path = ToPathString(parameters.model_after_graph_transforms_path);
-  }
-  if (!parameters.model_with_gradient_graph_path.empty()) {
-    config.model_with_gradient_graph_path = ToPathString(parameters.model_with_gradient_graph_path);
-  }
-  if (!parameters.model_with_training_graph_path.empty()) {
-    config.model_with_training_graph_path = ToPathString(parameters.model_with_training_graph_path);
-  }
-
-  training::PipelineTrainingSession::TrainingConfigurationResult config_result{};
-
-  OrtPybindThrowIfError(sess->ConfigureForTraining(config, config_result));
-
-  TrainingConfigurationResult python_config_result{};
-  if (config_result.mixed_precision_config_result.has_value()) {
-    const auto& mp_config_result = config_result.mixed_precision_config_result.value();
-    python_config_result.loss_scale_input_name = mp_config_result.loss_scale_input_name;
-  }
-
-  return python_config_result;
-}
-
 #if defined(USE_MPI)
 void CopyMPIContextToTrainingParameters(TrainingParameters& parameters, const logging::Logger* logger) {
   LOGS(*logger, INFO) << "MPIContext::GetInstance().GetWorldRank(): " << MPIContext::GetInstance().GetWorldRank();
@@ -448,68 +209,6 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
         ORT_ENFORCE(num_entries_erased == 1, "NodeArg not found in cache: ", node_arg_name);
       });
 
-  py::class_<TrainingParameters> parameters(m, "TrainingParameters", R"pbdoc(Configuration information for training.)pbdoc");
-  parameters.def(py::init())
-      .def_readwrite("loss_output_name", &TrainingParameters::loss_output_name)
-      .def_readwrite("immutable_weights", &TrainingParameters::immutable_weights)
-      .def_readwrite("weights_not_to_train", &TrainingParameters::weights_not_to_train)
-      .def_readwrite("weights_to_train", &TrainingParameters::weights_to_train)
-      .def_readwrite("sliced_tensor_names", &TrainingParameters::sliced_tensor_names)
-      .def_readwrite("training_optimizer_name", &TrainingParameters::training_optimizer_name)
-      .def_readwrite("lr_params_feed_name", &TrainingParameters::lr_params_feed_name)
-      .def_readwrite("optimizer_attributes_map", &TrainingParameters::optimizer_attributes_map)
-      .def_readwrite("optimizer_int_attributes_map", &TrainingParameters::optimizer_int_attributes_map)
-      .def_readwrite("sliced_schema", &TrainingParameters::sliced_schema)
-      .def_readwrite("sliced_axes", &TrainingParameters::sliced_axes)
-      .def_readwrite("use_fp16_moments", &TrainingParameters::use_fp16_moments)
-      .def_readwrite("use_mixed_precision", &TrainingParameters::use_mixed_precision)
-      .def_readwrite("allreduce_post_accumulation", &TrainingParameters::allreduce_post_accumulation)
-      .def_readwrite("loss_scale", &TrainingParameters::loss_scale)
-      .def_readwrite("world_rank", &TrainingParameters::world_rank)
-      .def_readwrite("world_size", &TrainingParameters::world_size)
-      .def_readwrite("data_parallel_size", &TrainingParameters::data_parallel_size)
-      .def_readwrite("horizontal_parallel_size", &TrainingParameters::horizontal_parallel_size)
-      .def_readwrite("pipeline_parallel_size", &TrainingParameters::pipeline_parallel_size)
-      .def_readwrite("pipeline_cut_info_string", &TrainingParameters::pipeline_cut_info_string)
-      .def_readwrite("num_pipeline_micro_batches", &TrainingParameters::num_pipeline_micro_batches)
-      .def_readwrite("gradient_accumulation_steps", &TrainingParameters::gradient_accumulation_steps)
-      .def_readwrite("deepspeed_zero_stage", &TrainingParameters::deepspeed_zero_stage)
-      .def_readwrite("enable_grad_norm_clip", &TrainingParameters::enable_grad_norm_clip)
-      .def_readwrite("set_gradients_as_graph_outputs", &TrainingParameters::set_gradients_as_graph_outputs)
-      .def_readwrite("use_memory_efficient_gradient", &TrainingParameters::use_memory_efficient_gradient)
-      .def_readwrite("attn_dropout_recompute", &TrainingParameters::attn_dropout_recompute)
-      .def_readwrite("gelu_recompute", &TrainingParameters::gelu_recompute)
-      .def_readwrite("transformer_layer_recompute", &TrainingParameters::transformer_layer_recompute)
-      .def_readwrite("number_recompute_layers", &TrainingParameters::number_recompute_layers)
-      .def_readwrite("data_parallel_size", &TrainingParameters::data_parallel_size)
-      .def_readwrite("horizontal_parallel_size", &TrainingParameters::horizontal_parallel_size)
-      .def_readwrite("pipeline_parallel_size", &TrainingParameters::pipeline_parallel_size)
-      .def("set_optimizer_initial_state",
-           [](TrainingParameters& parameters, const std::unordered_map<std::string, std::unordered_map<std::string, py::object>>& py_state) -> void {
-             onnxruntime::training::TrainingSession::OptimizerState optim_state;
-             for (const auto& weight_it : py_state) {
-               auto state = weight_it.second;
-               NameMLValMap state_tensors;
-               for (auto& initializer : state) {
-                 OrtValue ml_value;
-
-                 // InputDeflist is null because parameters havent been tied to session yet
-                 // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list)
-                 CreateGenericMLValue(nullptr, GetAllocator(), "", initializer.second, &ml_value, true);
-                 ThrowIfPyErrOccured();
-                 state_tensors.emplace(initializer.first, ml_value);
-               }
-               optim_state.emplace(weight_it.first, state_tensors);
-             }
-             parameters.optimizer_initial_state = optim_state;
-           })
-      .def_readwrite("model_after_graph_transforms_path", &TrainingParameters::model_after_graph_transforms_path)
-      .def_readwrite("model_with_gradient_graph_path", &TrainingParameters::model_with_gradient_graph_path)
-      .def_readwrite("model_with_training_graph_path", &TrainingParameters::model_with_training_graph_path)
-      .def_readwrite("enable_adasum", &TrainingParameters::enable_adasum)
-      .def_readwrite("propagate_cast_ops_level", &TrainingParameters::propagate_cast_ops_level)
-      .def_readwrite("propagate_cast_ops_allow", &TrainingParameters::propagate_cast_ops_allow);
-
 #if defined(USE_MPI)
   m.def("get_mpi_context_local_rank", []() -> int { return MPIContext::GetInstance().GetLocalRank(); });
   m.def("get_mpi_context_local_size", []() -> int { return MPIContext::GetInstance().GetLocalSize(); });
@@ -579,130 +278,6 @@ void addObjectMethodsForTraining(py::module& m, ExecutionProviderRegistrationFn
         });
 #endif
 
-  py::class_<TrainingConfigurationResult> config_result(m, "TrainingConfigurationResult", "pbdoc(Configuration result for training.)pbdoc");
-  config_result.def(py::init())
-      .def_property_readonly("loss_scale_input_name", [](const TrainingConfigurationResult& result) -> py::object {
-        if (result.loss_scale_input_name.has_value()) {
-          return py::str{result.loss_scale_input_name.value()};
-        }
-        return py::none();
-      });
-
-  // Thin wrapper over internal C++ InferenceSession to accommodate custom op library management for the Python user
-  struct PyTrainingSession : public PyInferenceSession {
-    PyTrainingSession(std::shared_ptr<Environment> env, const PySessionOptions& so)
-        : PyInferenceSession(env, std::make_unique<PipelineTrainingSession>(so.value, *env)) {
-    }
-    ~PyTrainingSession() = default;
-  };
-
-  py::class_<PyTrainingSession, PyInferenceSession> training_session(m, "TrainingSession");
-  training_session
-      .def(py::init([](const PySessionOptions& so) {
-        auto& training_env = GetTrainingEnv();
-        return std::make_unique<PyTrainingSession>(training_env.GetORTEnv(), so);
-      }))
-      .def(py::init([]() {
-        auto& training_env = GetTrainingEnv();
-        return std::make_unique<PyTrainingSession>(training_env.GetORTEnv(), GetDefaultCPUSessionOptions());
-      }))
-      .def("finalize", [](py::object) {
-#if defined(USE_MPI)
-#ifdef _WIN32
-        // https://docs.microsoft.com/en-us/windows/win32/dlls/dynamic-link-library-best-practices
-        // shutdown_mpi() is not called within MPIContext destructor because of DllMain's restriction
-        // call shutdown_mpi() here instead.
-        MPIContext::shutdown_mpi();
-#endif
-#endif
-      })
-      .def("load_model", [ep_registration_fn](PyTrainingSession* sess, const std::string& path, TrainingParameters& parameters, const std::vector<std::string>& provider_types, const ProviderOptionsVector& provider_options) {
-        OrtPybindThrowIfError(sess->GetSessionHandle()->Load(path));
-
-#if defined(USE_MPI)
-        bool use_nccl = parameters.allreduce_post_accumulation;
-        if (!use_nccl && parameters.world_size > 1)
-          CopyMPIContextToTrainingParameters(parameters, sess->GetSessionHandle()->GetLogger());
-#endif
-        const auto config_result = ConfigureSessionForTraining(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle()), parameters);
-
-        ProviderOptionsVector merged_options;
-        ResolveExtraProviderOptions(provider_types, provider_options, merged_options);
-
-        InitializeSession(sess->GetSessionHandle(), ep_registration_fn, provider_types, merged_options);
-
-        return config_result;
-      })
-      .def("read_bytes", [ep_registration_fn](PyTrainingSession* sess, const py::bytes& serialized_model, TrainingParameters& parameters, const std::vector<std::string>& provider_types, const ProviderOptionsVector& provider_options) {
-        std::istringstream buffer(serialized_model);
-        OrtPybindThrowIfError(sess->GetSessionHandle()->Load(buffer));
-
-#if defined(USE_MPI)
-        bool use_nccl = parameters.allreduce_post_accumulation;
-        if (!use_nccl && parameters.world_size > 1)
-          CopyMPIContextToTrainingParameters(parameters, sess->GetSessionHandle()->GetLogger());
-#endif
-        const auto config_result = ConfigureSessionForTraining(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle()), parameters);
-        ProviderOptionsVector merged_options;
-        ResolveExtraProviderOptions(provider_types, provider_options, merged_options);
-
-        InitializeSession(sess->GetSessionHandle(), ep_registration_fn, provider_types, merged_options);
-
-        return config_result;
-      })
-      .def("get_state", [](PyTrainingSession* sess) {
-        NameMLValMap state_tensors;
-        ORT_THROW_IF_ERROR(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle())->GetStateTensors(state_tensors));
-        auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager();
-        // convert to numpy array
-        std::map<std::string, py::object> rmap;
-        for (auto& kv : state_tensors) {
-          if (kv.second.IsTensor()) {
-            py::object obj;
-            const Tensor& rtensor = kv.second.Get<Tensor>();
-            GetPyObjFromTensor(rtensor, obj, &data_transfer_manager);
-            rmap.insert({kv.first, obj});
-          } else {
-            throw std::runtime_error("Non tensor type in session state tensors is not expected.");
-          }
-        }
-        return rmap;
-      })
-      .def("get_model_state", [](PyTrainingSession* sess, bool include_mixed_precision_weights) {
-        std::unordered_map<std::string, NameMLValMap> model_state_tensors;
-        ORT_THROW_IF_ERROR(static_cast<TrainingSession*>(sess->GetSessionHandle())->GetModelState(model_state_tensors, include_mixed_precision_weights));
-        auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager();
-        return ConvertORTTensorMapToNumpy(model_state_tensors, data_transfer_manager);
-      })
-      .def("get_optimizer_state", [](PyTrainingSession* sess) {
-        std::unordered_map<std::string, NameMLValMap> opt_state_tensors;
-        ORT_THROW_IF_ERROR(static_cast<TrainingSession*>(sess->GetSessionHandle())->GetOptimizerState(opt_state_tensors));
-        auto& data_transfer_manager = sess->GetSessionHandle()->GetDataTransferManager();
-        return ConvertORTTensorMapToNumpy(opt_state_tensors, data_transfer_manager);
-      })
-      .def("get_partition_info_map", [](PyTrainingSession* sess) {
-        std::unordered_map<std::string, std::unordered_map<std::string, std::vector<int>>> part_info_map;
-        ORT_THROW_IF_ERROR(static_cast<TrainingSession*>(sess->GetSessionHandle())->GetPartitionInfoMap(part_info_map));
-        return part_info_map;
-      })
-      .def("load_state", [](PyTrainingSession* sess, std::unordered_map<std::string, py::object>& state, bool strict) {
-        NameMLValMap state_tensors;
-        for (auto initializer : state) {
-          OrtValue ml_value;
-          auto px = sess->GetSessionHandle()->GetModelInputs();
-          if (!px.first.IsOK() || !px.second) {
-            throw std::runtime_error("Either failed to get model inputs from the session object or the input def list was null");
-          }
-          CreateGenericMLValue(px.second, GetAllocator(), initializer.first, initializer.second, &ml_value);
-          ThrowIfPyErrOccured();
-          state_tensors.insert(std::make_pair(initializer.first, ml_value));
-        }
-        ORT_THROW_IF_ERROR(static_cast<PipelineTrainingSession*>(sess->GetSessionHandle())->SetStateTensors(state_tensors, strict));
-      })
-      .def("is_output_fp32_node", [](PyTrainingSession* sess, const std::string& output_name) {
-        return static_cast<PipelineTrainingSession*>(sess->GetSessionHandle())->IsGraphOutputFp32Node(output_name);
-      });
-
   py::class_<PartialGraphExecutionState>(m, "PartialGraphExecutionState")
       .def(py::init([]() {
         return std::make_unique<PartialGraphExecutionState>();
diff --git a/orttraining/orttraining/test/gradient/gradient_checker.h b/orttraining/orttraining/test/gradient/gradient_checker.h
index e2820da5682c6..d1a7c5efb94b4 100644
--- a/orttraining/orttraining/test/gradient/gradient_checker.h
+++ b/orttraining/orttraining/test/gradient/gradient_checker.h
@@ -17,11 +17,11 @@ limitations under the License.
 
 #pragma once
 #include "test/providers/provider_test_utils.h"
-#include "orttraining/core/session/training_session.h"
 #include "orttraining/test/gradient/gradient_op_test_utils.h"
 
 namespace onnxruntime {
 namespace test {
+using training::OpDef;
 
 // TODO: This class currently assumes the inputs share types and the outputs share a type.
 // However there are cases like MaxPool and Gather where this is not true.
diff --git a/orttraining/orttraining/test/gradient/gradient_ops_test.cc b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
index d4e18dbfd2290..7bbc856d65bea 100644
--- a/orttraining/orttraining/test/gradient/gradient_ops_test.cc
+++ b/orttraining/orttraining/test/gradient/gradient_ops_test.cc
@@ -15,7 +15,7 @@
 #include "test/providers/provider_test_utils.h"
 #include "test/util/include/test_random_seed.h"
 #include "orttraining/test/gradient/gradient_checker.h"
-#include "orttraining/test/gradient/gradient_op_test_utils.h"
+// #include "orttraining/test/gradient/gradient_op_test_utils.h"
 #include "test/util/include/default_providers.h"
 #include "test/common/cuda_op_test_utils.h"
 
diff --git a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py b/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py
deleted file mode 100644
index d5298cf8e860e..0000000000000
--- a/orttraining/orttraining/test/python/onnxruntime_test_postprocess.py
+++ /dev/null
@@ -1,325 +0,0 @@
-import os
-import unittest
-
-import torch
-import torch.nn as nn
-from orttraining_test_bert_postprocess import postprocess_model
-from orttraining_test_data_loader import create_ort_test_dataloader
-from orttraining_test_transformers import BertForPreTraining, BertModelTest
-from orttraining_test_utils import map_optimizer_attributes
-
-import onnxruntime
-from onnxruntime.capi.ort_trainer import (  # noqa: F401
-    IODescription,
-    LossScaler,
-    ModelDescription,
-    ORTTrainer,
-    generate_sample,
-)
-
-torch.manual_seed(1)
-onnxruntime.set_seed(1)
-
-
-class Test_PostPasses(unittest.TestCase):  # noqa: N801
-    def get_onnx_model(
-        self, model, model_desc, inputs, device, _enable_internal_postprocess=True, _extra_postprocess=None
-    ):
-        lr_desc = IODescription(
-            "Learning_Rate",
-            [
-                1,
-            ],
-            torch.float32,
-        )
-        model = ORTTrainer(
-            model,
-            None,
-            model_desc,
-            "LambOptimizer",
-            map_optimizer_attributes,
-            lr_desc,
-            device,
-            world_rank=0,
-            world_size=1,
-            _opset_version=14,
-            _enable_internal_postprocess=_enable_internal_postprocess,
-            _extra_postprocess=_extra_postprocess,
-        )
-
-        model.train_step(*inputs)
-        return model.onnx_model_
-
-    def count_all_nodes(self, model):
-        return len(model.graph.node)
-
-    def count_nodes(self, model, node_type):
-        count = 0
-        for node in model.graph.node:
-            if node.op_type == node_type:
-                count += 1
-        return count
-
-    def find_nodes(self, model, node_type):
-        nodes = []
-        for node in model.graph.node:
-            if node.op_type == node_type:
-                nodes.append(node)
-        return nodes
-
-    def get_name(self, name):
-        if os.path.exists(name):
-            return name
-        rel = os.path.join("testdata", name)
-        if os.path.exists(rel):
-            return rel
-        this = os.path.dirname(__file__)
-        data = os.path.join(this, "..", "..", "..", "..", "onnxruntime", "test", "testdata")
-        res = os.path.join(data, name)
-        if os.path.exists(res):
-            return res
-        raise FileNotFoundError(f"Unable to find '{name}' or '{rel}' or '{res}'")
-
-    def test_layer_norm(self):
-        class LayerNormNet(nn.Module):
-            def __init__(self, target):
-                super().__init__()
-                self.ln_1 = nn.LayerNorm(10)
-                self.loss = nn.CrossEntropyLoss()
-                self.target = target
-
-            def forward(self, x):
-                output1 = self.ln_1(x)
-                loss = self.loss(output1, self.target)
-                return loss, output1
-
-        device = torch.device("cpu")
-        target = torch.ones(20, 10, 10, dtype=torch.int64).to(device)
-        model = LayerNormNet(target)
-        input = torch.randn(20, 5, 10, 10, dtype=torch.float32).to(device)
-
-        input_desc = IODescription("input", [], "float32")
-        output0_desc = IODescription("output0", [], "float32")
-        output1_desc = IODescription("output1", [20, 5, 10, 10], "float32")
-        model_desc = ModelDescription([input_desc], [output0_desc, output1_desc])
-
-        learning_rate = torch.tensor([1.0000000e00]).to(device)
-        input_args = [input, learning_rate]
-
-        onnx_model = self.get_onnx_model(model, model_desc, input_args, device)
-
-        count_layer_norm = self.count_nodes(onnx_model, "LayerNormalization")
-        count_nodes = self.count_all_nodes(onnx_model)
-
-        assert count_layer_norm == 0
-        assert count_nodes == 3
-
-    def test_expand(self):
-        class ExpandNet(nn.Module):
-            def __init__(self, target):
-                super().__init__()
-                self.loss = nn.CrossEntropyLoss()
-                self.target = target
-                self.linear = torch.nn.Linear(2, 2)
-
-            def forward(self, x, x1):
-                output = x.expand_as(x1)
-                output = self.linear(output)
-                output = output + output
-                loss = self.loss(output, self.target)
-                return loss, output
-
-        device = torch.device("cpu")
-        target = torch.ones(5, 5, 2, dtype=torch.int64).to(device)
-        model = ExpandNet(target).to(device)
-
-        x = torch.randn(5, 3, 1, 2, dtype=torch.float32).to(device)
-        x1 = torch.randn(5, 3, 5, 2, dtype=torch.float32).to(device)
-
-        input0_desc = IODescription("x", [5, 3, 1, 2], "float32")
-        input1_desc = IODescription("x1", [5, 3, 5, 2], "float32")
-        output0_desc = IODescription("output0", [], "float32")
-        output1_desc = IODescription("output1", [5, 3, 5, 2], "float32")
-        model_desc = ModelDescription([input0_desc, input1_desc], [output0_desc, output1_desc])
-
-        learning_rate = torch.tensor([1.0000000e00]).to(device)
-        input_args = [x, x1, learning_rate]
-
-        onnx_model = self.get_onnx_model(model, model_desc, input_args, device)
-
-        # check that expand output has shape
-        expand_nodes = self.find_nodes(onnx_model, "Expand")
-        assert len(expand_nodes) == 1
-
-        model_info = onnx_model.graph.value_info
-        assert model_info[0].name == expand_nodes[0].output[0]
-        assert model_info[0].type == onnx_model.graph.input[1].type
-
-    def test_bert(self):
-        device = torch.device("cpu")
-
-        model_tester = BertModelTest.BertModelTester(self)
-        (
-            config,
-            input_ids,
-            token_type_ids,
-            input_mask,
-            sequence_labels,
-            token_labels,
-            choice_labels,
-        ) = model_tester.prepare_config_and_inputs()
-
-        model = BertForPreTraining(config=config)
-        model.eval()
-
-        loss, prediction_scores, seq_relationship_score = model(
-            input_ids,
-            attention_mask=input_mask,
-            token_type_ids=token_type_ids,
-            masked_lm_labels=token_labels,
-            next_sentence_label=sequence_labels,
-        )
-
-        model_desc = ModelDescription(
-            [
-                model_tester.input_ids_desc,
-                model_tester.attention_mask_desc,
-                model_tester.token_type_ids_desc,
-                model_tester.masked_lm_labels_desc,
-                model_tester.next_sentence_label_desc,
-            ],
-            [model_tester.loss_desc, model_tester.prediction_scores_desc, model_tester.seq_relationship_scores_desc],
-        )
-
-        from collections import namedtuple
-
-        MyArgs = namedtuple(
-            "MyArgs", "local_rank world_size max_steps learning_rate warmup_proportion batch_size seq_len"
-        )
-        args = MyArgs(
-            local_rank=0,
-            world_size=1,
-            max_steps=100,
-            learning_rate=0.00001,
-            warmup_proportion=0.01,
-            batch_size=13,
-            seq_len=7,
-        )
-
-        dataset_len = 100
-        dataloader = create_ort_test_dataloader(model_desc.inputs_, args.batch_size, args.seq_len, dataset_len, device)
-        learning_rate = torch.tensor(1.0e0, dtype=torch.float32).to(device)
-        for b in dataloader:
-            batch = b
-            break
-        learning_rate = torch.tensor([1.00e00]).to(device)
-        inputs = [*batch, learning_rate]
-
-        onnx_model = self.get_onnx_model(model, model_desc, inputs, device, _extra_postprocess=postprocess_model)
-
-        self._bert_helper(onnx_model)
-
-    def _bert_helper(self, onnx_model):
-        # count layer_norm
-        count_layer_norm = self.count_nodes(onnx_model, "LayerNormalization")
-        assert count_layer_norm == 0
-
-        # get expand node and check output shape
-        expand_nodes = self.find_nodes(onnx_model, "Expand")
-        assert len(expand_nodes) == 1
-
-        model_info = onnx_model.graph.value_info
-        assert model_info[0].name == expand_nodes[0].output[0]
-        assert model_info[0].type == onnx_model.graph.input[0].type
-
-    def test_extra_postpass(self):
-        def postpass_replace_first_add_with_sub(model):
-            # this post pass replaces the first Add node with Sub in the model.
-            # Previous graph
-            #   (subgraph 1)        (subgraph 2)
-            #        |                   |
-            #        |                   |
-            #        |________   ________|
-            #                 | |
-            #                 Add
-            #                  |
-            #             (subgraph 3)
-            #
-            # Post graph
-            #   (subgraph 1)        (subgraph 2)
-            #        |                   |
-            #        |                   |
-            #        |________   ________|
-            #                 | |
-            #                 Sub
-            #                  |
-            #             (subgraph 3)
-            add_nodes = [n for n in model.graph.node if n.op_type == "Add"]
-            add_nodes[0].op_type = "Sub"
-
-        class MultiAdd(nn.Module):
-            def __init__(self, target):
-                super().__init__()
-                self.loss = nn.CrossEntropyLoss()
-                self.target = target
-                self.linear = torch.nn.Linear(2, 2, bias=False)
-
-            def forward(self, x, x1):
-                output = x + x1
-                output = output + x
-                output = output + x1
-                output = self.linear(output)
-                loss = self.loss(output, self.target)
-                return loss, output
-
-        device = torch.device("cpu")
-        target = torch.ones(5, 2, dtype=torch.int64).to(device)
-        model = MultiAdd(target).to(device)
-
-        x = torch.randn(5, 5, 2, dtype=torch.float32).to(device)
-        x1 = torch.randn(5, 5, 2, dtype=torch.float32).to(device)
-
-        input0_desc = IODescription("x", [5, 5, 2], "float32")
-        input1_desc = IODescription("x1", [5, 5, 2], "float32")
-        output0_desc = IODescription("output0", [], "float32")
-        output1_desc = IODescription("output1", [5, 5, 2], "float32")
-        model_desc = ModelDescription([input0_desc, input1_desc], [output0_desc, output1_desc])
-
-        learning_rate = torch.tensor([1.0000000e00]).to(device)
-        input_args = [x, x1, learning_rate]
-
-        onnx_model = self.get_onnx_model(
-            model, model_desc, input_args, device, _extra_postprocess=postpass_replace_first_add_with_sub
-        )
-
-        # check that extra postpass is called, and called only once.
-        add_nodes = self.find_nodes(onnx_model, "Add")
-        sub_nodes = self.find_nodes(onnx_model, "Sub")
-        assert len(add_nodes) == 2
-        assert len(sub_nodes) == 1
-
-        unprocessed_onnx_model = self.get_onnx_model(
-            model, model_desc, input_args, device, _extra_postprocess=None, _enable_internal_postprocess=False
-        )
-        # check that the model is unchanged.
-        add_nodes = self.find_nodes(unprocessed_onnx_model, "Add")
-        sub_nodes = self.find_nodes(unprocessed_onnx_model, "Sub")
-        assert len(add_nodes) == 3
-        assert len(sub_nodes) == 0
-
-        processed_onnx_model = self.get_onnx_model(
-            unprocessed_onnx_model,
-            model_desc,
-            input_args,
-            device,
-            _extra_postprocess=postpass_replace_first_add_with_sub,
-        )
-        # check that extra postpass is called, and called only once.
-        add_nodes = self.find_nodes(processed_onnx_model, "Add")
-        sub_nodes = self.find_nodes(processed_onnx_model, "Sub")
-        assert len(add_nodes) == 2
-        assert len(sub_nodes) == 1
-
-
-if __name__ == "__main__":
-    unittest.main(module=__name__, buffer=True)