Skip to content

Commit

Permalink
Merge branch 'main' into chi/trt10
Browse files Browse the repository at this point in the history
  • Loading branch information
chilo-ms committed Mar 22, 2024
2 parents e2c9d2b + 7e84ba0 commit 79d459c
Show file tree
Hide file tree
Showing 244 changed files with 6,647 additions and 3,290 deletions.
8 changes: 8 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,14 @@ jobs:
# Details on CodeQL's query packs refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs
queries: security-extended,security-and-quality

# Setup Java to use a version that is not too old for the project
- if: ${{ matrix.language == 'java' }}
name: Setup Java 11
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'microsoft'

# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- if: ${{ matrix.language != 'cpp' }}
Expand Down
4 changes: 2 additions & 2 deletions cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "6df40a2471737b27271bdd9b900ab5f3aec746c7",
"commitHash": "0100f6a5779831fa7a651e4b67ef389a8752bd9b",
"repositoryUrl": "https://github.com/google/flatbuffers.git"
},
"comments": "flatbuffers"
Expand Down Expand Up @@ -206,7 +206,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "c11386eb632eec7c1c2aa323142f73519f946e2a",
"commitHash": "150e7527d5286ddd3a995c228dedf8d76a7a86bc",
"repositoryUrl": "https://github.com/intel/neural-speed.git"
},
"comments": "neural_speed"
Expand Down
47 changes: 14 additions & 33 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ option(onnxruntime_USE_QNN "Build with QNN support" OFF)
option(onnxruntime_USE_SNPE "Build with SNPE support" OFF)
option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
option(onnxruntime_USE_NEURAL_SPEED "Build with Neural Speed support" OFF)
option(onnxruntime_USE_NEURAL_SPEED "Build with Neural Speed support" ON)
option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
Expand Down Expand Up @@ -325,8 +325,8 @@ if (onnxruntime_USE_ROCM)
# replicate strategy used by pytorch to get ROCM_VERSION
# https://github.com/pytorch/pytorch/blob/5c5b71b6eebae76d744261715231093e62f0d090/cmake/public/LoadHIP.cmake
# with modification
if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version-dev")
file(READ "${onnxruntime_ROCM_HOME}/.info/version-dev" ROCM_VERSION_DEV_RAW)
if (EXISTS "${onnxruntime_ROCM_HOME}/.info/version")
file(READ "${onnxruntime_ROCM_HOME}/.info/version" ROCM_VERSION_DEV_RAW)
string(REGEX MATCH "^([0-9]+)\.([0-9]+)\.([0-9]+)-.*$" ROCM_VERSION_MATCH ${ROCM_VERSION_DEV_RAW})
elseif (EXISTS "${onnxruntime_ROCM_HOME}/include/rocm_version.h")
file(READ "${onnxruntime_ROCM_HOME}/include/rocm_version.h" ROCM_VERSION_H_RAW)
Expand All @@ -345,7 +345,7 @@ if (onnxruntime_USE_ROCM)
else()
message(FATAL_ERROR "Cannot determine ROCm version string")
endif()
message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/.info/version-dev ****\n")
message("\n***** ROCm version from ${onnxruntime_ROCM_HOME}/.info/version ****\n")
message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}")
message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}")
Expand Down Expand Up @@ -1206,7 +1206,7 @@ if (onnxruntime_USE_DNNL)
add_compile_definitions(DNNL_OPENMP)
endif()

if (onnxruntime_USE_NEURAL_SPEED AND NOT onnxruntime_MINIMAL_BUILD)
if (onnxruntime_USE_NEURAL_SPEED AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_USE_TVM)
include(neural_speed)
if (USE_NEURAL_SPEED)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES neural_speed::bestla)
Expand Down Expand Up @@ -1290,34 +1290,6 @@ if (onnxruntime_USE_OPENVINO)

add_definitions(-DUSE_OPENVINO=1)

if (EXISTS "$ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/version.txt")
file(READ $ENV{INTEL_OPENVINO_DIR}/deployment_tools/inference_engine/version.txt VER)
endif()

if (NOT DEFINED ENV{INTEL_OPENVINO_DIR})
message(FATAL_ERROR "[Couldn't locate OpenVINO] OpenVINO may not have been initialized")
endif()

# Check OpenVINO version for support
if ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.0")
set(OPENVINO_VERSION "2023.0")
add_definitions(-DOPENVINO_2023_0=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.1")
set(OPENVINO_VERSION "2023.1")
add_definitions(-DOPENVINO_2023_1=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.2")
set(OPENVINO_VERSION "2023.2")
add_definitions(-DOPENVINO_2023_2=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "2023.3")
set(OPENVINO_VERSION "2023.3")
add_definitions(-DOPENVINO_2023_3=1)
elseif ($ENV{INTEL_OPENVINO_DIR} MATCHES "openvino")
set(OPENVINO_VERSION "2023.3")
add_definitions(-DOPENVINO_2023_3=1)
else()
message(FATAL_ERROR "Unsupported OpenVINO version: ${INTEL_OPENVINO_DIR}")
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
endif()
Expand All @@ -1334,6 +1306,10 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_NPU)
add_definitions(-DOPENVINO_CONFIG_NPU=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
Expand All @@ -1354,6 +1330,11 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_NPU_NP)
add_definitions(-DOPENVINO_CONFIG_NPU=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_HETERO)
add_definitions(-DOPENVINO_CONFIG_HETERO=1)
add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
Expand Down
9 changes: 9 additions & 0 deletions cmake/adjust_global_compile_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,15 @@ if (CMAKE_SYSTEM_NAME STREQUAL "Android")
string(APPEND CMAKE_ASM_FLAGS_RELEASE " -O3")
endif()

# Suggested by https://gitlab.kitware.com/cmake/cmake/-/issues/20132
# MacCatalyst is not well supported in CMake
# The error that can emerge without this flag can look like:
# "clang : error : overriding '-mmacosx-version-min=11.0' option with '-target x86_64-apple-ios14.0-macabi' [-Werror,-Woverriding-t-option]"
if (PLATFORM_NAME STREQUAL "macabi")
add_compile_options(-Wno-overriding-t-option)
add_link_options(-Wno-overriding-t-option)
endif()

# Enable space optimization for gcc/clang
# Cannot use "-ffunction-sections -fdata-sections" if we enable bitcode (iOS)
if (NOT MSVC AND NOT onnxruntime_ENABLE_BITCODE)
Expand Down
4 changes: 2 additions & 2 deletions cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b3132
# Until the 3.4.1 release this is the best option we have.
# Issue link: https://gitlab.com/libeigen/eigen/-/issues/2744
eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip;ba0a75fd12dbef8f6557a74e611b7a3d0c5fe7bf
flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v23.5.26.zip;59422c3b5e573dd192fead2834d25951f1c1670c
fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
google_benchmark;https://github.com/google/benchmark/archive/refs/tags/v1.8.3.zip;bf9870756ee3f8d2d3b346b24ee3600a41c74d3d
Expand All @@ -35,7 +35,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
neural_speed;https://github.com/intel/neural-speed/archive/refs/tags/bestlav0.1.1.zip;65b0f7a0d04f72f0d5a8d48af70f0366f2ab3939
neural_speed;https://github.com/intel/neural-speed/archive/refs/tags/v0.3.zip;5ec64e3071edc7347ebd8a81679cf06e2bb9b851
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.15.0.zip;54c3f960a0541c5d8d3e60c2933e11f5d3688a11
#use the commit of Final DDS removal. DDS output is now supported by ORT TRT.
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/bacfaaa951653cd4e72efe727a543567cb38f7de.zip;26434329612e804164ab7baa6ae629ada56c1b26
Expand Down
1 change: 1 addition & 0 deletions cmake/external/neural_speed.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ if(USE_NEURAL_SPEED)
neural_speed
URL ${DEP_URL_neural_speed}
URL_HASH SHA1=${DEP_SHA1_neural_speed}
PATCH_COMMAND ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/neural_speed/150e7527d5286ddd3a995c228dedf8d76a7a86bc.patch
)
set(BTLA_USE_OPENMP OFF)
onnxruntime_fetchcontent_makeavailable(neural_speed)
Expand Down
36 changes: 28 additions & 8 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ foreach(ONNXRUNTIME_DEP IN LISTS ONNXRUNTIME_DEPS_LIST)
set(DEP_URL_${ONNXRUNTIME_DEP_NAME} ${ONNXRUNTIME_DEP_URL})
# The third column is SHA1 hash value
set(DEP_SHA1_${ONNXRUNTIME_DEP_NAME} ${ONNXRUNTIME_DEP})

if(ONNXRUNTIME_DEP_URL MATCHES "^https://")
# Search a local mirror folder
string(REGEX REPLACE "^https://" "${REPO_ROOT}/mirror/" LOCAL_URL "${ONNXRUNTIME_DEP_URL}")

if(EXISTS "${LOCAL_URL}")
cmake_path(ABSOLUTE_PATH LOCAL_URL)
set(DEP_URL_${ONNXRUNTIME_DEP_NAME} "${LOCAL_URL}")
endif()
endif()
endif()
endforeach()

Expand Down Expand Up @@ -109,7 +119,7 @@ FetchContent_Declare(
URL ${DEP_URL_flatbuffers}
URL_HASH SHA1=${DEP_SHA1_flatbuffers}
PATCH_COMMAND ${ONNXRUNTIME_FLATBUFFERS_PATCH_COMMAND}
FIND_PACKAGE_ARGS 1.12.0...<2.0.0 NAMES Flatbuffers
FIND_PACKAGE_ARGS 23.5.9 NAMES Flatbuffers
)

# Download a protoc binary from Internet if needed
Expand Down Expand Up @@ -305,13 +315,23 @@ if (CPUINFO_SUPPORTED)
set(CPUINFO_BUILD_UNIT_TESTS OFF CACHE INTERNAL "")
set(CPUINFO_BUILD_MOCK_TESTS OFF CACHE INTERNAL "")
set(CPUINFO_BUILD_BENCHMARKS OFF CACHE INTERNAL "")

FetchContent_Declare(
pytorch_cpuinfo
URL ${DEP_URL_pytorch_cpuinfo}
URL_HASH SHA1=${DEP_SHA1_pytorch_cpuinfo}
FIND_PACKAGE_ARGS NAMES cpuinfo
)
if(onnxruntime_target_platform STREQUAL "ARM64EC")
message("Applying a patch for Windows ARM64EC in cpuinfo")
FetchContent_Declare(
pytorch_cpuinfo
URL ${DEP_URL_pytorch_cpuinfo}
URL_HASH SHA1=${DEP_SHA1_pytorch_cpuinfo}
PATCH_COMMAND ${Patch_EXECUTABLE} -p1 < ${PROJECT_SOURCE_DIR}/patches/cpuinfo/9bb12d342fd9479679d505d93a478a6f9cd50a47.patch
FIND_PACKAGE_ARGS NAMES cpuinfo
)
else()
FetchContent_Declare(
pytorch_cpuinfo
URL ${DEP_URL_pytorch_cpuinfo}
URL_HASH SHA1=${DEP_SHA1_pytorch_cpuinfo}
FIND_PACKAGE_ARGS NAMES cpuinfo
)
endif()
set(ONNXRUNTIME_CPUINFO_PROJ pytorch_cpuinfo)
endif()

Expand Down
72 changes: 72 additions & 0 deletions cmake/maccatalyst_prepare_objects_for_prelink.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
#!/usr/bin/env python3
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import os
import shutil
import sys


# Note: This script is mainly used for sanity checking/validating the files in the .a library equal to the .o files
# in the source dir to handle the case of source files having duplicate names under different subdirectories for
# each onnxruntime library. (Only applicable when doing a Mac Catalyst build.)
def main():
source_dir = sys.argv[1]
dest_dir = sys.argv[2]
files_from_static_lib = sys.argv[3]
files_from_source_dir = []
for subdir, _, files in os.walk(source_dir):
for file_name in files:
if file_name.endswith(".o"):
files_from_source_dir.append(file_name.strip())
dest_name_without_extension, _ = os.path.splitext(file_name)
counter = 0

dest_file = f"{dest_name_without_extension}.o"
while os.path.exists(os.path.join(dest_dir, dest_file)):
print("Duplicate file name from source: " + os.path.join(source_dir, subdir, file_name))
counter += 1
dest_file = f"{dest_name_without_extension}_{counter}.o"
print("Renamed file name in destination: " + os.path.join(dest_dir, dest_file))

destination_path = os.path.join(dest_dir, dest_file)
source_file = os.path.join(source_dir, subdir, file_name)
shutil.copy(source_file, destination_path)

# Sanity check to ensure the number of .o object from the original cmake source directory matches with the number
# of .o files extracted from each .a onnxruntime library
file_lists_from_static_lib = []
with open(files_from_static_lib) as file:
filenames = file.readlines()
for filename in filenames:
file_lists_from_static_lib.append(filename.strip())

sorted_list1 = sorted(file_lists_from_static_lib)
sorted_list2 = sorted(files_from_source_dir)

if len(sorted_list1) != len(sorted_list2):
print(
"Caught a mismatch in the number of .o object files from the original cmake source directory: ",
len(sorted_list1),
"the number of .o files extracted from the static onnxruntime lib: ",
len(sorted_list2),
"for: ",
os.path.basename(source_dir),
)

if sorted_list1 == sorted_list2:
print(
"Sanity check passed: object files from original source directory matches with files extracted "
"from static library for: ",
os.path.basename(source_dir),
)
else:
print(
"Error: Mismatch between object files from original source directory "
"and the .o files extracted from static library for: ",
os.path.basename(source_dir),
)


if __name__ == "__main__":
main()
36 changes: 29 additions & 7 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -281,7 +281,13 @@ endif()

# Assemble the Apple static framework (iOS and macOS)
if(onnxruntime_BUILD_APPLE_FRAMEWORK)
set(STATIC_FRAMEWORK_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}-${CMAKE_OSX_SYSROOT})
# when building for mac catalyst, the CMAKE_OSX_SYSROOT is set to MacOSX as well, to avoid duplication,
# we specify as `-macabi` in the name of the output static apple framework directory.
if (PLATFORM_NAME STREQUAL "macabi")
set(STATIC_FRAMEWORK_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}-macabi)
else()
set(STATIC_FRAMEWORK_OUTPUT_DIR ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_BUILD_TYPE}-${CMAKE_OSX_SYSROOT})
endif()

# Setup the various directories required. Remove any existing ones so we start with a clean directory.
set(STATIC_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/static_libraries)
Expand All @@ -299,18 +305,34 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK)
# to enforce symbol visibility. doing it this way limits the symbols included from the .a files to symbols used
# by the ORT .o files.

# If it's an onnxruntime library, extract .o files to a separate directory for each library to avoid any clashes
# with filenames (e.g. utils.o)
# If it's an onnxruntime library, extract .o files from the original cmake build path to a separate directory for
# each library to avoid any clashes with filenames (e.g. utils.o)
foreach(_LIB ${onnxruntime_INTERNAL_LIBRARIES} )
GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE)
if(_LIB_TYPE STREQUAL "STATIC_LIBRARY")
set(CUR_STATIC_LIB_OBJ_DIR ${STATIC_LIB_TEMP_DIR}/$<TARGET_LINKER_FILE_BASE_NAME:${_LIB}>)
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ${CMAKE_COMMAND} -E make_directory ${CUR_STATIC_LIB_OBJ_DIR})

add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ar ARGS -x $<TARGET_FILE:${_LIB}>
WORKING_DIRECTORY ${CUR_STATIC_LIB_OBJ_DIR})
if (PLATFORM_NAME STREQUAL "macabi")
# There exists several duplicate names for source files under different subdirectories within
# each onnxruntime library. (e.g. onnxruntime/contrib_ops/cpu/element_wise_ops.o
# vs. onnxruntime/providers/core/cpu/math/element_wise_ops.o)
# In that case, using 'ar ARGS -x' to extract the .o files from .a lib would possibly cause duplicate naming files being overwritten
# and lead to missing undefined symbol error in the generated binary.
# So we use the below python script as a sanity check to do a recursive find of all .o files in ${CUR_TARGET_CMAKE_SOURCE_LIB_DIR}
# and verifies that matches the content of the .a, and then copy from the source dir.
# TODO: The copying action here isn't really necessary. For future fix, consider using the script extracts from the ar with the rename to potentially
# make both maccatalyst and other builds do the same thing.
set(CUR_TARGET_CMAKE_SOURCE_LIB_DIR ${CMAKE_CURRENT_BINARY_DIR}/CMakeFiles/${_LIB}.dir)
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ar -t $<TARGET_FILE:${_LIB}> | grep "\.o$" > ${_LIB}.object_file_list.txt
COMMAND ${CMAKE_COMMAND} -E env python3 ${CMAKE_CURRENT_SOURCE_DIR}/maccatalyst_prepare_objects_for_prelink.py ${CUR_TARGET_CMAKE_SOURCE_LIB_DIR} ${CUR_STATIC_LIB_OBJ_DIR} ${CUR_STATIC_LIB_OBJ_DIR}/${_LIB}.object_file_list.txt
WORKING_DIRECTORY ${CUR_STATIC_LIB_OBJ_DIR})
else()
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ar ARGS -x $<TARGET_FILE:${_LIB}>
WORKING_DIRECTORY ${CUR_STATIC_LIB_OBJ_DIR})
endif()
endif()
endforeach()

Expand Down
6 changes: 6 additions & 0 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,12 @@ if (WIN32)
endif()
endif()

if (PLATFORM_NAME STREQUAL "macabi")
# Needed for maccatalyst C compilation
# i.e. the flags below add "--target=x86_64-apple-ios14.0-macabi -ffunction-sections -fdata-sections"
target_compile_options(onnxruntime_mlas PRIVATE ${CMAKE_C_FLAGS})
endif()

if (NOT onnxruntime_BUILD_SHARED_LIB)
install(TARGETS onnxruntime_mlas
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
Expand Down
Loading

0 comments on commit 79d459c

Please sign in to comment.