Skip to content

Commit

Permalink
Merge branch 'main' into support/vcpkg
Browse files Browse the repository at this point in the history
  • Loading branch information
luncliff authored Aug 4, 2024
2 parents 779ff2b + 2653226 commit 47551f6
Show file tree
Hide file tree
Showing 239 changed files with 16,804 additions and 3,068 deletions.
31 changes: 18 additions & 13 deletions .github/labeler.yml
Original file line number Diff line number Diff line change
@@ -1,20 +1,25 @@
api:javascript: '/\bjavascript\b/i'
api:CSharp: '/(\bc\s*sharp\b|\bc#)/i'
api:java: '/\bjava\b/i'
api:javascript: '/\bjavascript\b/i'
ep:ACL: '/\bacl\b/i'
ep:ArmNN: '/\barmnn\b/i'
ep:CUDA: '/\bcuda\b/i'
ep:DML: '/(\bdirectml\b|\bdml\b)/i'
ep:MIGraphX: '/\bmigraphx\b/i'
ep:oneDNN: '/\bonednn\b/i'
ep:CANN: '/\bcann\b/i'
ep:CoreML: '/\bcore\s*ml\b/i'
ep:DML: '/(\bdirect\s*ml\b|\bdml\b)/i'
ep:MIGraphX: '/\bmi\s*graph\s*x\b/i'
ep:oneDNN: '/\bone\s*dnn\b/i'
ep:OpenVINO: '/\bopen\s*vino\b/i'
ep:RockchipNPU: '/\brockchip\b/i'
ep:QNN: '/\bqnn\b/i'
ep:RockchipNPU: '/\brockchip(?:npu)?\b/i'
ep:ROCm: '/\brocm\b/i'
ep:TensorRT: '/(\btensor\s*rt\b|\btrt\b)/i'
ep:SNPE: '/\bsnpe\b/i'
ep:tvm: '/\btvm\b/i'
ep:VitisAI: '/\bvitis(?:ai)?\b/i'
platform:jetson: '/\bjetson\b/i'
platform:mobile: '/(\bobj(?:ective)?-?c\b|\bnnapi\b|\bcore-?ml\b|\bmobile\b|\bandroid\b|\bios\b|\bxamarin\b|\bmaui\b)/i'
platform:web: '/(\bwebgl\b|\bweb-?gpu\b|\bwasm\b|\bonnxruntime-node\b|\bonnxruntime-web\b)/i'
platform:windows: '/(\bwindows\b|\bwinrt\b|\bwinml\b)/i'
model:transformer: '/(\bbert\b|\bgpt-?2\b|\bhugging-?face\b|\blong-?former\b|\bt5\b)/i'
quantization: '/(is this a quantized model\?\n\nYes|\bquantization\b)/i'
ep:WebGPU: '/\bwebgpu\b/i'
ep:WebNN: '/\bwebnn\b/i'
ep:Xnnpack: '/\bxnn\s*pack\b/i'
.NET: '/(\bdot\s*net\b|\bnuget\b|\.net\b)/i'
platform:jetson: '/(\bjetson\b|\bjetpack\b)/i'
platform:mobile: '/(\bobj(?:ective)?-?c\b|\bnnapi\b|\bmobile\b|\bandroid\b|\bios\b|\bxamarin\b|\bmaui\b)/i'
platform:web: '/(\bwebgl\b|\bweb-?gpu\b|\bwasm\b|\bonnxruntime-node\b|\bonnxruntime-web\b|\bonnxruntime-react-native\b|\bnpm\b|\btransformers\.js\b)/i'
model:transformer: '/\btransformers(?!\.js)\b/i'
4 changes: 4 additions & 0 deletions .github/title-only-labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Label rules that are matched against issue titles only (this file is the
# configuration-path of the "Title Only Issue Labeler" workflow).
# Each entry maps a label name to a regular expression written as '/pattern/flags';
# every rule here uses the `i` flag.
# \b anchors keep short tokens such as "trt" or "qdq" from matching inside longer words.
ep:CUDA: '/\bcuda\b/i'
ep:TensorRT: '/(\btensor\s*rt\b|\btrt\b)/i'
platform:windows: '/(\bwindows\b|\bwinrt\b|\bwinml\b)/i'
# "quant" has no \b anchors, so it also matches inside words such as "quantized" or "dequantize".
quantization: '/(quant|\bqdq\b)/i'
20 changes: 20 additions & 0 deletions .github/workflows/title-only-labeler.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# Workflow that applies labels to issues using the rules in
# .github/title-only-labeler.yml.
name: "Title Only Issue Labeler"
# Runs whenever an issue is opened or edited.
on:
issues:
types: [opened, edited]

# The job needs write access to issues in order to add labels.
permissions:
issues: write

jobs:
triage:
runs-on: ubuntu-latest
steps:
# NOTE(review): the action reference below looks mangled by e-mail obfuscation in this
# capture ("[email protected]"); it should be an `owner/repo@version` reference.
# Restore it from the repository history - TODO confirm the exact version.
- uses: github/[email protected]
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: .github/title-only-labeler.yml
not-before: 2020-01-15T02:54:32Z
enable-versioned-regex: 0
# Presumably restricts matching to the issue title and ignores the body,
# in line with the workflow name - verify against the action's documentation.
include-title: 1
include-body: 0
12 changes: 11 additions & 1 deletion cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -216,7 +216,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "06adf4461ac84035bee658c6cf5df39f7ab6071d",
"commitHash": "f161f95883b4ebd8cb789de5efc67b73c0a6e694",
"repositoryUrl": "https://github.com/onnx/onnx-tensorrt.git"
},
"comments": "onnx_tensorrt"
Expand Down Expand Up @@ -351,6 +351,16 @@
},
"comments": "directx_headers"
}
},
{
"component": {
"type": "git",
"git": {
"commitHash": "98ca4e1941fe3263f128f74f10063a3ea35c7019",
"repositoryUrl": "https://github.com/NVIDIA/cudnn-frontend.git"
},
"comments": "cudnn_frontend"
}
}
]
}
15 changes: 0 additions & 15 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -732,18 +732,6 @@ set(ORT_PROVIDER_FLAGS)
set(ORT_PROVIDER_CMAKE_FLAGS)

if (onnxruntime_USE_CUDA)
if (onnxruntime_USE_CUDA_NHWC_OPS)
add_compile_definitions(ENABLE_CUDA_NHWC_OPS)
endif()
# Give more hints for the generator, with FindCUDAToolkit.cmake
find_package(CUDAToolkit REQUIRED)
if(CMAKE_GENERATOR MATCHES "Visual Studio")
set(CMAKE_VS_PLATFORM_TOOLSET_CUDA "${CUDAToolkit_VERSION_MAJOR}.${CUDAToolkit_VERSION_MINOR}")
set(CMAKE_VS_PLATFORM_TOOLSET_CUDA_CUSTOM_DIR "${CUDAToolkit_TARGET_DIR}/")
endif()
if(NOT DEFINED CMAKE_CUDA_COMPILER)
get_filename_component(CMAKE_CUDA_COMPILER "${CUDAToolkit_NVCC_EXECUTABLE}" ABSOLUTE)
endif()
enable_language(CUDA)
message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")

Expand Down Expand Up @@ -1457,9 +1445,6 @@ if (onnxruntime_USE_CUDA)
# Normalize the user-supplied CUDA home to a CMake-style path and store it in
# CUDAToolkit_ROOT. The signature is file(TO_CMAKE_PATH "<path>" <out-var>):
# the input path comes first and the output variable second.
file(TO_CMAKE_PATH "${onnxruntime_CUDA_HOME}" CUDAToolkit_ROOT)
endif()
find_package(CUDAToolkit REQUIRED)
if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
endif()
if (NOT CMAKE_CUDA_ARCHITECTURES)
if (CMAKE_LIBRARY_ARCHITECTURE STREQUAL "aarch64-linux-gnu")
# Support for Jetson/Tegra ARM devices
Expand Down
5 changes: 3 additions & 2 deletions cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,8 @@ mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
neural_speed;https://github.com/intel/neural-speed/archive/refs/tags/v0.3.zip;5ec64e3071edc7347ebd8a81679cf06e2bb9b851
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.16.1.zip;2eb9198bb352757d5ff13977cbe0634898e0837c
#use the latest commit of 10.0-GA
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/06adf4461ac84035bee658c6cf5df39f7ab6071d.zip;46dceef659d75d276e7914a8057c2282269d5e7b
#use the latest commit of 10.2-GA
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/f161f95883b4ebd8cb789de5efc67b73c0a6e694.zip;2148d0c79a171abf2b9451f3bfec164e85caf2ef
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
protoc_win64;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win64.zip;b4521f7ada5b260380f94c4bd7f1b7684c76969a
protoc_win32;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-win32.zip;3688010318192c46ce73213cdfb6b3e5656da874
Expand All @@ -58,3 +58,4 @@ utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.5.2.zip;11071a47594b20f00af09aad83e0d5203ccf6029
111 changes: 111 additions & 0 deletions cmake/external/cuDNN.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
# Aggregate interface target: consumers link CUDNN::cudnn_all to pick up the
# cuDNN include directory and every component library attached to it later in this file.
add_library(CUDNN::cudnn_all INTERFACE IMPORTED)

# Locate the directory that contains cudnn.h. Candidate roots are the CUDNN_PATH
# environment variable, the CUDNN_PATH CMake variable, a Python site-packages
# nvidia/cudnn directory, and the CUDA toolkit include directories.
find_path(
  CUDNN_INCLUDE_DIR cudnn.h
  HINTS $ENV{CUDNN_PATH} ${CUDNN_PATH} ${Python_SITEARCH}/nvidia/cudnn ${CUDAToolkit_INCLUDE_DIRS}
  PATH_SUFFIXES include
  REQUIRED
)

# Read the major version from cudnn_version.h.
# The digit class must be [0-9]: with [1-9] a major version containing a zero
# (for example 10) would be truncated to its leading non-zero digits ("1").
file(READ "${CUDNN_INCLUDE_DIR}/cudnn_version.h" cudnn_version_header)
string(REGEX MATCH "#define CUDNN_MAJOR [0-9]+" macrodef "${cudnn_version_header}")
string(REGEX MATCH "[0-9]+" CUDNN_MAJOR_VERSION "${macrodef}")

# find_cudnn_library(NAME)
#
# Searches for a single cuDNN component library and wraps it in an imported target.
#   NAME - base name of the component (for example cudnn or cudnn_ops).
#
# The search accepts either the plain library name or the versioned file name
# "lib<NAME>.so.<CUDNN_MAJOR_VERSION>". The result is stored in <NAME>_LIBRARY;
# on success the imported target CUDNN::<NAME> is created with the cuDNN include
# directory attached as a usage requirement.
function(find_cudnn_library NAME)
  find_library(
    ${NAME}_LIBRARY
    NAMES ${NAME} "lib${NAME}.so.${CUDNN_MAJOR_VERSION}"
    HINTS $ENV{CUDNN_PATH} ${CUDNN_PATH} ${Python_SITEARCH}/nvidia/cudnn ${CUDAToolkit_LIBRARY_DIR}
    PATH_SUFFIXES lib64 lib/x64 lib
    REQUIRED
  )

  # Nothing to wrap when the search came back empty.
  if(NOT ${NAME}_LIBRARY)
    message(STATUS "${NAME} not found.")
    return()
  endif()

  add_library(CUDNN::${NAME} UNKNOWN IMPORTED)
  set_property(TARGET CUDNN::${NAME} PROPERTY INTERFACE_INCLUDE_DIRECTORIES "${CUDNN_INCLUDE_DIR}")
  set_property(TARGET CUDNN::${NAME} PROPERTY IMPORTED_LOCATION "${${NAME}_LIBRARY}")
  message(STATUS "${NAME} found at ${${NAME}_LIBRARY}.")
endfunction()

# The core cuDNN library is always looked up, whatever the cuDNN major version.
find_cudnn_library(cudnn)

include(FindPackageHandleStandardArgs)
# NOTE(review): the package name given here is the literal "LIBRARY", so the
# found-status reported by this call is presumably LIBRARY_FOUND rather than
# CUDNN_FOUND - TODO confirm whether that is intentional.
find_package_handle_standard_args(
  LIBRARY
  REQUIRED_VARS CUDNN_INCLUDE_DIR cudnn_LIBRARY
)

# Publish the overall result as an internal cache entry.
if(NOT CUDNN_INCLUDE_DIR OR NOT cudnn_LIBRARY)
  set(CUDNN_FOUND OFF CACHE INTERNAL "cuDNN Library Not Found")
else()
  message(STATUS "cuDNN: ${cudnn_LIBRARY}")
  message(STATUS "cuDNN: ${CUDNN_INCLUDE_DIR}")
  set(CUDNN_FOUND ON CACHE INTERNAL "cuDNN Library Found")
endif()

# Wire the core library and the header location into the aggregate target.
target_link_libraries(CUDNN::cudnn_all INTERFACE CUDNN::cudnn)
target_include_directories(CUDNN::cudnn_all INTERFACE
  $<INSTALL_INTERFACE:include>
  $<BUILD_INTERFACE:${CUDNN_INCLUDE_DIR}>
)

# Attach the component libraries that belong to the detected cuDNN major version.
# Versions 8 and 9 ship different sets of component libraries; any other major
# version leaves CUDNN::cudnn_all with only the core library.
if(CUDNN_MAJOR_VERSION EQUAL 8)
  foreach(_cudnn_component IN ITEMS
      cudnn_adv_infer
      cudnn_adv_train
      cudnn_cnn_infer
      cudnn_cnn_train
      cudnn_ops_infer
      cudnn_ops_train)
    find_cudnn_library(${_cudnn_component})
  endforeach()

  target_link_libraries(CUDNN::cudnn_all INTERFACE
    CUDNN::cudnn_adv_train
    CUDNN::cudnn_ops_train
    CUDNN::cudnn_cnn_train
    CUDNN::cudnn_adv_infer
    CUDNN::cudnn_cnn_infer
    CUDNN::cudnn_ops_infer
  )
elseif(CUDNN_MAJOR_VERSION EQUAL 9)
  foreach(_cudnn_component IN ITEMS
      cudnn_cnn
      cudnn_adv
      cudnn_graph
      cudnn_ops
      cudnn_engines_runtime_compiled
      cudnn_engines_precompiled
      cudnn_heuristic)
    find_cudnn_library(${_cudnn_component})
  endforeach()

  target_link_libraries(CUDNN::cudnn_all INTERFACE
    CUDNN::cudnn_adv
    CUDNN::cudnn_ops
    CUDNN::cudnn_cnn
    CUDNN::cudnn_graph
    CUDNN::cudnn_engines_runtime_compiled
    CUDNN::cudnn_engines_precompiled
    CUDNN::cudnn_heuristic
  )
endif()

# Keep the include-directory cache entry out of the default cache views.
mark_as_advanced(CUDNN_INCLUDE_DIR)
12 changes: 12 additions & 0 deletions cmake/external/cudnn_frontend.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Fetches the cudnn_frontend dependency from the URL and SHA1 hash recorded for it
# (DEP_URL_cudnn_frontend / DEP_SHA1_cudnn_frontend) and adds it to the build.
include(FetchContent)

# Switches set before the project is added; presumably read by cudnn_frontend's
# own CMake files to skip its samples, unit tests and Python bindings - TODO confirm.
set(CUDNN_FRONTEND_BUILD_SAMPLES OFF)
set(CUDNN_FRONTEND_BUILD_UNIT_TESTS OFF)
set(CUDNN_FRONTEND_BUILD_PYTHON_BINDINGS OFF)

# Forward the configured cuDNN location under the name CUDNN_PATH.
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})

FetchContent_Declare(cudnn_frontend
  URL ${DEP_URL_cudnn_frontend}
  URL_HASH SHA1=${DEP_SHA1_cudnn_frontend}
)
FetchContent_MakeAvailable(cudnn_frontend)
19 changes: 6 additions & 13 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,6 @@ if (onnxruntime_BUILD_UNIT_TESTS)
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set(gtest_disable_pthreads ON)
endif()
if (${CMAKE_SYSTEM_NAME} MATCHES "AIX")
set(gtest_disable_pthreads ON CACHE BOOL "gtest_disable_pthreads" FORCE)
endif()
set(INSTALL_GTEST OFF CACHE BOOL "" FORCE)
if (IOS OR ANDROID)
# on mobile platforms the absl flags class dumps the flag names (assumably for binary size), which breaks passing
Expand Down Expand Up @@ -638,20 +635,16 @@ endif()

message(STATUS "Finished fetching external dependencies")


set(onnxruntime_LINK_DIRS )

if (onnxruntime_USE_CUDA)
#TODO: combine onnxruntime_CUDNN_HOME and onnxruntime_CUDA_HOME, assume they are the same
find_package(CUDAToolkit REQUIRED)
if (WIN32)
if(onnxruntime_CUDNN_HOME)
list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib ${onnxruntime_CUDNN_HOME}/lib/x64)
endif()
else()
if(onnxruntime_CUDNN_HOME)
list(APPEND onnxruntime_LINK_DIRS ${onnxruntime_CUDNN_HOME}/lib ${onnxruntime_CUDNN_HOME}/lib64)
endif()

if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
endif()
include(cuDNN)
endif()

if(onnxruntime_USE_SNPE)
Expand Down
12 changes: 8 additions & 4 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,10 +38,14 @@ function(get_c_cxx_api_headers HEADERS_VAR)

# need to add header files for enabled EPs
foreach(f ${ONNXRUNTIME_PROVIDER_NAMES})
file(GLOB _provider_headers CONFIGURE_DEPENDS
"${REPO_ROOT}/include/onnxruntime/core/providers/${f}/*.h"
)
list(APPEND _headers ${_provider_headers})
# The header files in the include/onnxruntime/core/providers/cuda directory cannot be flattened into the same
# directory as onnxruntime_c_api.h. Most other EPs probably do not work this way either.
if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm))
file(GLOB _provider_headers CONFIGURE_DEPENDS
"${REPO_ROOT}/include/onnxruntime/core/providers/${f}/*.h"
)
list(APPEND _headers ${_provider_headers})
endif()
endforeach()

set(${HEADERS_VAR} ${_headers} PARENT_SCOPE)
Expand Down
2 changes: 1 addition & 1 deletion cmake/onnxruntime_framework.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ endif()
if(onnxruntime_USE_TENSORRT OR onnxruntime_USE_NCCL)
# TODO: for now, core framework depends on CUDA. It should be moved to TensorRT EP
# TODO: provider_bridge_ort.cc should not include nccl.h
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} ${onnxruntime_CUDNN_HOME}/include PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} PUBLIC ${CMAKE_CURRENT_BINARY_DIR} ${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES})
else()
target_include_directories(onnxruntime_framework PRIVATE ${ONNXRUNTIME_ROOT} ${eigen_INCLUDE_DIRS} PUBLIC ${CMAKE_CURRENT_BINARY_DIR})
endif()
Expand Down
13 changes: 11 additions & 2 deletions cmake/onnxruntime_mlas.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -555,8 +555,17 @@ else()
${MLAS_SRC_DIR}/intrinsics/avx2/qdwconv_avx2.cpp
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx2.cpp
)
set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")

message(STATUS "CMAKE_CXX_COMPILER_ID: ${CMAKE_CXX_COMPILER_ID}")
message(STATUS "CMAKE_CXX_COMPILER_VERSION: ${CMAKE_CXX_COMPILER_VERSION}")

# Choose the compile flags for the AVX2 kernels.
# -mavxvnni is only recognized by GCC 11 and newer, so every GCC 10.x release must
# fall back to the plain AVX2/FMA flags. (The previous check, VERSION_GREATER "10",
# was already true for 10.1 and later.) Non-GNU compilers keep the existing behavior
# and always receive -mavxvnni.
if(NOT "${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" OR CMAKE_CXX_COMPILER_VERSION VERSION_GREATER_EQUAL "11")
  message(STATUS "Using -mavx2 -mfma -mavxvnni flags")
  set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma -mavxvnni")
else()
  message(STATUS "Using -mavx2 -mfma flags")
  set_source_files_properties(${mlas_platform_srcs_avx2} PROPERTIES COMPILE_FLAGS "-mavx2 -mfma")
endif()
set(mlas_platform_srcs_avx512f
${MLAS_SRC_DIR}/x86_64/DgemmKernelAvx512F.S
${MLAS_SRC_DIR}/x86_64/SgemmKernelAvx512F.S
Expand All @@ -575,7 +584,7 @@ else()
${MLAS_SRC_DIR}/x86_64/ConvSymKernelAvx512Core.S
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512.cpp
)
set_source_files_properties(${mlas_platform_srcs_avx512core} PROPERTIES COMPILE_FLAGS "-mavx512bw -mavx512dq -mavx512vl")
set_source_files_properties(${mlas_platform_srcs_avx512core} PROPERTIES COMPILE_FLAGS "-mfma -mavx512vnni -mavx512bw -mavx512dq -mavx512vl")

set(mlas_platform_srcs_avx512vnni
${MLAS_SRC_DIR}/sqnbitgemm_kernel_avx512vnni.cpp
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime_providers_cpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,7 @@ if (onnxruntime_ENABLE_TRAINING)
endif()

install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/cpu/cpu_provider_factory.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/)
install(FILES ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/resource.h ${PROJECT_SOURCE_DIR}/../include/onnxruntime/core/providers/custom_op_context.h DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime/core/providers)
set_target_properties(onnxruntime_providers PROPERTIES LINKER_LANGUAGE CXX)
set_target_properties(onnxruntime_providers PROPERTIES FOLDER "ONNXRuntime")

Expand Down
Loading

0 comments on commit 47551f6

Please sign in to comment.