Skip to content

Commit

Permalink
Merge main
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianlizarraga committed Jun 22, 2024
2 parents 4b2af75 + b95982e commit 578d47d
Show file tree
Hide file tree
Showing 220 changed files with 5,699 additions and 24,311 deletions.
138 changes: 138 additions & 0 deletions .github/workflows/sca.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
name: Windows_SCA
on:
push:
branches:
- main
- rel-*
pull_request:

concurrency:
group: ${{ github.workflow }}-${{ github.head_ref || github.run_id }}
cancel-in-progress: true

env:
AZCOPY_AUTO_LOGIN_TYPE: MSI
AZCOPY_MSI_CLIENT_ID: 63b63039-6328-442f-954b-5a64d124e5b4

jobs:
Onnxruntime-SCA-training-CUDA:
permissions:
security-events: write
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"]
steps:
- uses: actions/checkout@v4
with:
submodules: false
- uses: actions/setup-python@v5
with:
python-version: '3.11.x'
architecture: 'x64'

- uses: actions/setup-node@v4
with:
node-version: 18

- name: Download cuda
run: azcopy.exe cp --recursive "https://lotusscus.blob.core.windows.net/models/cuda_sdk/v11.8" cuda_sdk


- name: Install ONNX
run: |
&tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x64 -install_prefix D:\b\Debug\installed -build_config Debug
# The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter.
- name: Build code
env:
CAExcludePath: 'C:\Program Files;D:\b;${{ github.workspace }}\cmake'
run: python tools\ci_build\build.py --windows_sdk_version 10.0.22621.0 --enable_training --build_java --compile_no_warning_as_error --config Debug --build_dir D:\b --skip_submodule_sync --build_csharp --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_pybind --cmake_extra_defines onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES=ON --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON --cmake_extra_defines onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE=ON --use_cuda --cuda_home=${{ github.workspace }}\cuda_sdk\v11.8 --use_binskim_compliant_compile_flags --enable_cuda_profiling --cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=75

- name: Generate sarif
working-directory: D:\b
run: npx @microsoft/sarif-multitool merge *.sarif --recurse --output-directory=${{ github.workspace }}\output --output-file=MergeResult.sarif --merge-runs && dir ${{ github.workspace }}\output

- name: Upload SARIF to GitHub
uses: github/codeql-action/upload-sarif@v3
continue-on-error: true
with:
sarif_file: ${{ github.workspace }}\output\MergeResult.sarif
category: VS_SCA

# No python
Onnxruntime-SCA-win32-WINML-x64:
permissions:
security-events: write
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"]
steps:
- uses: actions/checkout@v4
with:
submodules: false
- uses: actions/setup-python@v5
with:
python-version: '3.11.x'
architecture: 'x64'

- uses: actions/setup-node@v4
with:
node-version: 18

- name: Delete build folder
run: |
if (Test-Path D:\b) { Remove-Item -Recurse -Force D:\b }
&tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x64 -install_prefix D:\b\Debug\installed -build_config Debug
# The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter.
- name: Build code
env:
CAExcludePath: 'C:\Program Files;D:\b;${{ github.workspace }}\cmake'
run: python tools\ci_build\build.py --build_java --compile_no_warning_as_error --config Debug --build_dir D:\b --skip_submodule_sync --build_csharp --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --cmake_extra_defines onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES=ON --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON --cmake_extra_defines onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE=ON --ms_experimental --use_dml --use_winml --disable_rtti --enable_wcos --build_shared_lib

- name: Generate sarif
working-directory: D:\b
run: npx @microsoft/sarif-multitool merge *.sarif --recurse --output-directory=${{ github.workspace }}\output --output-file=MergeResult.sarif --merge-runs && dir ${{ github.workspace }}\output

- name: Upload SARIF to GitHub
uses: github/codeql-action/upload-sarif@v3
continue-on-error: true
with:
sarif_file: ${{ github.workspace }}\output\MergeResult.sarif
category: VS_SCA_WIN32_WINML_X64

# No java, No python
Onnxruntime-SCA-win32-WINML-x86:
permissions:
security-events: write
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-vs2022-mms"]
steps:
- uses: actions/checkout@v4
with:
submodules: false
- uses: actions/setup-python@v5
with:
python-version: '3.11.x'
architecture: 'x86'

- uses: actions/setup-node@v4
with:
node-version: 18

- name: Delete build folder
run: |
if (Test-Path D:\b) { Remove-Item -Recurse -Force D:\b }
&tools\ci_build\github\windows\install_third_party_deps.ps1 -cpu_arch x86 -install_prefix D:\b\Debug\installed -build_config Debug
# The build machine doesn't have a GPU. So the value of CMAKE_CUDA_ARCHITECTURES doesn't matter.
- name: Build code
env:
CAExcludePath: 'C:\Program Files;D:\b;${{ github.workspace }}\cmake'
run: python tools\ci_build\build.py --compile_no_warning_as_error --config Debug --build_dir D:\b --skip_submodule_sync --build_csharp --update --build --parallel --cmake_generator "Visual Studio 17 2022" --build_shared_lib --cmake_extra_defines onnxruntime_USE_CUSTOM_STATIC_ANALYSIS_RULES=ON --cmake_extra_defines onnxruntime_ENABLE_STATIC_ANALYSIS=ON --cmake_extra_defines onnxruntime_REDIRECT_STATIC_ANALYSIS_OUTPUTS_TO_FILE=ON --ms_experimental --use_dml --use_winml --disable_rtti --enable_wcos --build_shared_lib

- name: Generate sarif
working-directory: D:\b
run: npx @microsoft/sarif-multitool merge *.sarif --recurse --output-directory=${{ github.workspace }}\output --output-file=MergeResult.sarif --merge-runs && dir ${{ github.workspace }}\output

- name: Upload SARIF to GitHub
uses: github/codeql-action/upload-sarif@v3
continue-on-error: true
with:
sarif_file: ${{ github.workspace }}\output\MergeResult.sarif
category: VS_SCA_WIN32_WINML_X86
2 changes: 1 addition & 1 deletion .pipelines/OneBranch.Nuget-WindowsAI-Pipeline.Official.yml
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ extends:
AuthCertName: '53d54d02-SSL-AutoRotate'
AuthSignCertName: '53d54d02-978d-4305-8572-583cf6711c4f'

FolderPath: ${{ parameters.FolderPath }}
FolderPath: $(Build.ArtifactStagingDirectory)
Pattern: '*.nupkg'
SessionTimeout: 90
ServiceEndpointUrl: 'https://api.esrp.microsoft.com/api/v2'
Expand Down
4 changes: 2 additions & 2 deletions cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -256,7 +256,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "80dc998efced8ceb2be59756668a7e90e8bef917",
"commitHash": "3e9dfa2866941655c56877882565e7577de6fc7b",
"repositoryUrl": "https://github.com/pybind/pybind11.git"
},
"comments": "pybind11"
Expand Down Expand Up @@ -336,7 +336,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "5356c4a943a35e74d7cdc69486afcb8703b9a59a",
"commitHash": "204da9c522cebec5220bba52cd3542ebcaf99e7a",
"repositoryUrl": "https://github.com/ROCmSoftwarePlatform/composable_kernel.git"
},
"comments": "composable_kernel"
Expand Down
16 changes: 7 additions & 9 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,6 @@ option(onnxruntime_GCOV_COVERAGE "Compile with options necessary to run code cov
option(onnxruntime_DONT_VECTORIZE "Do not vectorize operations in Eigen" OFF)

option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF)
option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in language other than cpp" OFF)
option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump debug information about node inputs and outputs when executing the model." OFF)
cmake_dependent_option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS_ENABLE_DUMP_TO_SQLDB "Build dump debug information about node inputs and outputs with support for sql database." OFF "onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS" OFF)
option(onnxruntime_USE_DML "Build with DirectML support" OFF)
Expand Down Expand Up @@ -241,6 +240,7 @@ option(onnxruntime_ENABLE_TRITON "Enable Triton" OFF)

# composable kernel is managed automatically, unless user want to explicitly disable it, it should not be manually set
option(onnxruntime_USE_COMPOSABLE_KERNEL "Enable composable kernel for ROCm EP" ON)
option(onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE "Enable ck_tile for composable kernel" ON)
option(onnxruntime_USE_ROCBLAS_EXTENSION_API "Enable rocblas tuning for ROCm EP" OFF)
option(onnxruntime_USE_TRITON_KERNEL "Enable triton compiled kernel" OFF)
option(onnxruntime_BUILD_KERNEL_EXPLORER "Build Kernel Explorer for testing and profiling GPU kernels" OFF)
Expand Down Expand Up @@ -367,6 +367,11 @@ if (onnxruntime_USE_ROCM)
if (onnxruntime_USE_COMPOSABLE_KERNEL AND ROCM_VERSION_DEV VERSION_LESS "5.3")
message(WARNING "composable kernel is only supported on ROCm >= 5.3")
set(onnxruntime_USE_COMPOSABLE_KERNEL OFF)
set(onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE OFF)
endif()
if (onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE AND ROCM_VERSION_DEV VERSION_LESS "6.0")
message(WARNING "ck_tile can only be enabled on ROCm >= 6.0 due to compatibility and compilation speed, disable automatically")
set(onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE OFF)
endif()
endif()

Expand Down Expand Up @@ -433,13 +438,6 @@ if (onnxruntime_ENABLE_MEMORY_PROFILE)
endif()

set(ONNX_ML 1)
if (NOT onnxruntime_ENABLE_PYTHON)
set(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS OFF)
endif()

if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
add_compile_definitions(ENABLE_LANGUAGE_INTEROP_OPS)
endif()

if (NOT (UNIX AND onnxruntime_ENABLE_PYTHON AND onnxruntime_ENABLE_TRAINING AND (NOT onnxruntime_BUILD_SHARED_LIB)))
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
Expand Down Expand Up @@ -572,7 +570,7 @@ endif()
#Need python to generate def file
if (onnxruntime_BUILD_SHARED_LIB OR onnxruntime_ENABLE_PYTHON)
if (onnxruntime_ENABLE_PYTHON)
if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS OR onnxruntime_REQUIRE_PYTHON_EMBED_LIB)
if (onnxruntime_REQUIRE_PYTHON_EMBED_LIB)
find_package(Python 3.8 COMPONENTS Interpreter Development NumPy)
else()
find_package(Python 3.8 COMPONENTS Interpreter Development.Module NumPy)
Expand Down
2 changes: 1 addition & 1 deletion cmake/adjust_global_compile_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -321,7 +321,7 @@ else()
string(APPEND CMAKE_CXX_FLAGS " -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl")
string(APPEND CMAKE_C_FLAGS " -mavx512f -mavx512cd -mavx512bw -mavx512dq -mavx512vl")
endif()
if (CMAKE_SYSTEM_NAME STREQUAL "Android" AND Onnxruntime_GCOV_COVERAGE)
if (CMAKE_SYSTEM_NAME STREQUAL "Android" AND onnxruntime_GCOV_COVERAGE)
string(APPEND CMAKE_CXX_FLAGS " -g -O0 --coverage ")
string(APPEND CMAKE_C_FLAGS " -g -O0 --coverage ")
endif()
Expand Down
4 changes: 2 additions & 2 deletions cmake/deps.txt
Original file line number Diff line number Diff line change
Expand Up @@ -48,13 +48,13 @@ protoc_linux_aarch64;https://github.com/protocolbuffers/protobuf/releases/downlo
protoc_mac_universal;https://github.com/protocolbuffers/protobuf/releases/download/v21.12/protoc-21.12-osx-universal_binary.zip;23710c3d1c2036d8d65a6a22234372fa2d7af9ef
psimd;https://github.com/Maratyszcza/psimd/archive/072586a71b55b7f8c584153d223e95687148a900.zip;1f5454b01f06f9656b77e4a5e2e31d7422487013
pthreadpool;https://github.com/Maratyszcza/pthreadpool/archive/4fe0e1e183925bf8cfa6aae24237e724a96479b8.zip;07a0aa91dd9bf86f31b95497e00f31d8a261a4bd
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.10.1.zip;769b6aa67a77f17a770960f604b727645b6f6a13
pybind11;https://github.com/pybind/pybind11/archive/refs/tags/v2.12.0.zip;8482f57ed55c7b100672815a311d5450858723fb
pytorch_cpuinfo;https://github.com/pytorch/cpuinfo/archive/959002f82d7962a473d8bf301845f2af720e0aa4.zip;85da3caa60eb2b148613b443fbc2bfdc30689965
re2;https://github.com/google/re2/archive/refs/tags/2024-05-01.tar.gz;206cfee5ee0b4c6844680ba66275e9e8faa77405
safeint;https://github.com/dcleblanc/SafeInt/archive/refs/tags/3.0.28.zip;23f252040ff6cb9f1fd18575b32fa8fb5928daac
tensorboard;https://github.com/tensorflow/tensorboard/archive/373eb09e4c5d2b3cc2493f0949dc4be6b6a45e81.zip;67b833913605a4f3f499894ab11528a702c2b381
cutlass;https://github.com/NVIDIA/cutlass/archive/refs/tags/v3.5.0.zip;ae038931b9fc2c416c17d9cda91d9706b343f56d
utf8_range;https://github.com/protocolbuffers/utf8_range/archive/72c943dea2b9240cd09efde15191e144bc7c7d38.zip;9925739c9debc0efa2adcb194d371a35b6a03156
extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d8391c9791ec71c38336436319a2d4ac7a0.zip;4365ac5140338b4cb75a39944a4be276e3829b3c
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/5356c4a943a35e74d7cdc69486afcb8703b9a59a.zip;522382c2af437e09124287e5879ab64af5b2e299
composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
38 changes: 38 additions & 0 deletions cmake/external/composable_kernel.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ FetchContent_Declare(composable_kernel
FetchContent_GetProperties(composable_kernel)
if(NOT composable_kernel_POPULATED)
FetchContent_Populate(composable_kernel)
set(GPU_TARGETS ${CMAKE_HIP_ARCHITECTURES})
set(BUILD_DEV OFF CACHE BOOL "Disable -Weverything, otherwise, error: 'constexpr' specifier is incompatible with C++98 [-Werror,-Wc++98-compat]" FORCE)
# Exclude i8 device gemm instances due to excessive long compilation time and not being used
set(DTYPES fp32 fp16 bf16 fp8)
Expand All @@ -22,4 +23,41 @@ if(NOT composable_kernel_POPULATED)
${composable_kernel_BINARY_DIR}/include
${composable_kernel_SOURCE_DIR}/library/include)
target_compile_definitions(onnxruntime_composable_kernel_includes INTERFACE __fp32__ __fp16__ __bf16__)

execute_process(
COMMAND ${Python3_EXECUTABLE} ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py
--list_blobs ${composable_kernel_BINARY_DIR}/blob_list.txt
COMMAND_ERROR_IS_FATAL ANY
)
file(STRINGS ${composable_kernel_BINARY_DIR}/blob_list.txt generated_fmha_srcs)
add_custom_command(
OUTPUT ${generated_fmha_srcs}
COMMAND ${Python3_EXECUTABLE} ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py --output_dir ${composable_kernel_BINARY_DIR}
DEPENDS ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/generate.py ${composable_kernel_BINARY_DIR}/blob_list.txt
)
set_source_files_properties(${generated_fmha_srcs} PROPERTIES LANGUAGE HIP GENERATED TRUE)
add_custom_target(gen_fmha_srcs DEPENDS ${generated_fmha_srcs}) # dummy target for dependencies
# code generation complete

set(fmha_srcs
${generated_fmha_srcs}
${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/fmha_fwd.cpp
${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/fmha_fwd.hpp
${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/bias.hpp
${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha/mask.hpp
)
add_library(onnxruntime_composable_kernel_fmha STATIC EXCLUDE_FROM_ALL ${generated_fmha_srcs})
target_link_libraries(onnxruntime_composable_kernel_fmha PUBLIC onnxruntime_composable_kernel_includes)
target_include_directories(onnxruntime_composable_kernel_fmha PUBLIC ${composable_kernel_SOURCE_DIR}/example/ck_tile/01_fmha)
add_dependencies(onnxruntime_composable_kernel_fmha gen_fmha_srcs)

# ck tile only supports MI200+ GPUs at the moment
get_target_property(archs onnxruntime_composable_kernel_fmha HIP_ARCHITECTURES)
string(REPLACE "," ";" archs "${archs}")
set(original_archs ${archs})
list(FILTER archs INCLUDE REGEX "(gfx942|gfx90a)")
if (NOT original_archs EQUAL archs)
message(WARNING "ck tile only supports archs: ${archs} among the originally specified ${original_archs}")
endif()
set_target_properties(onnxruntime_composable_kernel_fmha PROPERTIES HIP_ARCHITECTURES "${archs}")
endif()
7 changes: 0 additions & 7 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -205,13 +205,6 @@ set(onnxruntime_INTERNAL_LIBRARIES
onnxruntime_flatbuffers
)

if (onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS)
list(APPEND onnxruntime_INTERNAL_LIBRARIES
onnxruntime_language_interop
onnxruntime_pyop
)
endif()

if (onnxruntime_USE_EXTENSIONS)
list(APPEND onnxruntime_INTERNAL_LIBRARIES
onnxruntime_extensions
Expand Down
3 changes: 3 additions & 0 deletions cmake/onnxruntime_kernel_explorer.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ elseif (onnxruntime_USE_ROCM)
target_compile_definitions(kernel_explorer PRIVATE __HIP_PLATFORM_AMD__=1 __HIP_PLATFORM_HCC__=1)
if (onnxruntime_USE_COMPOSABLE_KERNEL)
target_compile_definitions(kernel_explorer PRIVATE USE_COMPOSABLE_KERNEL)
if (onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE)
target_compile_definitions(kernel_explorer PRIVATE USE_COMPOSABLE_KERNEL_CK_TILE)
endif()
target_link_libraries(kernel_explorer PRIVATE onnxruntime_composable_kernel_includes)
endif()
if (onnxruntime_USE_TRITON_KERNEL)
Expand Down
8 changes: 0 additions & 8 deletions cmake/onnxruntime_language_interop_ops.cmake

This file was deleted.

4 changes: 4 additions & 0 deletions cmake/onnxruntime_providers_rocm.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,10 @@
device_softmax_instance
)
target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_COMPOSABLE_KERNEL)
if (onnxruntime_USE_COMPOSABLE_KERNEL_CK_TILE)
target_link_libraries(onnxruntime_providers_rocm PUBLIC onnxruntime_composable_kernel_fmha)
target_compile_definitions(onnxruntime_providers_rocm PRIVATE USE_COMPOSABLE_KERNEL_CK_TILE)
endif()
endif()

if(UNIX)
Expand Down
12 changes: 0 additions & 12 deletions cmake/onnxruntime_pyop.cmake

This file was deleted.

Loading

0 comments on commit 578d47d

Please sign in to comment.