From 54bd87b3e4d97db8ac1f58d9e9095f5d79b4a7f0 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Mon, 7 Oct 2024 17:34:02 -0700 Subject: [PATCH 01/65] rename cuda to gpu --- .../c-api-noopenmp-packaging-pipelines.yml | 170 +----------------- .../cuda-packaging-pipeline.yml | 2 +- .../nuget-cuda-publishing-pipeline.yml | 2 +- ...-stage.yml => nuget-combine-gpu-stage.yml} | 4 +- ...tage.yml => nuget-gpu-packaging-stage.yml} | 0 ...age.yml => nuget-gpu-publishing-stage.yml} | 0 ....yml => nuget-win-gpu-packaging-stage.yml} | 0 7 files changed, 6 insertions(+), 172 deletions(-) rename tools/ci_build/github/azure-pipelines/stages/{nuget-combine-cuda-stage.yml => nuget-combine-gpu-stage.yml} (96%) rename tools/ci_build/github/azure-pipelines/stages/{nuget-cuda-packaging-stage.yml => nuget-gpu-packaging-stage.yml} (100%) rename tools/ci_build/github/azure-pipelines/stages/{nuget-cuda-publishing-stage.yml => nuget-gpu-publishing-stage.yml} (100%) rename tools/ci_build/github/azure-pipelines/stages/{nuget-win-cuda-packaging-stage.yml => nuget-win-gpu-packaging-stage.yml} (100%) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index e2d977bd60986..d06bfb3710c10 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -118,7 +118,7 @@ stages: SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} -- template: stages/nuget-combine-cuda-stage.yml +- template: stages/nuget-combine-gpu-stage.yml parameters: DoCompliance: ${{ parameters.DoCompliance }} CudaVersion: 11.8 @@ -131,170 +131,4 @@ stages: buildJava: true buildNodejs: true SpecificArtifact: ${{ parameters.SpecificArtifact }} - BuildId: ${{ parameters.BuildId }} - - -- template: nuget/templates/dml-vs-2022.yml - parameters: - AgentPool: 
'onnxruntime-Win2022-GPU-dml-A10' - IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - ArtifactName: 'drop-nuget-dml' - StageName: 'Windows_CI_GPU_DML_Dev' - BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --build_nodejs --cmake_generator "Visual Studio 17 2022" - BuildArch: 'x64' - msbuildArchitecture: 'amd64' - EnvSetupScript: 'setup_env.bat' - sln_platform: 'x64' - DoDebugBuild: 'false' - DoNugetPack: 'true' - DoCompliance: 'false' - DoEsrp: ${{ parameters.DoEsrp }} - NuPackScript: | - msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} /p:CurrentData=$(BuildDate) /p:CurrentTime=$(BuildTime) - copy $(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory) - copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory) - mkdir $(Build.ArtifactStagingDirectory)\testdata - copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata - -- template: nuget/templates/dml-vs-2022.yml - parameters: - AgentPool: 'onnxruntime-Win2022-GPU-dml-A10' - IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - ArtifactName: 'drop-win-dml-x86-zip' - StageName: 'Windows_CI_GPU_DML_Dev_x86' - BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --cmake_generator "Visual Studio 17 2022" - BuildArch: 'x86' - EnvSetupScript: 'setup_env_x86.bat' - sln_platform: 'Win32' - DoDebugBuild: 'false' - DoNugetPack: 'true' - DoCompliance: ${{ parameters.DoCompliance }} - DoEsrp: ${{ parameters.DoEsrp }} - RunTests: 'false' - NuPackScript: | - 
msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=x86 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} - cd $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\ - ren Microsoft.ML.OnnxRuntime.DirectML.* win-dml-x86.zip - copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\win-dml-x86.zip $(Build.ArtifactStagingDirectory) - mkdir $(Build.ArtifactStagingDirectory)\testdata - copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata - -- template: nuget/templates/dml-vs-2022.yml - parameters: - AgentPool: 'onnxruntime-Win2022-GPU-dml-A10' - IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - ArtifactName: 'drop-win-dml-arm64-zip' - StageName: 'Windows_CI_GPU_DML_Dev_arm64' - BuildCommand: --build_dir $(Build.BinariesDirectory) --arm64 --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --build_nodejs --cmake_generator "Visual Studio 17 2022" - BuildArch: 'x64' - EnvSetupScript: 'setup_env.bat' - sln_platform: 'arm64' - DoDebugBuild: 'false' - DoNugetPack: 'true' - DoCompliance: ${{ parameters.DoCompliance }} - DoEsrp: ${{ parameters.DoEsrp }} - RunTests: 'false' - NuPackScript: | - msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=arm64 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} - cd $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\ - ren Microsoft.ML.OnnxRuntime.DirectML.* win-dml-arm64.zip - copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\win-dml-arm64.zip $(Build.ArtifactStagingDirectory) - mkdir $(Build.ArtifactStagingDirectory)\testdata - copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* 
$(Build.ArtifactStagingDirectory)\testdata - -- stage: NuGet_Packaging_DML - dependsOn: - - Windows_CI_GPU_DML_Dev - - Windows_CI_GPU_DML_Dev_x86 - - Windows_CI_GPU_DML_Dev_arm64 - condition: succeeded() - jobs: - - job: NuGet_Packaging_DML - workspace: - clean: all - pool: 'onnxruntime-Win2022-GPU-dml-A10' - steps: - - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 - displayName: 'Clean Agent Directories' - condition: always() - - task: DownloadPipelineArtifact@0 - displayName: 'Download Pipeline Artifact - NuGet DirectML' - inputs: - artifactName: 'drop-nuget-dml' - targetPath: '$(Build.BinariesDirectory)/nuget-artifact-dml' - - - task: DownloadPipelineArtifact@0 - displayName: 'Download Pipeline Artifact - NuGet DirectML x86' - inputs: - artifactName: 'drop-win-dml-x86-zip' - targetPath: '$(Build.BinariesDirectory)/nuget-artifact-dml' - - - task: DownloadPipelineArtifact@0 - displayName: 'Download Pipeline Artifact - NuGet DirectML arm64' - inputs: - artifactName: 'drop-win-dml-arm64-zip' - targetPath: '$(Build.BinariesDirectory)/nuget-artifact-dml' - - - script: | - pushd $(Build.BinariesDirectory)\nuget-artifact-dml - dir - powershell -Command "Invoke-WebRequest http://stahlworks.com/dev/unzip.exe -OutFile unzip.exe" - powershell -Command "Invoke-WebRequest http://stahlworks.com/dev/zip.exe -OutFile zip.exe" - set PATH=%CD%;%PATH% - SETLOCAL EnableDelayedExpansion - FOR /R %%i IN (*.nupkg) do ( - set filename=%%~ni - IF NOT "!filename:~25,7!"=="Managed" ( - rename %%~ni.nupkg %%~ni.zip - unzip %%~ni.zip -d %%~ni - del /Q %%~ni.zip - - unzip win-dml-x86.zip -d win-x86 - mkdir %%~ni\runtimes\win-x86 - mkdir %%~ni\runtimes\win-x86\native - - move win-x86\runtimes\win-x86\native\onnxruntime.dll %%~ni\runtimes\win-x86\native\onnxruntime.dll - move win-x86\runtimes\win-x86\native\onnxruntime.lib %%~ni\runtimes\win-x86\native\onnxruntime.lib - move win-x86\runtimes\win-x86\native\onnxruntime.pdb %%~ni\runtimes\win-x86\native\onnxruntime.pdb 
- - unzip win-dml-arm64.zip -d win-arm64 - mkdir %%~ni\runtimes\win-arm64 - mkdir %%~ni\runtimes\win-arm64\native - - move win-arm64\runtimes\win-arm64\native\onnxruntime.dll %%~ni\runtimes\win-arm64\native\onnxruntime.dll - move win-arm64\runtimes\win-arm64\native\onnxruntime.lib %%~ni\runtimes\win-arm64\native\onnxruntime.lib - move win-arm64\runtimes\win-arm64\native\onnxruntime.pdb %%~ni\runtimes\win-arm64\native\onnxruntime.pdb - - - pushd %%~ni - zip -r ..\%%~ni.zip . - popd - move %%~ni.zip %%~ni.nupkg - ) - ) - popd - copy $(Build.BinariesDirectory)\nuget-artifact-dml\Microsoft.ML.OnnxRuntime.DirectML*nupkg $(Build.ArtifactStagingDirectory) - displayName: 'Bundle DML NuGet and other binaries' - - - template: templates/esrp_nuget.yml - parameters: - DisplayName: 'ESRP - sign NuGet package' - FolderPath: '$(Build.ArtifactStagingDirectory)' - DoEsrp: ${{ parameters.DoEsrp }} - - - template: templates/validate-package.yml - parameters: - PackageType: 'nuget' - PackagePath: '$(Build.ArtifactStagingDirectory)' - PackageName: 'Microsoft.ML.OnnxRuntime.DirectML*nupkg' - PlatformsSupported: 'win-x64,win-x86,win-arm64' - VerifyNugetSigning: ${{ parameters.DoEsrp }} - - - task: PublishPipelineArtifact@0 - displayName: 'Publish Pipeline NuGet Artifact' - inputs: - artifactName: 'drop-signed-nuget-dml' - targetPath: '$(Build.ArtifactStagingDirectory)' - - template: templates/component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' + BuildId: ${{ parameters.BuildId }} \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index 7118e85e9ea4b..6a245eed44828 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -105,7 +105,7 @@ stages: PackageNodeJS: false # Nuget Packaging - - template: stages/nuget-combine-cuda-stage.yml + 
- template: stages/nuget-combine-gpu-stage.yml parameters: DoCompliance: ${{ parameters.DoCompliance }} CudaVersion: ${{ parameters.CudaVersion }} diff --git a/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml b/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml index aeb250e1e0cbc..0e17bef4c8f73 100644 --- a/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml @@ -22,7 +22,7 @@ variables: value: onnxruntime-cuda-12 stages: - - template: stages/nuget-cuda-publishing-stage.yml + - template: stages/nuget-gpu-publishing-stage.yml parameters: artifact_feed: $(ArtifactFeed) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-gpu-stage.yml similarity index 96% rename from tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-combine-gpu-stage.yml index 9c7fbc24ab1b6..f10d700e7d1d9 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-gpu-stage.yml @@ -42,7 +42,7 @@ stages: buildJava: ${{ parameters.buildJava }} buildNodejs: ${{ parameters.buildNodejs }} -- template: nuget-win-cuda-packaging-stage.yml +- template: nuget-win-gpu-packaging-stage.yml parameters: RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} @@ -51,7 +51,7 @@ stages: win_cuda_home: ${{ parameters.win_cuda_home }} buildJava: ${{ parameters.buildJava }} -- template: nuget-cuda-packaging-stage.yml +- template: nuget-gpu-packaging-stage.yml parameters: DoCompliance: ${{ parameters.DoCompliance }} DoEsrp: ${{ parameters.DoEsrp }} diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-cuda-packaging-stage.yml 
b/tools/ci_build/github/azure-pipelines/stages/nuget-gpu-packaging-stage.yml similarity index 100% rename from tools/ci_build/github/azure-pipelines/stages/nuget-cuda-packaging-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-gpu-packaging-stage.yml diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-cuda-publishing-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-gpu-publishing-stage.yml similarity index 100% rename from tools/ci_build/github/azure-pipelines/stages/nuget-cuda-publishing-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-gpu-publishing-stage.yml diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml similarity index 100% rename from tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml From df06c82c8282e77f78540b4f46dd9c13ee56d3a9 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Mon, 7 Oct 2024 17:34:36 -0700 Subject: [PATCH 02/65] rename cuda to gpu --- tools/ci_build/github/azure-pipelines/publish-nuget.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/publish-nuget.yml b/tools/ci_build/github/azure-pipelines/publish-nuget.yml index b78d586288ba3..42ead98209505 100644 --- a/tools/ci_build/github/azure-pipelines/publish-nuget.yml +++ b/tools/ci_build/github/azure-pipelines/publish-nuget.yml @@ -33,7 +33,7 @@ stages: - script: move "$(Pipeline.Workspace)\build\drop-signed-nuget-dml\*" $(Build.BinariesDirectory)\nuget-artifact\final-package # Publish CUDA 11 Nuget/Java pkgs to ADO feed - - template: stages/nuget-cuda-publishing-stage.yml + - template: stages/nuget-gpu-publishing-stage.yml parameters: artifact_feed: $(ArtifactFeed) From 984e123e8df6e69f2dbb62b958d29b8c80305d3b Mon Sep 17 00:00:00 
2001 From: Jian Chen Date: Thu, 10 Oct 2024 17:01:14 -0700 Subject: [PATCH 03/65] Use Cuda with use dml --- .../azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml index 445066f08995a..bf5fc661fd70c 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu From e5315a479e20dc971c34afb1eab0cd0b2200c0f6 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Thu, 10 Oct 2024 17:06:12 -0700 Subject: [PATCH 04/65] revert c-api-noopenmp-packaging-pipelines.yml --- .../c-api-noopenmp-packaging-pipelines.yml | 595 +++++++++++++++++- 1 file changed, 592 insertions(+), 3 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index d06bfb3710c10..a24e40c1957ea 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -62,7 +62,7 @@ parameters: - name: QnnSdk displayName: QNN SDK Version type: string - default: 2.26.0.240828 + 
default: 2.22.0.240425 resources: repositories: @@ -83,7 +83,7 @@ variables: value: 11.8 - name: win_trt_home - value: $(Agent.TempDirectory)\TensorRT-10.4.0.26.Windows10.x86_64.cuda-11.8 + value: $(Agent.TempDirectory)\TensorRT-10.0.1.6.Windows10.x86_64.cuda-11.8 - name: win_cuda_home value: $(Agent.TempDirectory)\v11.8 @@ -94,6 +94,28 @@ stages: PreReleaseVersionSuffixString: ${{ parameters.PreReleaseVersionSuffixString }} PreReleaseVersionSuffixNumber: ${{ parameters.PreReleaseVersionSuffixNumber }} +- stage: Debug + dependsOn: Setup + jobs: + - job: D1 + pool: + name: 'onnxruntime-Ubuntu2204-AMD-CPU' + variables: + MyVar: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']] + BuildDate: $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Date.BuildDate']] + BuildTime: $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Time.BuildTime']] + steps: + - checkout: none + - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 + displayName: 'Clean Agent Directories' + condition: always() + - bash: echo $(MyVar) + - bash: echo $(BuildTime) + - bash: echo $(BuildDate) + - template: templates/component-governance-component-detection-steps.yml + parameters : + condition : 'succeeded' + - template: stages/download-java-tools-stage.yml - template: templates/c-api-cpu.yml @@ -112,6 +134,17 @@ stages: SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} +- template: templates/ondevice-training-cpu-packaging-pipeline.yml + parameters: + RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} + DoCompliance: ${{ parameters.DoCompliance }} + DoEsrp: ${{ parameters.DoEsrp }} + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} + OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime.Training' + AdditionalBuildFlags: '--enable_training_apis' + AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos' + BuildVariant: 'default' + - template: 
stages/java-cuda-packaging-stage.yml parameters: CudaVersion: 11.8 @@ -122,6 +155,7 @@ stages: parameters: DoCompliance: ${{ parameters.DoCompliance }} CudaVersion: 11.8 + docker_base_image: 'nvidia/cuda:11.8.0-cudnn8-devel-ubi8' RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} win_trt_home: ${{ variables.win_trt_home }} @@ -131,4 +165,559 @@ stages: buildJava: true buildNodejs: true SpecificArtifact: ${{ parameters.SpecificArtifact }} - BuildId: ${{ parameters.BuildId }} \ No newline at end of file + BuildId: ${{ parameters.BuildId }} + +# ROCm +- stage: Linux_C_API_Packaging_ROCm_x64 + dependsOn: [] + jobs: + - job: Linux_C_API_Packaging_ROCm_x64 + workspace: + clean: all + timeoutInMinutes: 120 + pool: onnxruntime-Ubuntu2204-AMD-CPU + variables: + RocmVersion: '5.6' + steps: + - checkout: self # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime + submodules: recursive + - checkout: manylinux # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/manylinux, for get-docker-image-steps.yml + submodules: false + + # get-docker-image-steps.yml will move the $(Build.SourcesDirectory)/manylinux into $(Build.SourcesDirectory)/onnxruntime, + # then rename $(Build.SourcesDirectory)/onnxruntime as $(Build.SourcesDirectory) + - template: templates/get-docker-image-steps.yml + parameters: + Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm + Context: tools/ci_build/github/linux/docker + DockerBuildArgs: >- + --build-arg INSTALL_DEPS_EXTRA_ARGS=-tmur + --build-arg BUILD_UID=$(id -u) + --network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 + --build-arg ROCM_VERSION=$(RocmVersion) + --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/gcc-toolset-12/root + --build-arg PREPEND_PATH=/opt/rh/gcc-toolset-12/root/usr/bin: + --build-arg 
LD_LIBRARY_PATH_ARG=/opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib + Repository: onnxruntimetrainingrocmbuild-rocm$(RocmVersion) + CheckOutManyLinux: true + + - template: templates/set-version-number-variables-step.yml + + - task: Bash@3 + displayName: 'Build' + inputs: + targetType: filePath + filePath: tools/ci_build/github/linux/build_rocm_c_api_package.sh + arguments: >- + -S $(Build.SourcesDirectory) + -B $(Build.BinariesDirectory) + -V $(RocmVersion) + -I onnxruntimetrainingrocmbuild-rocm$(RocmVersion) + -P python3.10 + + - script: | + set -e -x + mkdir $(Build.ArtifactStagingDirectory)/testdata + cp $(Build.BinariesDirectory)/Release/libcustom_op_library.so* $(Build.ArtifactStagingDirectory)/testdata + ls -al $(Build.ArtifactStagingDirectory) + displayName: 'Create Artifacts for CustomOp' # libcustom_op_library.so from cpu build is built with fp8, ROCm does not support it. + + - template: templates/c-api-artifacts-package-and-publish-steps-posix.yml + parameters: + buildConfig: 'Release' + artifactName: 'onnxruntime-linux-x64-rocm-$(OnnxRuntimeVersion)' + artifactNameNoVersionString: 'onnxruntime-linux-x64-rocm' + libraryName: 'libonnxruntime.so.$(OnnxRuntimeVersion)' + + - template: templates/component-governance-component-detection-steps.yml + parameters: + condition: 'succeeded' + - template: templates/clean-agent-build-directory-step.yml + + +- stage: NuGet_Packaging_ROCm + dependsOn: + - Setup + - Linux_C_API_Packaging_ROCm_x64 + condition: succeeded() + jobs: + - job: NuGet_Packaging_ROCm + workspace: + clean: all + # we need to use the 2022 pool to create the nuget package with both pre-net6+Xamarin and net6 targets. 
+ # VS2019 has no support for net6 and we need to use msbuild (from the VS install) to do the packing + pool: 'Onnxruntime-Win-CPU-2022' + variables: + breakCodesignValidationInjection: ${{ parameters.DoEsrp }} + ReleaseVersionSuffix: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']] + BuildDate : $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Date.BuildDate']] + BuildTime : $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Time.BuildTime']] + + steps: + - checkout: self + submodules: true + fetchDepth: 1 + + - template: templates/flex-downloadPipelineArtifact.yml + parameters: + StepName: 'Download Pipeline Artifact - NuGet' + ArtifactName: 'onnxruntime-linux-x64-rocm' + targetPath: '$(Build.BinariesDirectory)/nuget-artifact' + SpecificArtifact: ${{ parameters.specificArtifact }} + BuildId: ${{ parameters.BuildId }} + + - task: PowerShell@2 + displayName: 'Reconstruct Build Directory' + inputs: + targetType: inline + script: | + Get-ChildItem $(Build.BinariesDirectory)\nuget-artifact -Filter *.tgz | % { + # *.tar will be created after *.tgz is extracted + $cmd = "7z.exe x $($_.FullName) -y -o$(Build.BinariesDirectory)\nuget-artifact" + Write-Output $cmd + Invoke-Expression -Command $cmd + } + + Get-ChildItem $(Build.BinariesDirectory)\nuget-artifact -Filter *.tar | % { + $cmd = "7z.exe x $($_.FullName) -y -o$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts" + Write-Output $cmd + Invoke-Expression -Command $cmd + } + + $ort_dirs = Get-ChildItem -Path $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts\onnxruntime-* -Directory + foreach ($ort_dir in $ort_dirs) + { + $dirname = Split-Path -Path $ort_dir -Leaf + $dirname = $dirname.SubString(0, $dirname.LastIndexOf('-')) + Write-Output "Renaming $ort_dir to $dirname" + Rename-Item -Path $ort_dir -NewName $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts\$dirname + } + + Copy-Item 
-Path $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts\onnxruntime-linux-x64-rocm\lib\* -Destination $(Build.BinariesDirectory)\RelWithDebInfo + + - script: | + tree /F + workingDirectory: '$(Build.BinariesDirectory)' + displayName: 'Inspect Build Binaries Directory' + + - script: | + mklink /D /J models C:\local\models + workingDirectory: '$(Build.BinariesDirectory)' + displayName: 'Create models link' + + - task: NuGetToolInstaller@0 + displayName: Use Nuget 6.2.1 + inputs: + versionSpec: 6.2.1 + + - task: PowerShell@2 + displayName: Build .NET 6 targets using dotnet + inputs: + targetType: 'inline' + # we don't specify 'Any CPU' as the platform here because if we do it gets added to the output path + # e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\Any CPU\RelWithDebInfo\net6.0-ios\ + # which is inconsistent with the msbuild output path for the pre-.net6 targets + # e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\monoandroid11.0 + # and makes it harder to do the packing + # + # 'Any CPU' is the default (first 'mixed' platform specified in the csproj) so this should be fine. 
+ script: | + dotnet build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj ` + -p:SelectedTargets=Net6 ` + /p:Net6Targets=net6.0 ` + -p:Configuration=RelWithDebInfo ` + -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" ` + -p:OrtPackageId="Microsoft.ML.OnnxRuntime.ROCm" ` + -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} ` + -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix) + workingDirectory: '$(Build.SourcesDirectory)\csharp' + + - task: MSBuild@1 + displayName: 'Restore NuGet Packages and create project.assets.json for pre-.net6 targets' + inputs: + solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' + platform: 'Any CPU' + configuration: RelWithDebInfo + msbuildArguments: '-t:restore -p:SelectedTargets=PreNet6 -p:OrtPackageId="Microsoft.ML.OnnxRuntime.ROCm"' + workingDirectory: '$(Build.SourcesDirectory)\csharp' + + - task: MSBuild@1 + displayName: 'Build C# for pre-.net6 targets' + inputs: + solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' + configuration: RelWithDebInfo + platform: 'Any CPU' + msbuildArguments: > + -p:SelectedTargets=PreNet6 + -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" + -p:OrtPackageId="Microsoft.ML.OnnxRuntime.ROCm" + -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} + -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix) + -p:IsLinuxBuild=true + -p:IsWindowsBuild=false + -p:IsMacOSBuild=false + workingDirectory: '$(Build.SourcesDirectory)\csharp' + + - template: templates/win-esrp-dll.yml + parameters: + FolderPath: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo' + DisplayName: 'ESRP - Sign C# dlls' + DoEsrp: ${{ parameters.DoEsrp }} + + - task: MSBuild@1 + displayName: Update projects.assets.json with combined list of all target frameworks + inputs: + solution: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj' + platform: 'Any CPU' + configuration: RelWithDebInfo + msbuildArguments: 
'-t:restore -p:SelectedTargets=All -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm' + workingDirectory: '$(Build.SourcesDirectory)\csharp' + + - task: MSBuild@1 + displayName: 'Build Nuget Packages' + inputs: + solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj' + configuration: RelWithDebInfo + platform: 'Any CPU' + msbuildArguments: '-t:CreatePackage -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix) -p:CurrentTime=$(BuildTime) -p:CurrentDate=$(BuildDate)' + workingDirectory: '$(Build.SourcesDirectory)\csharp' + + - task: CopyFiles@2 + displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' + Contents: '*.snupkg' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: CopyFiles@2 + displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' + Contents: '*.nupkg' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - task: CopyFiles@2 + displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)' + inputs: + SourceFolder: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo' + Contents: '*.nupkg' + TargetFolder: '$(Build.ArtifactStagingDirectory)' + + - template: templates/esrp_nuget.yml + parameters: + DisplayName: 'ESRP - sign NuGet package' + FolderPath: '$(Build.ArtifactStagingDirectory)' + DoEsrp: ${{ parameters.DoEsrp }} + + - template: templates/validate-package.yml + parameters: + PackageType: 'nuget' + PackagePath: '$(Build.ArtifactStagingDirectory)' + PackageName: 'Microsoft.ML.OnnxRuntime.*nupkg' + PlatformsSupported: 'linux-x64' + VerifyNugetSigning: false + + - task: PublishPipelineArtifact@0 + displayName: 'Publish Pipeline NuGet Artifact' 
+ inputs: + artifactName: 'drop-signed-nuget-ROCm' + targetPath: '$(Build.ArtifactStagingDirectory)' + + - task: MSBuild@1 + displayName: 'Clean C#' + inputs: + solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' + platform: 'Any CPU' + configuration: RelWithDebInfo + msbuildArguments: '-t:Clean -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm' + workingDirectory: '$(Build.SourcesDirectory)\csharp' + + - task: RoslynAnalyzers@2 + displayName: 'Run Roslyn Analyzers' + inputs: + userProvideBuildInfo: msBuildInfo + msBuildCommandline: > + "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\msbuild.exe" + $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln + -p:configuration="RelWithDebInfo" + -p:Platform="Any CPU" + -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" + -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm + -p:IsLinuxBuild=true + -p:IsWindowsBuild=false + -p:IsMacOSBuild=false + condition: and(succeeded(), eq('${{ parameters.DoCompliance }}', true)) + + - template: templates/component-governance-component-detection-steps.yml + parameters : + condition : 'succeeded' + + - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 + displayName: 'Clean Agent Directories' + condition: always() + +- template: nuget/templates/test_linux.yml + parameters: + AgentPool: AMD-GPU + ArtifactSuffix: 'ROCm' + StageSuffix: 'ROCm' + NugetPackageName: 'Microsoft.ML.OnnxRuntime.ROCm' + SpecificArtifact: ${{ parameters.specificArtifact }} + CustomOpArtifactName: 'onnxruntime-linux-x64-rocm' + BuildId: ${{ parameters.BuildId }} + +- template: nuget/templates/dml-vs-2022.yml + parameters: + AgentPool: 'onnxruntime-Win2022-GPU-dml-A10' + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} + ArtifactName: 'drop-nuget-dml' + StageName: 'Windows_CI_GPU_DML_Dev' + BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib 
--enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --build_nodejs --cmake_generator "Visual Studio 17 2022" + BuildArch: 'x64' + msbuildArchitecture: 'amd64' + EnvSetupScript: 'setup_env.bat' + sln_platform: 'x64' + DoDebugBuild: 'false' + DoNugetPack: 'true' + DoCompliance: 'false' + DoEsrp: ${{ parameters.DoEsrp }} + NuPackScript: | + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} /p:CurrentData=$(BuildDate) /p:CurrentTime=$(BuildTime) + copy $(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory) + copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\*.nupkg $(Build.ArtifactStagingDirectory) + mkdir $(Build.ArtifactStagingDirectory)\testdata + copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata + +- template: nuget/templates/dml-vs-2022.yml + parameters: + AgentPool: 'onnxruntime-Win2022-GPU-dml-A10' + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} + ArtifactName: 'drop-win-dml-x86-zip' + StageName: 'Windows_CI_GPU_DML_Dev_x86' + BuildCommand: --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --cmake_generator "Visual Studio 17 2022" + BuildArch: 'x86' + EnvSetupScript: 'setup_env_x86.bat' + sln_platform: 'Win32' + DoDebugBuild: 'false' + DoNugetPack: 'true' + DoCompliance: ${{ parameters.DoCompliance }} + DoEsrp: ${{ parameters.DoEsrp }} + RunTests: 'false' + NuPackScript: | + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=x86 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} + cd 
$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\ + ren Microsoft.ML.OnnxRuntime.DirectML.* win-dml-x86.zip + copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\win-dml-x86.zip $(Build.ArtifactStagingDirectory) + mkdir $(Build.ArtifactStagingDirectory)\testdata + copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata + +- template: nuget/templates/dml-vs-2022.yml + parameters: + AgentPool: 'onnxruntime-Win2022-GPU-dml-A10' + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} + ArtifactName: 'drop-win-dml-arm64-zip' + StageName: 'Windows_CI_GPU_DML_Dev_arm64' + BuildCommand: --build_dir $(Build.BinariesDirectory) --arm64 --skip_submodule_sync --build_shared_lib --enable_onnx_tests --enable_wcos --use_telemetry --use_dml --use_winml --build_nodejs --cmake_generator "Visual Studio 17 2022" + BuildArch: 'x64' + EnvSetupScript: 'setup_env.bat' + sln_platform: 'arm64' + DoDebugBuild: 'false' + DoNugetPack: 'true' + DoCompliance: ${{ parameters.DoCompliance }} + DoEsrp: ${{ parameters.DoEsrp }} + RunTests: 'false' + NuPackScript: | + msbuild $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj /p:Configuration=RelWithDebInfo /p:TargetArchitecture=arm64 /t:CreatePackage /p:OrtPackageId=Microsoft.ML.OnnxRuntime.DirectML /p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} + cd $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\ + ren Microsoft.ML.OnnxRuntime.DirectML.* win-dml-arm64.zip + copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\win-dml-arm64.zip $(Build.ArtifactStagingDirectory) + mkdir $(Build.ArtifactStagingDirectory)\testdata + copy $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\custom_op_library.* $(Build.ArtifactStagingDirectory)\testdata + +- stage: NuGet_Packaging_DML + dependsOn: + - Windows_CI_GPU_DML_Dev + - Windows_CI_GPU_DML_Dev_x86 + - Windows_CI_GPU_DML_Dev_arm64 + condition: succeeded() + jobs: + - job: NuGet_Packaging_DML + 
workspace: + clean: all + pool: 'onnxruntime-Win2022-GPU-dml-A10' + steps: + - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 + displayName: 'Clean Agent Directories' + condition: always() + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact - NuGet DirectML' + inputs: + artifactName: 'drop-nuget-dml' + targetPath: '$(Build.BinariesDirectory)/nuget-artifact-dml' + + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact - NuGet DirectML x86' + inputs: + artifactName: 'drop-win-dml-x86-zip' + targetPath: '$(Build.BinariesDirectory)/nuget-artifact-dml' + + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact - NuGet DirectML arm64' + inputs: + artifactName: 'drop-win-dml-arm64-zip' + targetPath: '$(Build.BinariesDirectory)/nuget-artifact-dml' + + - script: | + pushd $(Build.BinariesDirectory)\nuget-artifact-dml + dir + powershell -Command "Invoke-WebRequest http://stahlworks.com/dev/unzip.exe -OutFile unzip.exe" + powershell -Command "Invoke-WebRequest http://stahlworks.com/dev/zip.exe -OutFile zip.exe" + set PATH=%CD%;%PATH% + SETLOCAL EnableDelayedExpansion + FOR /R %%i IN (*.nupkg) do ( + set filename=%%~ni + IF NOT "!filename:~25,7!"=="Managed" ( + rename %%~ni.nupkg %%~ni.zip + unzip %%~ni.zip -d %%~ni + del /Q %%~ni.zip + + unzip win-dml-x86.zip -d win-x86 + mkdir %%~ni\runtimes\win-x86 + mkdir %%~ni\runtimes\win-x86\native + + move win-x86\runtimes\win-x86\native\onnxruntime.dll %%~ni\runtimes\win-x86\native\onnxruntime.dll + move win-x86\runtimes\win-x86\native\onnxruntime.lib %%~ni\runtimes\win-x86\native\onnxruntime.lib + move win-x86\runtimes\win-x86\native\onnxruntime.pdb %%~ni\runtimes\win-x86\native\onnxruntime.pdb + + unzip win-dml-arm64.zip -d win-arm64 + mkdir %%~ni\runtimes\win-arm64 + mkdir %%~ni\runtimes\win-arm64\native + + move win-arm64\runtimes\win-arm64\native\onnxruntime.dll %%~ni\runtimes\win-arm64\native\onnxruntime.dll + move 
win-arm64\runtimes\win-arm64\native\onnxruntime.lib %%~ni\runtimes\win-arm64\native\onnxruntime.lib + move win-arm64\runtimes\win-arm64\native\onnxruntime.pdb %%~ni\runtimes\win-arm64\native\onnxruntime.pdb + + + pushd %%~ni + zip -r ..\%%~ni.zip . + popd + move %%~ni.zip %%~ni.nupkg + ) + ) + popd + copy $(Build.BinariesDirectory)\nuget-artifact-dml\Microsoft.ML.OnnxRuntime.DirectML*nupkg $(Build.ArtifactStagingDirectory) + displayName: 'Bundle DML NuGet and other binaries' + + - template: templates/esrp_nuget.yml + parameters: + DisplayName: 'ESRP - sign NuGet package' + FolderPath: '$(Build.ArtifactStagingDirectory)' + DoEsrp: ${{ parameters.DoEsrp }} + + - template: templates/validate-package.yml + parameters: + PackageType: 'nuget' + PackagePath: '$(Build.ArtifactStagingDirectory)' + PackageName: 'Microsoft.ML.OnnxRuntime.DirectML*nupkg' + PlatformsSupported: 'win-x64,win-x86,win-arm64' + VerifyNugetSigning: ${{ parameters.DoEsrp }} + + - task: PublishPipelineArtifact@0 + displayName: 'Publish Pipeline NuGet Artifact' + inputs: + artifactName: 'drop-signed-nuget-dml' + targetPath: '$(Build.ArtifactStagingDirectory)' + - template: templates/component-governance-component-detection-steps.yml + parameters: + condition: 'succeeded' + +- template: templates/qnn-ep-win.yml + parameters: + qnn_ep_build_pool_name: 'Onnxruntime-QNNEP-Windows-2022-CPU' + QnnSdk: ${{ parameters.QnnSdk }} + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} + DoEsrp: ${{ parameters.DoEsrp }} + ArtifactName: 'drop-nuget-qnn-x64' + StageName: 'OnnxRuntime_QNN_Nuget_Win_x64' + build_config: 'RelWithDebInfo' +- template: templates/qnn-ep-win.yml + parameters: + qnn_ep_build_pool_name: 'Onnxruntime-QNNEP-Windows-2022-CPU' + QnnSdk: ${{ parameters.QnnSdk }} + IsReleaseBuild: ${{ parameters.IsReleaseBuild }} + DoEsrp: ${{ parameters.DoEsrp }} + ArtifactName: 'drop-nuget-qnn-arm64' + buildParameter: '--arm64' + buildPlatform: 'ARM64' + buildArch: 'ARM64' + StageName: 
'OnnxRuntime_QNN_Nuget_Win_Arm64' + build_config: 'RelWithDebInfo' + +- stage: NuGet_Packaging_QNN + pool: 'Onnxruntime-QNNEP-Windows-2022-CPU' + dependsOn: + - OnnxRuntime_QNN_Nuget_Win_x64 + - OnnxRuntime_QNN_Nuget_Win_Arm64 + condition: succeeded() + jobs: + - job: NuGet_Packaging_QNN + workspace: + clean: all + steps: + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact - QNN NuGet x64' + inputs: + artifactName: 'drop-nuget-qnn-x64' + targetPath: '$(Build.BinariesDirectory)/nuget-artifact-x64' + + - task: DownloadPipelineArtifact@0 + displayName: 'Download Pipeline Artifact - QNN NuGet arm64' + inputs: + artifactName: 'drop-nuget-qnn-arm64' + targetPath: '$(Build.BinariesDirectory)/nuget-artifact-arm64' + + - task: PowerShell@2 + displayName: 'Bundle NuGet' + inputs: + targetType: 'inline' + script: | + + $x64_nupkgs = (Get-ChildItem $(Build.BinariesDirectory)/nuget-artifact-x64 -Filter Microsoft.ML.OnnxRuntime.QNN*.nupkg -Recurse) + $nuget_package_name = $x64_nupkgs[0].Name + $x64_nuget_package = $x64_nupkgs[0].FullName + + $nupkg_unzipped_directory = [System.IO.Path]::Combine($Env:BUILD_ARTIFACTSTAGINGDIRECTORY, 'nuget_unzip_merged', [System.IO.Path]::GetFileNameWithoutExtension($nuget_package_name)) + + $x64_unzip_cmd = "7z.exe x $x64_nuget_package -y -o$nupkg_unzipped_directory" + Invoke-Expression -Command $x64_unzip_cmd + + $arm64_nupkgs = (Get-ChildItem $(Build.BinariesDirectory)/nuget-artifact-arm64 -Filter Microsoft.ML.OnnxRuntime.QNN*.nupkg -Recurse) + $arm64_nuget_package = $arm64_nupkgs[0].FullName + + $arm64_unzip_cmd = "7z.exe x $arm64_nuget_package -y -o$nupkg_unzipped_directory" + Invoke-Expression -Command $arm64_unzip_cmd + + $merged_nuget_path = [System.IO.Path]::Combine($Env:BUILD_ARTIFACTSTAGINGDIRECTORY, 'nuget-artifact-merged') + if (!(Test-Path $merged_nuget_path)) { + New-Item -Path $merged_nuget_path -ItemType Directory + } + + $merged_zip = [System.IO.Path]::Combine($merged_nuget_path, 'qnn_nuget.zip') + 
$zip_cmd = "7z.exe a -r $merged_zip $nupkg_unzipped_directory/*" + Invoke-Expression -Command $zip_cmd + + $merged_nuget = [System.IO.Path]::Combine($merged_nuget_path, $nuget_package_name) + move $merged_zip $merged_nuget + workingDirectory: $(Build.BinariesDirectory) + + - template: templates/esrp_nuget.yml + parameters: + DisplayName: 'ESRP - sign NuGet package' + FolderPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged' + DoEsrp: ${{ parameters.DoEsrp }} + + - task: PublishPipelineArtifact@0 + displayName: 'Publish Pipeline NuGet Artifact' + inputs: + artifactName: 'drop-signed-nuget-qnn' + targetPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged' From bd5eb99252637fb5559eda1ce38167ed68705f92 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Thu, 10 Oct 2024 17:08:42 -0700 Subject: [PATCH 05/65] revert c-api-noopenmp-packaging-pipelines.yml --- .../c-api-noopenmp-packaging-pipelines.yml | 427 +----------------- 1 file changed, 2 insertions(+), 425 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index a24e40c1957ea..ba111f804a55e 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -62,7 +62,7 @@ parameters: - name: QnnSdk displayName: QNN SDK Version type: string - default: 2.22.0.240425 + default: 2.26.0.240828 resources: repositories: @@ -83,7 +83,7 @@ variables: value: 11.8 - name: win_trt_home - value: $(Agent.TempDirectory)\TensorRT-10.0.1.6.Windows10.x86_64.cuda-11.8 + value: $(Agent.TempDirectory)\TensorRT-10.4.0.26.Windows10.x86_64.cuda-11.8 - name: win_cuda_home value: $(Agent.TempDirectory)\v11.8 @@ -94,28 +94,6 @@ stages: PreReleaseVersionSuffixString: ${{ parameters.PreReleaseVersionSuffixString }} PreReleaseVersionSuffixNumber: ${{ parameters.PreReleaseVersionSuffixNumber }} 
-- stage: Debug - dependsOn: Setup - jobs: - - job: D1 - pool: - name: 'onnxruntime-Ubuntu2204-AMD-CPU' - variables: - MyVar: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']] - BuildDate: $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Date.BuildDate']] - BuildTime: $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Time.BuildTime']] - steps: - - checkout: none - - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 - displayName: 'Clean Agent Directories' - condition: always() - - bash: echo $(MyVar) - - bash: echo $(BuildTime) - - bash: echo $(BuildDate) - - template: templates/component-governance-component-detection-steps.yml - parameters : - condition : 'succeeded' - - template: stages/download-java-tools-stage.yml - template: templates/c-api-cpu.yml @@ -134,17 +112,6 @@ stages: SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} -- template: templates/ondevice-training-cpu-packaging-pipeline.yml - parameters: - RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} - DoCompliance: ${{ parameters.DoCompliance }} - DoEsrp: ${{ parameters.DoEsrp }} - IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - OrtNugetPackageId: 'Microsoft.ML.OnnxRuntime.Training' - AdditionalBuildFlags: '--enable_training_apis' - AdditionalWinBuildFlags: '--enable_onnx_tests --enable_wcos' - BuildVariant: 'default' - - template: stages/java-cuda-packaging-stage.yml parameters: CudaVersion: 11.8 @@ -155,7 +122,6 @@ stages: parameters: DoCompliance: ${{ parameters.DoCompliance }} CudaVersion: 11.8 - docker_base_image: 'nvidia/cuda:11.8.0-cudnn8-devel-ubi8' RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} win_trt_home: ${{ variables.win_trt_home }} @@ -167,304 +133,6 @@ stages: SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} -# ROCm -- 
stage: Linux_C_API_Packaging_ROCm_x64 - dependsOn: [] - jobs: - - job: Linux_C_API_Packaging_ROCm_x64 - workspace: - clean: all - timeoutInMinutes: 120 - pool: onnxruntime-Ubuntu2204-AMD-CPU - variables: - RocmVersion: '5.6' - steps: - - checkout: self # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/onnxruntime - submodules: recursive - - checkout: manylinux # due to checkout multiple repos, the root directory is $(Build.SourcesDirectory)/manylinux, for get-docker-image-steps.yml - submodules: false - - # get-docker-image-steps.yml will move the $(Build.SourcesDirectory)/manylinux into $(Build.SourcesDirectory)/onnxruntime, - # then rename $(Build.SourcesDirectory)/onnxruntime as $(Build.SourcesDirectory) - - template: templates/get-docker-image-steps.yml - parameters: - Dockerfile: tools/ci_build/github/linux/docker/Dockerfile.manylinux2_28_rocm - Context: tools/ci_build/github/linux/docker - DockerBuildArgs: >- - --build-arg INSTALL_DEPS_EXTRA_ARGS=-tmur - --build-arg BUILD_UID=$(id -u) - --network=host --build-arg POLICY=manylinux_2_28 --build-arg PLATFORM=x86_64 - --build-arg ROCM_VERSION=$(RocmVersion) - --build-arg DEVTOOLSET_ROOTPATH=/opt/rh/gcc-toolset-12/root - --build-arg PREPEND_PATH=/opt/rh/gcc-toolset-12/root/usr/bin: - --build-arg LD_LIBRARY_PATH_ARG=/opt/rh/gcc-toolset-12/root/usr/lib64:/opt/rh/gcc-toolset-12/root/usr/lib:/opt/rh/gcc-toolset-12/root/usr/lib64/dyninst:/opt/rh/gcc-toolset-12/root/usr/lib/dyninst:/usr/local/lib64:/usr/local/lib - Repository: onnxruntimetrainingrocmbuild-rocm$(RocmVersion) - CheckOutManyLinux: true - - - template: templates/set-version-number-variables-step.yml - - - task: Bash@3 - displayName: 'Build' - inputs: - targetType: filePath - filePath: tools/ci_build/github/linux/build_rocm_c_api_package.sh - arguments: >- - -S $(Build.SourcesDirectory) - -B $(Build.BinariesDirectory) - -V $(RocmVersion) - -I onnxruntimetrainingrocmbuild-rocm$(RocmVersion) - -P python3.10 - - - script: | - set 
-e -x - mkdir $(Build.ArtifactStagingDirectory)/testdata - cp $(Build.BinariesDirectory)/Release/libcustom_op_library.so* $(Build.ArtifactStagingDirectory)/testdata - ls -al $(Build.ArtifactStagingDirectory) - displayName: 'Create Artifacts for CustomOp' # libcustom_op_library.so from cpu build is built with fp8, ROCm does not support it. - - - template: templates/c-api-artifacts-package-and-publish-steps-posix.yml - parameters: - buildConfig: 'Release' - artifactName: 'onnxruntime-linux-x64-rocm-$(OnnxRuntimeVersion)' - artifactNameNoVersionString: 'onnxruntime-linux-x64-rocm' - libraryName: 'libonnxruntime.so.$(OnnxRuntimeVersion)' - - - template: templates/component-governance-component-detection-steps.yml - parameters: - condition: 'succeeded' - - template: templates/clean-agent-build-directory-step.yml - - -- stage: NuGet_Packaging_ROCm - dependsOn: - - Setup - - Linux_C_API_Packaging_ROCm_x64 - condition: succeeded() - jobs: - - job: NuGet_Packaging_ROCm - workspace: - clean: all - # we need to use the 2022 pool to create the nuget package with both pre-net6+Xamarin and net6 targets. 
- # VS2019 has no support for net6 and we need to use msbuild (from the VS install) to do the packing - pool: 'Onnxruntime-Win-CPU-2022' - variables: - breakCodesignValidationInjection: ${{ parameters.DoEsrp }} - ReleaseVersionSuffix: $[stageDependencies.Setup.Set_Variables.outputs['Set_Release_Version_Suffix.ReleaseVersionSuffix']] - BuildDate : $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Date.BuildDate']] - BuildTime : $[stageDependencies.Setup.Set_Variables.outputs['Set_Build_Time.BuildTime']] - - steps: - - checkout: self - submodules: true - fetchDepth: 1 - - - template: templates/flex-downloadPipelineArtifact.yml - parameters: - StepName: 'Download Pipeline Artifact - NuGet' - ArtifactName: 'onnxruntime-linux-x64-rocm' - targetPath: '$(Build.BinariesDirectory)/nuget-artifact' - SpecificArtifact: ${{ parameters.specificArtifact }} - BuildId: ${{ parameters.BuildId }} - - - task: PowerShell@2 - displayName: 'Reconstruct Build Directory' - inputs: - targetType: inline - script: | - Get-ChildItem $(Build.BinariesDirectory)\nuget-artifact -Filter *.tgz | % { - # *.tar will be created after *.tgz is extracted - $cmd = "7z.exe x $($_.FullName) -y -o$(Build.BinariesDirectory)\nuget-artifact" - Write-Output $cmd - Invoke-Expression -Command $cmd - } - - Get-ChildItem $(Build.BinariesDirectory)\nuget-artifact -Filter *.tar | % { - $cmd = "7z.exe x $($_.FullName) -y -o$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts" - Write-Output $cmd - Invoke-Expression -Command $cmd - } - - $ort_dirs = Get-ChildItem -Path $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts\onnxruntime-* -Directory - foreach ($ort_dir in $ort_dirs) - { - $dirname = Split-Path -Path $ort_dir -Leaf - $dirname = $dirname.SubString(0, $dirname.LastIndexOf('-')) - Write-Output "Renaming $ort_dir to $dirname" - Rename-Item -Path $ort_dir -NewName $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts\$dirname - } - - Copy-Item 
-Path $(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo\nuget-artifacts\onnxruntime-linux-x64-rocm\lib\* -Destination $(Build.BinariesDirectory)\RelWithDebInfo - - - script: | - tree /F - workingDirectory: '$(Build.BinariesDirectory)' - displayName: 'Inspect Build Binaries Directory' - - - script: | - mklink /D /J models C:\local\models - workingDirectory: '$(Build.BinariesDirectory)' - displayName: 'Create models link' - - - task: NuGetToolInstaller@0 - displayName: Use Nuget 6.2.1 - inputs: - versionSpec: 6.2.1 - - - task: PowerShell@2 - displayName: Build .NET 6 targets using dotnet - inputs: - targetType: 'inline' - # we don't specify 'Any CPU' as the platform here because if we do it gets added to the output path - # e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\Any CPU\RelWithDebInfo\net6.0-ios\ - # which is inconsistent with the msbuild output path for the pre-.net6 targets - # e.g. csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo\monoandroid11.0 - # and makes it harder to do the packing - # - # 'Any CPU' is the default (first 'mixed' platform specified in the csproj) so this should be fine. 
- script: | - dotnet build .\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj ` - -p:SelectedTargets=Net6 ` - /p:Net6Targets=net6.0 ` - -p:Configuration=RelWithDebInfo ` - -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" ` - -p:OrtPackageId="Microsoft.ML.OnnxRuntime.ROCm" ` - -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} ` - -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix) - workingDirectory: '$(Build.SourcesDirectory)\csharp' - - - task: MSBuild@1 - displayName: 'Restore NuGet Packages and create project.assets.json for pre-.net6 targets' - inputs: - solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' - platform: 'Any CPU' - configuration: RelWithDebInfo - msbuildArguments: '-t:restore -p:SelectedTargets=PreNet6 -p:OrtPackageId="Microsoft.ML.OnnxRuntime.ROCm"' - workingDirectory: '$(Build.SourcesDirectory)\csharp' - - - task: MSBuild@1 - displayName: 'Build C# for pre-.net6 targets' - inputs: - solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' - configuration: RelWithDebInfo - platform: 'Any CPU' - msbuildArguments: > - -p:SelectedTargets=PreNet6 - -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" - -p:OrtPackageId="Microsoft.ML.OnnxRuntime.ROCm" - -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} - -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix) - -p:IsLinuxBuild=true - -p:IsWindowsBuild=false - -p:IsMacOSBuild=false - workingDirectory: '$(Build.SourcesDirectory)\csharp' - - - template: templates/win-esrp-dll.yml - parameters: - FolderPath: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo' - DisplayName: 'ESRP - Sign C# dlls' - DoEsrp: ${{ parameters.DoEsrp }} - - - task: MSBuild@1 - displayName: Update projects.assets.json with combined list of all target frameworks - inputs: - solution: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj' - platform: 'Any CPU' - configuration: RelWithDebInfo - msbuildArguments: 
'-t:restore -p:SelectedTargets=All -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm' - workingDirectory: '$(Build.SourcesDirectory)\csharp' - - - task: MSBuild@1 - displayName: 'Build Nuget Packages' - inputs: - solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.proj' - configuration: RelWithDebInfo - platform: 'Any CPU' - msbuildArguments: '-t:CreatePackage -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm -p:IsReleaseBuild=${{ parameters.IsReleaseBuild }} -p:ReleaseVersionSuffix=$(ReleaseVersionSuffix) -p:CurrentTime=$(BuildTime) -p:CurrentDate=$(BuildDate)' - workingDirectory: '$(Build.SourcesDirectory)\csharp' - - - task: CopyFiles@2 - displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' - Contents: '*.snupkg' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: CopyFiles@2 - displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.BinariesDirectory)\RelWithDebInfo\RelWithDebInfo' - Contents: '*.nupkg' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - task: CopyFiles@2 - displayName: 'Copy nuget packages to: $(Build.ArtifactStagingDirectory)' - inputs: - SourceFolder: '$(Build.SourcesDirectory)\csharp\src\Microsoft.ML.OnnxRuntime\bin\RelWithDebInfo' - Contents: '*.nupkg' - TargetFolder: '$(Build.ArtifactStagingDirectory)' - - - template: templates/esrp_nuget.yml - parameters: - DisplayName: 'ESRP - sign NuGet package' - FolderPath: '$(Build.ArtifactStagingDirectory)' - DoEsrp: ${{ parameters.DoEsrp }} - - - template: templates/validate-package.yml - parameters: - PackageType: 'nuget' - PackagePath: '$(Build.ArtifactStagingDirectory)' - PackageName: 'Microsoft.ML.OnnxRuntime.*nupkg' - PlatformsSupported: 'linux-x64' - VerifyNugetSigning: false - - - task: PublishPipelineArtifact@0 - displayName: 'Publish Pipeline NuGet Artifact' 
- inputs: - artifactName: 'drop-signed-nuget-ROCm' - targetPath: '$(Build.ArtifactStagingDirectory)' - - - task: MSBuild@1 - displayName: 'Clean C#' - inputs: - solution: '$(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln' - platform: 'Any CPU' - configuration: RelWithDebInfo - msbuildArguments: '-t:Clean -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm' - workingDirectory: '$(Build.SourcesDirectory)\csharp' - - - task: RoslynAnalyzers@2 - displayName: 'Run Roslyn Analyzers' - inputs: - userProvideBuildInfo: msBuildInfo - msBuildCommandline: > - "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\MSBuild\Current\Bin\msbuild.exe" - $(Build.SourcesDirectory)\csharp\OnnxRuntime.CSharp.sln - -p:configuration="RelWithDebInfo" - -p:Platform="Any CPU" - -p:OnnxRuntimeBuildDirectory="$(Build.BinariesDirectory)" - -p:OrtPackageId=Microsoft.ML.OnnxRuntime.ROCm - -p:IsLinuxBuild=true - -p:IsWindowsBuild=false - -p:IsMacOSBuild=false - condition: and(succeeded(), eq('${{ parameters.DoCompliance }}', true)) - - - template: templates/component-governance-component-detection-steps.yml - parameters : - condition : 'succeeded' - - - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 - displayName: 'Clean Agent Directories' - condition: always() - -- template: nuget/templates/test_linux.yml - parameters: - AgentPool: AMD-GPU - ArtifactSuffix: 'ROCm' - StageSuffix: 'ROCm' - NugetPackageName: 'Microsoft.ML.OnnxRuntime.ROCm' - SpecificArtifact: ${{ parameters.specificArtifact }} - CustomOpArtifactName: 'onnxruntime-linux-x64-rocm' - BuildId: ${{ parameters.BuildId }} - template: nuget/templates/dml-vs-2022.yml parameters: @@ -630,94 +298,3 @@ stages: - template: templates/component-governance-component-detection-steps.yml parameters: condition: 'succeeded' - -- template: templates/qnn-ep-win.yml - parameters: - qnn_ep_build_pool_name: 'Onnxruntime-QNNEP-Windows-2022-CPU' - QnnSdk: ${{ 
parameters.QnnSdk }} - IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - DoEsrp: ${{ parameters.DoEsrp }} - ArtifactName: 'drop-nuget-qnn-x64' - StageName: 'OnnxRuntime_QNN_Nuget_Win_x64' - build_config: 'RelWithDebInfo' -- template: templates/qnn-ep-win.yml - parameters: - qnn_ep_build_pool_name: 'Onnxruntime-QNNEP-Windows-2022-CPU' - QnnSdk: ${{ parameters.QnnSdk }} - IsReleaseBuild: ${{ parameters.IsReleaseBuild }} - DoEsrp: ${{ parameters.DoEsrp }} - ArtifactName: 'drop-nuget-qnn-arm64' - buildParameter: '--arm64' - buildPlatform: 'ARM64' - buildArch: 'ARM64' - StageName: 'OnnxRuntime_QNN_Nuget_Win_Arm64' - build_config: 'RelWithDebInfo' - -- stage: NuGet_Packaging_QNN - pool: 'Onnxruntime-QNNEP-Windows-2022-CPU' - dependsOn: - - OnnxRuntime_QNN_Nuget_Win_x64 - - OnnxRuntime_QNN_Nuget_Win_Arm64 - condition: succeeded() - jobs: - - job: NuGet_Packaging_QNN - workspace: - clean: all - steps: - - task: DownloadPipelineArtifact@0 - displayName: 'Download Pipeline Artifact - QNN NuGet x64' - inputs: - artifactName: 'drop-nuget-qnn-x64' - targetPath: '$(Build.BinariesDirectory)/nuget-artifact-x64' - - - task: DownloadPipelineArtifact@0 - displayName: 'Download Pipeline Artifact - QNN NuGet arm64' - inputs: - artifactName: 'drop-nuget-qnn-arm64' - targetPath: '$(Build.BinariesDirectory)/nuget-artifact-arm64' - - - task: PowerShell@2 - displayName: 'Bundle NuGet' - inputs: - targetType: 'inline' - script: | - - $x64_nupkgs = (Get-ChildItem $(Build.BinariesDirectory)/nuget-artifact-x64 -Filter Microsoft.ML.OnnxRuntime.QNN*.nupkg -Recurse) - $nuget_package_name = $x64_nupkgs[0].Name - $x64_nuget_package = $x64_nupkgs[0].FullName - - $nupkg_unzipped_directory = [System.IO.Path]::Combine($Env:BUILD_ARTIFACTSTAGINGDIRECTORY, 'nuget_unzip_merged', [System.IO.Path]::GetFileNameWithoutExtension($nuget_package_name)) - - $x64_unzip_cmd = "7z.exe x $x64_nuget_package -y -o$nupkg_unzipped_directory" - Invoke-Expression -Command $x64_unzip_cmd - - $arm64_nupkgs = (Get-ChildItem 
$(Build.BinariesDirectory)/nuget-artifact-arm64 -Filter Microsoft.ML.OnnxRuntime.QNN*.nupkg -Recurse) - $arm64_nuget_package = $arm64_nupkgs[0].FullName - - $arm64_unzip_cmd = "7z.exe x $arm64_nuget_package -y -o$nupkg_unzipped_directory" - Invoke-Expression -Command $arm64_unzip_cmd - - $merged_nuget_path = [System.IO.Path]::Combine($Env:BUILD_ARTIFACTSTAGINGDIRECTORY, 'nuget-artifact-merged') - if (!(Test-Path $merged_nuget_path)) { - New-Item -Path $merged_nuget_path -ItemType Directory - } - - $merged_zip = [System.IO.Path]::Combine($merged_nuget_path, 'qnn_nuget.zip') - $zip_cmd = "7z.exe a -r $merged_zip $nupkg_unzipped_directory/*" - Invoke-Expression -Command $zip_cmd - - $merged_nuget = [System.IO.Path]::Combine($merged_nuget_path, $nuget_package_name) - move $merged_zip $merged_nuget - workingDirectory: $(Build.BinariesDirectory) - - - template: templates/esrp_nuget.yml - parameters: - DisplayName: 'ESRP - sign NuGet package' - FolderPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged' - DoEsrp: ${{ parameters.DoEsrp }} - - - task: PublishPipelineArtifact@0 - displayName: 'Publish Pipeline NuGet Artifact' - inputs: - artifactName: 'drop-signed-nuget-qnn' - targetPath: '$(Build.ArtifactStagingDirectory)/nuget-artifact-merged' From c162c88d0d136454f12c8915c19ea300a7308348 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Fri, 11 Oct 2024 10:24:54 -0700 Subject: [PATCH 06/65] adding --use_winml and --parallel --- .../azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml index bf5fc661fd70c..77c1e1470ea31 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: 
x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --use_winml --build_csharp --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu @@ -68,7 +68,7 @@ stages: msbuildPlatform: x64 CudaVersion: ${{ parameters.CudaVersion }} packageName: x64-tensorrt - buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" + buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu From e229292c363cd178bb17278d160ff06219f697f7 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Fri, 11 Oct 2024 14:05:18 -0700 Subject: [PATCH 07/65] Remove --use_winml --- .../azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml index 77c1e1470ea31..77701c8ff934d 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 
packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --use_winml --build_csharp --parallel + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --build_csharp --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu From fb68a59f15320d32ed007981b3368aac540191ad Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Mon, 21 Oct 2024 21:21:53 -0400 Subject: [PATCH 08/65] remove --test from build --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 52547fd9a796b..9eb9e9558405a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -214,7 +214,7 @@ stages: condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "$(VSGenerator)" --enable_onnx_tests $(TelemetryOption) ${{ parameters.buildparameter }}' + arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --cmake_generator "$(VSGenerator)" --enable_onnx_tests $(TelemetryOption) ${{ parameters.buildparameter }}' workingDirectory: 
'$(Build.BinariesDirectory)' - ${{ else }}: - powershell: | @@ -389,7 +389,7 @@ stages: condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' + arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --enable_onnx_tests $(TelemetryOption) ' workingDirectory: '$(Build.BinariesDirectory)' # Previous stage only assembles the java binaries, testing will be done in this stage with GPU machine - ${{ if eq(parameters.buildJava, 'true') }}: From 04717a87ba7d817b00283fd3233af0a655f6f94c Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Tue, 22 Oct 2024 14:14:34 -0400 Subject: [PATCH 09/65] Update --- .../github/azure-pipelines/cuda-packaging-pipeline.yml | 2 +- .../github/azure-pipelines/nuget-cuda-publishing-pipeline.yml | 2 +- tools/ci_build/github/azure-pipelines/publish-nuget.yml | 2 +- ...get-combine-gpu-stage.yml => nuget-combine-cuda-stage.yml} | 4 ++-- ...gpu-packaging-stage.yml => nuget-cuda-packaging-stage.yml} | 0 ...u-publishing-stage.yml => nuget-cuda-publishing-stage.yml} | 0 ...packaging-stage.yml => nuget-win-cuda-packaging-stage.yml} | 4 ++-- 7 files changed, 7 insertions(+), 7 deletions(-) rename tools/ci_build/github/azure-pipelines/stages/{nuget-combine-gpu-stage.yml => nuget-combine-cuda-stage.yml} (96%) rename tools/ci_build/github/azure-pipelines/stages/{nuget-gpu-packaging-stage.yml => nuget-cuda-packaging-stage.yml} (100%) rename tools/ci_build/github/azure-pipelines/stages/{nuget-gpu-publishing-stage.yml => nuget-cuda-publishing-stage.yml} (100%) rename 
tools/ci_build/github/azure-pipelines/stages/{nuget-win-gpu-packaging-stage.yml => nuget-win-cuda-packaging-stage.yml} (98%) diff --git a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml index 6a245eed44828..7118e85e9ea4b 100644 --- a/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml @@ -105,7 +105,7 @@ stages: PackageNodeJS: false # Nuget Packaging - - template: stages/nuget-combine-gpu-stage.yml + - template: stages/nuget-combine-cuda-stage.yml parameters: DoCompliance: ${{ parameters.DoCompliance }} CudaVersion: ${{ parameters.CudaVersion }} diff --git a/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml b/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml index 0e17bef4c8f73..aeb250e1e0cbc 100644 --- a/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/nuget-cuda-publishing-pipeline.yml @@ -22,7 +22,7 @@ variables: value: onnxruntime-cuda-12 stages: - - template: stages/nuget-gpu-publishing-stage.yml + - template: stages/nuget-cuda-publishing-stage.yml parameters: artifact_feed: $(ArtifactFeed) diff --git a/tools/ci_build/github/azure-pipelines/publish-nuget.yml b/tools/ci_build/github/azure-pipelines/publish-nuget.yml index 42ead98209505..b78d586288ba3 100644 --- a/tools/ci_build/github/azure-pipelines/publish-nuget.yml +++ b/tools/ci_build/github/azure-pipelines/publish-nuget.yml @@ -33,7 +33,7 @@ stages: - script: move "$(Pipeline.Workspace)\build\drop-signed-nuget-dml\*" $(Build.BinariesDirectory)\nuget-artifact\final-package # Publish CUDA 11 Nuget/Java pkgs to ADO feed - - template: stages/nuget-gpu-publishing-stage.yml + - template: stages/nuget-cuda-publishing-stage.yml parameters: artifact_feed: $(ArtifactFeed) diff --git 
a/tools/ci_build/github/azure-pipelines/stages/nuget-combine-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml similarity index 96% rename from tools/ci_build/github/azure-pipelines/stages/nuget-combine-gpu-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml index f10d700e7d1d9..9c7fbc24ab1b6 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-combine-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml @@ -42,7 +42,7 @@ stages: buildJava: ${{ parameters.buildJava }} buildNodejs: ${{ parameters.buildNodejs }} -- template: nuget-win-gpu-packaging-stage.yml +- template: nuget-win-cuda-packaging-stage.yml parameters: RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} @@ -51,7 +51,7 @@ stages: win_cuda_home: ${{ parameters.win_cuda_home }} buildJava: ${{ parameters.buildJava }} -- template: nuget-gpu-packaging-stage.yml +- template: nuget-cuda-packaging-stage.yml parameters: DoCompliance: ${{ parameters.DoCompliance }} DoEsrp: ${{ parameters.DoEsrp }} diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-cuda-packaging-stage.yml similarity index 100% rename from tools/ci_build/github/azure-pipelines/stages/nuget-gpu-packaging-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-cuda-packaging-stage.yml diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-gpu-publishing-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-cuda-publishing-stage.yml similarity index 100% rename from tools/ci_build/github/azure-pipelines/stages/nuget-gpu-publishing-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-cuda-publishing-stage.yml diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml 
b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml similarity index 98% rename from tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml rename to tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index 77701c8ff934d..445066f08995a 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --build_csharp --parallel + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu @@ -68,7 +68,7 @@ stages: msbuildPlatform: x64 CudaVersion: ${{ parameters.CudaVersion }} packageName: x64-tensorrt - buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --parallel + buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu From 2401b80d0b512dcb776757f46c70e7fd3b0d262e Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Tue, 22 Oct 2024 14:15:33 -0400 Subject: [PATCH 10/65] Merge with main --- 
.../azure-pipelines/c-api-noopenmp-packaging-pipelines.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml index 4d76f4c610def..b12360d2710d0 100644 --- a/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml +++ b/tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml @@ -118,7 +118,7 @@ stages: SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} -- template: stages/nuget-combine-gpu-stage.yml +- template: stages/nuget-combine-cuda-stage.yml parameters: DoCompliance: ${{ parameters.DoCompliance }} CudaVersion: 11.8 From a9e47fb2fedfc4fb4b484cbc900b63b15948b458 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Tue, 22 Oct 2024 14:21:18 -0400 Subject: [PATCH 11/65] parallel --- .../azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index 445066f08995a..1adebe434a64c 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} 
java_artifact_id: onnxruntime_gpu From 4d0ce6dcc9670cb348825d77ed6f8d3b1cbe9d38 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Tue, 22 Oct 2024 14:25:14 -0400 Subject: [PATCH 12/65] --use_dml --build_csharp --parallel --- .../azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml | 2 +- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index 1adebe434a64c..4ce7a13ca2cc6 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --parallel + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --build_csharp --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index cc8048e2e84ce..259fe79de243a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -25,7 +25,7 @@ parameters: - name: runTests type: boolean - default: true + default: false - name: buildJava type: boolean From 24750acfe47e5a9b383404c7c9c3842182a62416 Mon Sep 17 00:00:00 2001 From: Jian Chen Date: Tue, 22 Oct 2024 14:26:00 -0400 Subject: [PATCH 13/65] --use_dml --build_csharp 
--parallel --- .../azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index 4ce7a13ca2cc6..be9abc5640cf4 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -68,7 +68,7 @@ stages: msbuildPlatform: x64 CudaVersion: ${{ parameters.CudaVersion }} packageName: x64-tensorrt - buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" + buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu From 3ddf44be50ac2b25ae205acefc02966e46411422 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 13:33:14 +0800 Subject: [PATCH 14/65] verify image --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 259fe79de243a..cb2d95171979f 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -319,7 +319,10 @@ stages: - job: Windows_Packaging_${{ parameters.stage_name_suffix }}_Testing workspace: clean: all - pool: ${{ parameters.ort_build_pool_name }} + pool: + name: ${{ 
parameters.ort_build_pool_name }} + demainds: + - ImageVersionOverride: 233.0.0 timeoutInMinutes: 180 steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 From c16ded2bccc59db6f41cee10dbe348ae5752eca8 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 21:45:23 +0800 Subject: [PATCH 15/65] update --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index cb2d95171979f..06bf5421e1b16 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -214,7 +214,7 @@ stages: condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --cmake_generator "$(VSGenerator)" --enable_onnx_tests $(TelemetryOption) ${{ parameters.buildparameter }}' + arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --test --skip_submodule_sync --build_shared_lib --cmake_generator "$(VSGenerator)" --enable_onnx_tests $(TelemetryOption) ${{ parameters.buildparameter }}' workingDirectory: '$(Build.BinariesDirectory)' - ${{ else }}: - powershell: | @@ -392,7 +392,7 @@ stages: condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --enable_onnx_tests $(TelemetryOption) ' + 
arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' workingDirectory: '$(Build.BinariesDirectory)' # Previous stage only assembles the java binaries, testing will be done in this stage with GPU machine - ${{ if eq(parameters.buildJava, 'true') }}: From 5a13349f6b7abc7e81b260ddeebcc569eb693e49 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 22:01:06 +0800 Subject: [PATCH 16/65] typo --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 06bf5421e1b16..7a319e6c8d27e 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -321,7 +321,7 @@ stages: clean: all pool: name: ${{ parameters.ort_build_pool_name }} - demainds: + demands: - ImageVersionOverride: 233.0.0 timeoutInMinutes: 180 steps: From e21c3875411117e5cc65781d1cce83ef509e3f3d Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 22:05:39 +0800 Subject: [PATCH 17/65] update1 --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 7a319e6c8d27e..0fac73831f527 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -321,8 +321,7 @@ stages: clean: all pool: name: ${{ parameters.ort_build_pool_name }} - demands: - - ImageVersionOverride: 233.0.0 + demands: ImageVersionOverride -equals 233.0.0 timeoutInMinutes: 180 steps: - task: 
mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 From 1e45aafdf9a9e0634834927d2622cc02669d9056 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 22:09:14 +0800 Subject: [PATCH 18/65] update 2 --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 0fac73831f527..136dadd9ae52a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -321,7 +321,7 @@ stages: clean: all pool: name: ${{ parameters.ort_build_pool_name }} - demands: ImageVersionOverride -equals 233.0.0 + demands: ImageVersionOverride -equals 248.0.0 timeoutInMinutes: 180 steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 From 73ecd6012655fdfbe44fec992a8b3d19b0818ee9 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 22:22:36 +0800 Subject: [PATCH 19/65] print log --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 136dadd9ae52a..dd3ee17184f15 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -328,6 +328,10 @@ stages: displayName: 'Clean Agent Directories' condition: always() + - script: + echo ${{ parameters.SpecificArtifact }} + displayName: 'Print Specific Artifact' + - checkout: self clean: true submodules: none From 9fd99e4a447d26f34481308946534f7daa915bc9 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 22:35:04 +0800 Subject: [PATCH 20/65] update 3 --- .../github/azure-pipelines/stages/nuget-combine-cuda-stage.yml | 2 ++ 1 file changed, 2 insertions(+) diff 
--git a/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml index 9c7fbc24ab1b6..0b3eac0110abc 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml @@ -50,6 +50,8 @@ stages: win_trt_home: ${{ parameters.win_trt_home }} win_cuda_home: ${{ parameters.win_cuda_home }} buildJava: ${{ parameters.buildJava }} + SpecificArtifact: ${{ parameters.SpecificArtifact }} + BuildId: ${{ parameters.BuildId }} - template: nuget-cuda-packaging-stage.yml parameters: From ffa9c2bb60f83ee1e9318b9ce70df822b1135f73 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 23:38:14 +0800 Subject: [PATCH 21/65] test filter --- tools/ci_build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 9624f9112c49f..2677e759d0ef3 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2072,7 +2072,7 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs): executables.append("onnxruntime_global_thread_pools_test") executables.append("onnxruntime_customopregistration_test") for exe in executables: - test_output = f"--gtest_output=xml:{cwd}/{exe}.{config}.results.xml" + test_output = f"--gtest_filter==*FusedMatMulOpTest* --gtest_output=xml:{cwd}/{exe}.{config}.results.xml" run_subprocess([os.path.join(cwd, exe), test_output], cwd=cwd, dll_path=dll_path) else: ctest_cmd = [ctest_path, "--build-config", config, "--verbose", "--timeout", args.test_all_timeout] From 7490e44586c50944abb730ee7e7c2829096688ce Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 24 Oct 2024 23:54:09 +0800 Subject: [PATCH 22/65] test filter1 --- tools/ci_build/build.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 
2677e759d0ef3..2bc7be6b0115c 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2072,7 +2072,7 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs): executables.append("onnxruntime_global_thread_pools_test") executables.append("onnxruntime_customopregistration_test") for exe in executables: - test_output = f"--gtest_filter==*FusedMatMulOpTest* --gtest_output=xml:{cwd}/{exe}.{config}.results.xml" + test_output = f"--gtest_filter=*FusedMatMulOpTest* --gtest_output=xml:{cwd}/{exe}.{config}.results.xml" run_subprocess([os.path.join(cwd, exe), test_output], cwd=cwd, dll_path=dll_path) else: ctest_cmd = [ctest_path, "--build-config", config, "--verbose", "--timeout", args.test_all_timeout] From 09fc7ec1c4f64cc40f893228688607f7058acaaa Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 25 Oct 2024 00:07:52 +0800 Subject: [PATCH 23/65] complete A10 --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index dd3ee17184f15..aa315002bb0b3 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -319,9 +319,9 @@ stages: - job: Windows_Packaging_${{ parameters.stage_name_suffix }}_Testing workspace: clean: all - pool: - name: ${{ parameters.ort_build_pool_name }} - demands: ImageVersionOverride -equals 248.0.0 + pool: zhanyi_test_A100_pool + # name: ${{ parameters.ort_build_pool_name }} + # demands: ImageVersionOverride -equals 248.0.0 timeoutInMinutes: 180 steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 From 652950e586b06602cf6fee51c95e661ed78b4847 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 25 Oct 2024 07:34:12 +0800 Subject: [PATCH 24/65] add --use-winml --- .../stages/nuget-win-cuda-packaging-stage.yml | 4 
++-- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index be9abc5640cf4..99b972fac2455 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -34,7 +34,7 @@ parameters: displayName: Specific Artifact's BuildId type: string default: '0' - + - name: buildJava type: boolean @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --build_csharp --parallel + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --use_winml --build_csharp --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index aa315002bb0b3..dd3ee17184f15 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -319,9 +319,9 @@ stages: - job: Windows_Packaging_${{ parameters.stage_name_suffix }}_Testing workspace: clean: all - pool: zhanyi_test_A100_pool - # name: ${{ parameters.ort_build_pool_name }} - # demands: ImageVersionOverride -equals 248.0.0 + pool: + name: ${{ parameters.ort_build_pool_name }} + demands: ImageVersionOverride -equals 248.0.0 timeoutInMinutes: 180 steps: - task: 
mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 From 982a67411a17489c7e2427c0aa96fafb55731b0a Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 25 Oct 2024 15:17:19 +0800 Subject: [PATCH 25/65] split cuda and dml test --- onnxruntime/test/util/default_providers.cc | 18 +++++++-- .../stages/nuget-win-cuda-packaging-stage.yml | 1 + .../azure-pipelines/templates/win-ci.yml | 39 +++++++++++++++---- 3 files changed, 48 insertions(+), 10 deletions(-) diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index d57a22f024d5f..3039c6cae0398 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -147,6 +147,12 @@ std::unique_ptr DefaultCudaNHWCExecutionProvider() { std::unique_ptr CudaExecutionProviderWithOptions(const OrtCUDAProviderOptionsV2* provider_options) { #ifdef USE_CUDA +#ifdef USE_CUDA + const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); + if (no_dml_ep_test == "1") { + return nullptr; + } +#endif if (auto factory = CudaProviderFactoryCreator::Create(provider_options)) return factory->CreateProvider(); #else @@ -324,10 +330,16 @@ std::unique_ptr DefaultCannExecutionProvider() { std::unique_ptr DefaultDmlExecutionProvider() { #ifdef USE_DML - ConfigOptions config_options{}; - if (auto factory = DMLProviderFactoryCreator::CreateFromDeviceOptions(config_options, nullptr, false, false)) { - return factory->CreateProvider(); +#ifdef USE_CUDA + const std::string no_cuda_ep_test = Env::Default().GetEnvironmentVar("NO_CUDA_TEST"); + if (no_cuda_ep_test == "1") { + return nullptr; } +#endif + ConfigOptions config_options{}; + if (auto factory = DMLProviderFactoryCreator::CreateFromDeviceOptions(config_options, nullptr, false, false)) { + return factory->CreateProvider(); + } #endif return nullptr; } diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml 
b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index 99b972fac2455..bf64361102df2 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -57,6 +57,7 @@ stages: UseIncreasedTimeoutForTests: ${{ parameters.UseIncreasedTimeoutForTests }} SpecificArtifact: ${{ parameters.SpecificArtifact }} BuildId: ${{ parameters.BuildId }} + ComboTests: true # Windows CUDA with TensorRT Packaging - template: ../templates/win-ci.yml parameters: diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index dd3ee17184f15..fdbb665697eb8 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -71,6 +71,10 @@ parameters: - 11.8 - 12.2 +- name: ComboTests + type: boolean + default: false + - name: SpecificArtifact displayName: Use Specific Artifact type: boolean @@ -390,13 +394,34 @@ stages: displayName: 'Append dotnet x86 Directory to PATH' condition: and(succeeded(), eq('${{ parameters.buildArch}}', 'x86')) - - task: PythonScript@0 - displayName: 'test' - condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) - inputs: - scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' - arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' - workingDirectory: '$(Build.BinariesDirectory)' + - ${{ if eq(parameters.ComboTests, 'true') }}: + - task: PythonScript@0 + displayName: 'test' + condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) + inputs: + scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' + arguments: '--config RelWithDebInfo 
--use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' + workingDirectory: '$(Build.BinariesDirectory)' + environment: + NO_CUDA_TEST: 1 + - task: PythonScript@0 + displayName: 'test' + condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) + inputs: + scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' + arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' + workingDirectory: '$(Build.BinariesDirectory)' + environment: + NO_DML_TEST: 1 + - ${{ else }}: + - task: PythonScript@0 + displayName: 'test' + condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) + inputs: + scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' + arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' + workingDirectory: '$(Build.BinariesDirectory)' + # Previous stage only assembles the java binaries, testing will be done in this stage with GPU machine - ${{ if eq(parameters.buildJava, 'true') }}: - template: make_java_win_binaries.yml From e6c18de492f5138ed1f055330db7f1b5552ecdd5 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 25 Oct 2024 15:18:26 +0800 Subject: [PATCH 26/65] update --- .../azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index bf64361102df2..c2d44f4d249aa 100644 --- 
a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -50,7 +50,7 @@ stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --use_winml --build_csharp --parallel + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --build_csharp --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu From 6a5118ee0285d7df16b5419d856691838fdd0189 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 25 Oct 2024 15:21:17 +0800 Subject: [PATCH 27/65] update 1 --- .../ci_build/github/azure-pipelines/templates/win-ci.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index fdbb665697eb8..11e3ce04c5c85 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -396,22 +396,22 @@ stages: - ${{ if eq(parameters.ComboTests, 'true') }}: - task: PythonScript@0 - displayName: 'test' + displayName: 'test excludes CUDA' condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' workingDirectory: '$(Build.BinariesDirectory)' - environment: + env: NO_CUDA_TEST: 1 - task: 
PythonScript@0 - displayName: 'test' + displayName: 'test excludes DML' condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) inputs: scriptPath: '$(Build.SourcesDirectory)\tools\ci_build\build.py' arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' workingDirectory: '$(Build.BinariesDirectory)' - environment: + env: NO_DML_TEST: 1 - ${{ else }}: - task: PythonScript@0 From 65223279ffd05e8d102ec32cd901f0881b0526b4 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 25 Oct 2024 16:27:21 +0800 Subject: [PATCH 28/65] update 3 --- onnxruntime/test/util/default_providers.cc | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 3039c6cae0398..d79151d79fce3 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -122,6 +122,12 @@ std::unique_ptr DefaultOpenVINOExecutionProvider() { std::unique_ptr DefaultCudaExecutionProvider() { #ifdef USE_CUDA +#ifdef USE_DML + const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); + if (no_dml_ep_test == "1") { + return nullptr; + } +#endif OrtCUDAProviderOptionsV2 provider_options{}; provider_options.do_copy_in_default_stream = true; provider_options.use_tf32 = false; @@ -147,12 +153,6 @@ std::unique_ptr DefaultCudaNHWCExecutionProvider() { std::unique_ptr CudaExecutionProviderWithOptions(const OrtCUDAProviderOptionsV2* provider_options) { #ifdef USE_CUDA -#ifdef USE_CUDA - const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); - if (no_dml_ep_test == "1") { - return nullptr; - } -#endif if (auto factory = CudaProviderFactoryCreator::Create(provider_options)) return factory->CreateProvider(); #else @@ -336,10 
+336,10 @@ std::unique_ptr DefaultDmlExecutionProvider() { return nullptr; } #endif - ConfigOptions config_options{}; - if (auto factory = DMLProviderFactoryCreator::CreateFromDeviceOptions(config_options, nullptr, false, false)) { - return factory->CreateProvider(); - } + ConfigOptions config_options{}; + if (auto factory = DMLProviderFactoryCreator::CreateFromDeviceOptions(config_options, nullptr, false, false)) { + return factory->CreateProvider(); + } #endif return nullptr; } From 974ee3a2f65764526769fc302cfb3f906cdf8841 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Fri, 25 Oct 2024 22:07:34 +0800 Subject: [PATCH 29/65] update tests --- .../test/contrib_ops/matmul_4bits_test.cc | 23 ++++++++++++++----- .../matmul_integer_to_float_test.cc | 2 +- onnxruntime/test/lora/lora_test.cc | 16 +++++++++++++ onnxruntime/test/providers/cpu/model_tests.cc | 12 ++++++++++ tools/ci_build/build.py | 2 +- 5 files changed, 47 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc index 8138829b057f2..9fa1e155f0d7a 100644 --- a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc @@ -485,13 +485,17 @@ void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, int64_t accura std::vector> execution_providers; if (use_float16) { #ifdef USE_CUDA - execution_providers.push_back(DefaultCudaExecutionProvider()); + if (DefaultCudaExecutionProvider() != nullptr) { + execution_providers.push_back(DefaultCudaExecutionProvider()); + } #endif #ifdef USE_ROCM execution_providers.push_back(DefaultRocmExecutionProvider()); #endif #ifdef USE_DML - execution_providers.push_back(DefaultDmlExecutionProvider()); + if (DefaultDmlExecutionProvider() != nullptr) { + execution_providers.push_back(DefaultDmlExecutionProvider()); + } #endif RunTest(opts, std::move(execution_providers)); @@ -506,8 +510,11 @@ void RunTest(int64_t M, int64_t N, int64_t K, 
int64_t block_size, int64_t accura } // namespace TEST(MatMulNBits, Float16Cuda) { -#if defined(USE_CUDA) || defined(USE_ROCM) +#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) auto has_gidx_options = {true, false}; + if (DefaultDmlExecutionProvider() != nullptr) { + has_gidx_options = {false}; + } #else auto has_gidx_options = {false}; #endif @@ -518,7 +525,9 @@ TEST(MatMulNBits, Float16Cuda) { for (auto block_size : {16, 32, 64, 128}) { for (auto has_gidx : has_gidx_options) { #ifdef USE_DML - RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f); + if (DefaultDmlExecutionProvider() != nullptr) { + RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f); + } #else RunTest(M, N, K, block_size, 0, false, true, has_gidx); RunTest(M, N, K, block_size, 0, true, true, has_gidx, false); @@ -531,12 +540,15 @@ TEST(MatMulNBits, Float16Cuda) { } TEST(MatMulNBits, Float16Large) { -#ifdef USE_DML +#if defined(USE_CUDA) || defined(USE_DML) // For some reason, the A10 machine that runs these tests during CI has a much bigger error than all retail // machines we tested on. All consumer-grade machines from Nvidia/AMD/Intel seem to pass these tests with an // absolute error of 0.08, but the A10 has errors going as high as 0.22. Ultimately, given the large number // of elements in this test, ULPs should probably be used instead of absolute/relative tolerances. 
float abs_error = 0.3f; + if (DefaultDmlExecutionProvider() != nullptr) { + abs_error = 0.05f; + } #else float abs_error = 0.05f; #endif @@ -549,7 +561,6 @@ TEST(MatMulNBits, Float16Large) { } } } - #endif // defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) } // namespace test } // namespace onnxruntime diff --git a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc index 8d7629b5fda1c..d88c3131a4ca5 100644 --- a/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_integer_to_float_test.cc @@ -227,7 +227,7 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) { } // DML EP supports Float16 output type and Signed A Matrix and Unsigned B Matric for Float32 output -#if defined(USE_DML) +#if defined(USE_DML) && !defined(USE_CUDA) TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) { RunMatMulIntegerToFloatTest(); diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index fde603858f9a9..4155cb7abc279 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -201,6 +201,14 @@ TEST(LoraAdapterTest, Load) { #ifdef USE_CUDA TEST(LoraAdapterTest, VerifyCudaDeviceCopy) { + if (DefaultCudaExecutionProvider() == nullptr) { + GTEST_SKIP() << "Skip This Test Due to this EP is null"; + } +#ifdef USE_DML + if (DefaultDmlExecutionProvider() == nullptr) { + GTEST_SKIP() << "It should not run with DML EP"; + } +#endif auto cpu_ep = DefaultCpuExecutionProvider(); auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0]; auto cuda_allocator = DefaultCudaExecutionProvider()->CreatePreferredAllocators()[0]; @@ -234,6 +242,14 @@ TEST(LoraAdapterTest, VerifyCudaDeviceCopy) { #ifdef USE_DML TEST(LoraAdapterTest, VerifyDmlDeviceCopy) { + if (DefaultDmlExecutionProvider() == nullptr) { + GTEST_SKIP() << "Skip This Test Due to this EP is null"; + } +#ifdef USE_CUDA + if 
(DefaultCudaExecutionProvider() == nullptr) { + GTEST_SKIP() << "It should not run with CUDA EP"; + } +#endif auto cpu_ep = DefaultCpuExecutionProvider(); auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0]; diff --git a/onnxruntime/test/providers/cpu/model_tests.cc b/onnxruntime/test/providers/cpu/model_tests.cc index e3c86a137484f..b46c253fb8ed9 100644 --- a/onnxruntime/test/providers/cpu/model_tests.cc +++ b/onnxruntime/test/providers/cpu/model_tests.cc @@ -491,6 +491,18 @@ ::std::vector<::std::basic_string> GetParameterStrings() { // the number of times these are run to reduce the CI time. provider_names.erase(provider_name_cpu); #endif + +#if defined(USE_CUDA) && defined(USE_DML) + const std::string no_cuda_ep_test = Env::Default().GetEnvironmentVar("NO_CUDA_TEST"); + if (no_cuda_ep_test == "1") { + provider_names.erase(provider_name_cuda); + } + const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); + if (no_dml_ep_test == "1") { + provider_names.erase(provider_name_dml); + } +#endif + std::vector> v; // Permanently exclude following tests because ORT support only opset starting from 7, // Please make no more changes to the list diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 2bc7be6b0115c..9624f9112c49f 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -2072,7 +2072,7 @@ def run_onnxruntime_tests(args, source_dir, ctest_path, build_dir, configs): executables.append("onnxruntime_global_thread_pools_test") executables.append("onnxruntime_customopregistration_test") for exe in executables: - test_output = f"--gtest_filter=*FusedMatMulOpTest* --gtest_output=xml:{cwd}/{exe}.{config}.results.xml" + test_output = f"--gtest_output=xml:{cwd}/{exe}.{config}.results.xml" run_subprocess([os.path.join(cwd, exe), test_output], cwd=cwd, dll_path=dll_path) else: ctest_cmd = [ctest_path, "--build-config", config, "--verbose", "--timeout", args.test_all_timeout] From 
9494656ed2eac36225e096f87d159ae842bc73c5 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 10:37:22 +0800 Subject: [PATCH 30/65] onnxruntime4j_test --- java/src/test/java/ai/onnxruntime/InferenceTest.java | 9 ++++++++- onnxruntime/test/contrib_ops/matmul_4bits_test.cc | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index e11537492d3a7..e16a0526efd8e 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -740,7 +740,14 @@ public void testCoreML() throws OrtException { @Test @EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { - runProvider(OrtProvider.DIRECT_ML); + if (System.getProperty("USE_CUDA") != "1") { + runProvider(OrtProvider.DIRECT_ML); + } else if(System.getProperty("USE_CUDA") == "1" && System.getenv("NO_CUDA_TEST") == "1" ) { + runProvider(OrtProvider.DIRECT_ML); + } else { + System.out.println("Skipping DirectML test because CUDA EP test is enabled."); + return; + } } @Test diff --git a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc index 9fa1e155f0d7a..f3ad5618f267f 100644 --- a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc @@ -511,9 +511,9 @@ void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, int64_t accura TEST(MatMulNBits, Float16Cuda) { #if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML) - auto has_gidx_options = {true, false}; + std::vector has_gidx_options = {true, false}; if (DefaultDmlExecutionProvider() != nullptr) { - has_gidx_options = {false}; + has_gidx_options.assign(1, false); } #else auto has_gidx_options = {false}; From 1b213cba17d40b21d73f29a1e1a21d1faeb69140 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 
13:58:56 +0800 Subject: [PATCH 31/65] typo --- java/src/test/java/ai/onnxruntime/InferenceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index e16a0526efd8e..b2611a322fa16 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -740,7 +740,7 @@ public void testCoreML() throws OrtException { @Test @EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { - if (System.getProperty("USE_CUDA") != "1") { + if (System.getenv("USE_CUDA") != "1") { runProvider(OrtProvider.DIRECT_ML); } else if(System.getProperty("USE_CUDA") == "1" && System.getenv("NO_CUDA_TEST") == "1" ) { runProvider(OrtProvider.DIRECT_ML); From 4019016885a87b1c3dcbbd84aebcc1eedf950ee4 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 14:12:33 +0800 Subject: [PATCH 32/65] update --- .../src/test/java/ai/onnxruntime/InferenceTest.java | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index b2611a322fa16..724ccbcf4756e 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -740,13 +740,14 @@ public void testCoreML() throws OrtException { @Test @EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { - if (System.getenv("USE_CUDA") != "1") { - runProvider(OrtProvider.DIRECT_ML); - } else if(System.getProperty("USE_CUDA") == "1" && System.getenv("NO_CUDA_TEST") == "1" ) { - runProvider(OrtProvider.DIRECT_ML); + String no_cuda_test = System.getenv("NO_CUDA_TEST"); + if (no_cuda_test == null || no_cuda_test.isEmpty() || no_cuda_test != "1") { + if(System.getProperty("USE_CUDA") == "1") 
{ + System.out.println("Skipping DirectML test because CUDA EP test is enabled."); + return; + } } else { - System.out.println("Skipping DirectML test because CUDA EP test is enabled."); - return; + runProvider(OrtProvider.CORE_ML); } } From abe4326c2c796393efa8bcbd1b0e7d3c9939a539 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 14:33:16 +0800 Subject: [PATCH 33/65] update --- java/src/test/java/ai/onnxruntime/InferenceTest.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index 724ccbcf4756e..54ef492ac016d 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -741,8 +741,8 @@ public void testCoreML() throws OrtException { @EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { String no_cuda_test = System.getenv("NO_CUDA_TEST"); - if (no_cuda_test == null || no_cuda_test.isEmpty() || no_cuda_test != "1") { - if(System.getProperty("USE_CUDA") == "1") { + if (no_cuda_test == null || no_cuda_test.isEmpty() || ! 
no_cuda_test.equals("1")) { + if (System.getProperty("USE_CUDA").equals("1")) { System.out.println("Skipping DirectML test because CUDA EP test is enabled."); return; } From f24e621a6915bd0f83f1cdb26b1b1b30112bab45 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 15:23:29 +0800 Subject: [PATCH 34/65] update --- onnxruntime/test/lora/lora_test.cc | 4 ++-- onnxruntime/test/util/default_providers.cc | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index 4155cb7abc279..bfb6dfcc606fe 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -205,7 +205,7 @@ TEST(LoraAdapterTest, VerifyCudaDeviceCopy) { GTEST_SKIP() << "Skip This Test Due to this EP is null"; } #ifdef USE_DML - if (DefaultDmlExecutionProvider() == nullptr) { + if (DefaultDmlExecutionProvider() != nullptr) { GTEST_SKIP() << "It should not run with DML EP"; } #endif @@ -246,7 +246,7 @@ TEST(LoraAdapterTest, VerifyDmlDeviceCopy) { GTEST_SKIP() << "Skip This Test Due to this EP is null"; } #ifdef USE_CUDA - if (DefaultCudaExecutionProvider() == nullptr) { + if (DefaultCudaExecutionProvider() != nullptr) { GTEST_SKIP() << "It should not run with CUDA EP"; } #endif diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index d79151d79fce3..ec391c397abac 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -124,7 +124,7 @@ std::unique_ptr DefaultCudaExecutionProvider() { #ifdef USE_CUDA #ifdef USE_DML const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); - if (no_dml_ep_test == "1") { + if (no_dml_ep_test != "1") { return nullptr; } #endif @@ -332,7 +332,7 @@ std::unique_ptr DefaultDmlExecutionProvider() { #ifdef USE_DML #ifdef USE_CUDA const std::string no_cuda_ep_test = Env::Default().GetEnvironmentVar("NO_CUDA_TEST"); - if (no_cuda_ep_test 
== "1") { + if (no_cuda_ep_test != "1") { return nullptr; } #endif From 1598875e896552fc0aedbbb1c5ad517815055fb4 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 15:26:53 +0800 Subject: [PATCH 35/65] update java code --- java/src/test/java/ai/onnxruntime/InferenceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index 54ef492ac016d..4b391987cc826 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -741,7 +741,7 @@ public void testCoreML() throws OrtException { @EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { String no_cuda_test = System.getenv("NO_CUDA_TEST"); - if (no_cuda_test == null || no_cuda_test.isEmpty() || ! no_cuda_test.equals("1")) { + if (no_cuda_test == null || no_cuda_test.isEmpty() || !no_cuda_test.equals("1")) { if (System.getProperty("USE_CUDA").equals("1")) { System.out.println("Skipping DirectML test because CUDA EP test is enabled."); return; From 0a28ba564ecdb37869f59424eccd0804f328423e Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 18:05:59 +0800 Subject: [PATCH 36/65] update pool image --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 11e3ce04c5c85..8e38604f90fce 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -323,9 +323,7 @@ stages: - job: Windows_Packaging_${{ parameters.stage_name_suffix }}_Testing workspace: clean: all - pool: - name: ${{ parameters.ort_build_pool_name }} - demands: ImageVersionOverride -equals 248.0.0 + pool: ${{ 
parameters.ort_build_pool_name }} timeoutInMinutes: 180 steps: - task: mspremier.PostBuildCleanup.PostBuildCleanup-task.PostBuildCleanup@3 From e81aea8971bab1cb08e8c0f941ff64342e996735 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 18:45:52 +0800 Subject: [PATCH 37/65] fail condition --- onnxruntime/test/lora/lora_test.cc | 7 +++++-- onnxruntime/test/util/default_providers.cc | 2 +- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/lora/lora_test.cc b/onnxruntime/test/lora/lora_test.cc index bfb6dfcc606fe..8338c7d547a09 100644 --- a/onnxruntime/test/lora/lora_test.cc +++ b/onnxruntime/test/lora/lora_test.cc @@ -206,7 +206,7 @@ TEST(LoraAdapterTest, VerifyCudaDeviceCopy) { } #ifdef USE_DML if (DefaultDmlExecutionProvider() != nullptr) { - GTEST_SKIP() << "It should not run with DML EP"; + GTEST_FAIL() << "It should not run with DML EP"; } #endif auto cpu_ep = DefaultCpuExecutionProvider(); @@ -242,14 +242,17 @@ TEST(LoraAdapterTest, VerifyCudaDeviceCopy) { #ifdef USE_DML TEST(LoraAdapterTest, VerifyDmlDeviceCopy) { + // NO_DML_TEST is set, DML test is skipped if (DefaultDmlExecutionProvider() == nullptr) { GTEST_SKIP() << "Skip This Test Due to this EP is null"; } + #ifdef USE_CUDA if (DefaultCudaExecutionProvider() != nullptr) { - GTEST_SKIP() << "It should not run with CUDA EP"; + GTEST_FAIL() << "It should not run with CUDA EP"; } #endif + auto cpu_ep = DefaultCpuExecutionProvider(); auto cpu_allocator = cpu_ep->CreatePreferredAllocators()[0]; diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index ec391c397abac..1acaf40eab79b 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -125,7 +125,7 @@ std::unique_ptr DefaultCudaExecutionProvider() { #ifdef USE_DML const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); if (no_dml_ep_test != "1") { - return nullptr; + return nullptr; } #endif 
OrtCUDAProviderOptionsV2 provider_options{}; From 5e976d381849b0e41489834fd9d49a9e9d02761e Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 21:24:18 +0800 Subject: [PATCH 38/65] Float16Larget test --- onnxruntime/test/contrib_ops/matmul_4bits_test.cc | 6 ++++-- onnxruntime/test/util/default_providers.cc | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc index f3ad5618f267f..638f85c06c30b 100644 --- a/onnxruntime/test/contrib_ops/matmul_4bits_test.cc +++ b/onnxruntime/test/contrib_ops/matmul_4bits_test.cc @@ -545,9 +545,11 @@ TEST(MatMulNBits, Float16Large) { // machines we tested on. All consumer-grade machines from Nvidia/AMD/Intel seem to pass these tests with an // absolute error of 0.08, but the A10 has errors going as high as 0.22. Ultimately, given the large number // of elements in this test, ULPs should probably be used instead of absolute/relative tolerances. 
- float abs_error = 0.3f; + + float abs_error = 0.05f; if (DefaultDmlExecutionProvider() != nullptr) { - abs_error = 0.05f; + // it means the ep is dml in runtime, the abs_error is changed to 0.3f + abs_error = 0.3f; } #else float abs_error = 0.05f; diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 1acaf40eab79b..ec391c397abac 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -125,7 +125,7 @@ std::unique_ptr DefaultCudaExecutionProvider() { #ifdef USE_DML const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); if (no_dml_ep_test != "1") { - return nullptr; + return nullptr; } #endif OrtCUDAProviderOptionsV2 provider_options{}; From d5cf61f412ae89e5a9736c3789ea9de5a69ff79e Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 22:27:09 +0800 Subject: [PATCH 39/65] remove nullptr in eps --- onnxruntime/test/common/cuda_op_test_utils.h | 4 ++++ onnxruntime/test/framework/inference_session_test.cc | 3 +++ onnxruntime/test/providers/base_tester.cc | 7 +++++++ 3 files changed, 14 insertions(+) diff --git a/onnxruntime/test/common/cuda_op_test_utils.h b/onnxruntime/test/common/cuda_op_test_utils.h index 6f3e460628566..6833c3785466d 100644 --- a/onnxruntime/test/common/cuda_op_test_utils.h +++ b/onnxruntime/test/common/cuda_op_test_utils.h @@ -13,6 +13,10 @@ namespace test { int GetCudaArchitecture(); inline bool HasCudaEnvironment(int min_cuda_architecture) { + if (DefaultCudaExecutionProvider() == nullptr) { + return false; + } + if (DefaultCudaExecutionProvider().get() == nullptr) { return false; } diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 61a8f7e23fe87..0e1bf3f8c6965 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -996,6 +996,9 @@ static void 
TestBindHelper(const std::string& log_str, if (bind_provider_type == kCudaExecutionProvider || bind_provider_type == kRocmExecutionProvider) { #ifdef USE_CUDA auto provider = DefaultCudaExecutionProvider(); + if (provider == nullptr) { + return; + } gpu_provider = provider.get(); ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(std::move(provider))); #endif diff --git a/onnxruntime/test/providers/base_tester.cc b/onnxruntime/test/providers/base_tester.cc index dea39bc99d3e9..ba6546b791baf 100644 --- a/onnxruntime/test/providers/base_tester.cc +++ b/onnxruntime/test/providers/base_tester.cc @@ -529,6 +529,13 @@ void BaseTester::Run(ExpectResult expect_result, const std::string& expected_fai so.use_deterministic_compute = use_determinism_; so.graph_optimization_level = TransformerLevel::Default; // 'Default' == off + // remove nullptr in execution_providers. + // it's a little ugly but we need to do this because DefaultXXXExecutionProvider() can return nullptr in Runtime. + // And there're many places adding DefaultXXXExecutionProvider() to execution_providers directly. 
+ if (execution_providers != nullptr || execution_providers->empty()) { + execution_providers->erase(std::remove(execution_providers->begin(), execution_providers->end(), nullptr), execution_providers->end()); + } + Run(so, expect_result, expected_failure_string, excluded_provider_types, run_options, execution_providers, options); } From e3b25cfc2490c12b382ba4d2339f2c69bdab38a8 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Mon, 28 Oct 2024 23:53:51 +0800 Subject: [PATCH 40/65] skip cuda tests 1 --- .../test/contrib_ops/bias_dropout_op_test.cc | 3 +++ .../contrib_ops/bitmask_dropout_op_test.cc | 3 +++ .../test/contrib_ops/layer_norm_test.cc | 13 ++++++++++--- .../test/framework/allocation_planner_test.cc | 18 ++++++++++++++++++ .../test/framework/cuda/fence_cuda_test.cc | 9 +++++++++ .../test/framework/inference_session_test.cc | 15 +++++++++++++++ .../test/framework/sparse_kernels_test.cc | 6 ++++++ onnxruntime/test/providers/base_tester.cc | 6 +++++- .../providers/cpu/tensor/grid_sample_test.cc | 8 +++++--- 9 files changed, 74 insertions(+), 7 deletions(-) diff --git a/onnxruntime/test/contrib_ops/bias_dropout_op_test.cc b/onnxruntime/test/contrib_ops/bias_dropout_op_test.cc index 027d4b3fff1b0..297629b015796 100644 --- a/onnxruntime/test/contrib_ops/bias_dropout_op_test.cc +++ b/onnxruntime/test/contrib_ops/bias_dropout_op_test.cc @@ -181,6 +181,9 @@ void RunBiasDropoutTest(const bool use_mask, const std::vector& input_s t.SetCustomOutputVerifier(output_verifier); std::vector> t_eps; #ifdef USE_CUDA + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } t_eps.emplace_back(DefaultCudaExecutionProvider()); #elif USE_ROCM t_eps.emplace_back(DefaultRocmExecutionProvider()); diff --git a/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc b/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc index 7ca4e1004066c..ce474cc75431b 100644 --- a/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc +++ 
b/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc @@ -61,6 +61,9 @@ void RunTestForInference(const std::vector& input_dims, bool has_ratio std::vector> test_eps; #ifdef USE_CUDA + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } test_eps.emplace_back(DefaultCudaExecutionProvider()); #elif USE_ROCM test_eps.emplace_back(DefaultRocmExecutionProvider()); diff --git a/onnxruntime/test/contrib_ops/layer_norm_test.cc b/onnxruntime/test/contrib_ops/layer_norm_test.cc index 438a1100ca95c..4055b1449e70a 100644 --- a/onnxruntime/test/contrib_ops/layer_norm_test.cc +++ b/onnxruntime/test/contrib_ops/layer_norm_test.cc @@ -2,6 +2,7 @@ // Licensed under the MIT License. #include "test/providers/compare_provider_test_utils.h" +#include "test/util/include/default_providers.h" namespace onnxruntime { namespace test { @@ -79,11 +80,17 @@ static void TestLayerNorm(const std::vector& x_dims, #endif #ifdef USE_CUDA - test.CompareWithCPU(kCudaExecutionProvider); + if (DefaultCudaExecutionProvider() != nullptr) { + test.CompareWithCPU(kCudaExecutionProvider); + } #elif USE_ROCM test.CompareWithCPU(kRocmExecutionProvider); -#elif USE_DML - test.CompareWithCPU(kDmlExecutionProvider); +#endif + +#ifdef USE_DML + if (DefaultDmlExecutionProvider() != nullptr) { + test.CompareWithCPU(kDmlExecutionProvider); + } #endif } diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc index 0105e90b5a24a..407f61b8eb1ef 100644 --- a/onnxruntime/test/framework/allocation_planner_test.cc +++ b/onnxruntime/test/framework/allocation_planner_test.cc @@ -894,6 +894,9 @@ TEST_F(PlannerTest, LocationPlanningForPassThroughExplicitAndImplicitSubgraphInp SessionOptions so; InferenceSession sess{so, GetEnvironment()}; + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } auto status = sess.RegisterExecutionProvider(DefaultCudaExecutionProvider()); ASSERT_TRUE(status.IsOK()); @@ -1036,6 +1039,9 @@ TEST_F(PlannerTest, 
LocationPlanningForInitializersOnlyUsedInANestedSubgraph) { SessionOptions so; InferenceSession sess{so, GetEnvironment()}; + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } auto status = sess.RegisterExecutionProvider(DefaultCudaExecutionProvider()); ASSERT_TRUE(status.IsOK()); @@ -1143,6 +1149,9 @@ TEST_F(PlannerTest, LocationPlanningForInitializersUsedOnDifferentDevicesInMainG SessionOptions so; InferenceSession sess{so, GetEnvironment()}; + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } auto status = sess.RegisterExecutionProvider(DefaultCudaExecutionProvider()); ASSERT_TRUE(status.IsOK()); @@ -1235,6 +1244,9 @@ TEST_F(PlannerTest, LocationPlanningForImplicitInputsWithoutExplicitConsumersInM SessionOptions so; InferenceSession sess{so, GetEnvironment()}; + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } auto status = sess.RegisterExecutionProvider(DefaultCudaExecutionProvider()); ASSERT_TRUE(status.IsOK()); @@ -2007,6 +2019,9 @@ TEST_F(PlannerTest, TestCpuIf) { sess_opt.graph_optimization_level = TransformerLevel::Default; InferenceSession sess(sess_opt, GetEnvironment(), ORT_TSTR("./testdata/multi_stream_models/cpu_if.onnx")); + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(sess.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_STATUS_OK(sess.Load()); ASSERT_STATUS_OK(sess.Initialize()); @@ -2071,6 +2086,9 @@ TEST(AllocationPlannerTest, ReusedInputCrossDifferentStreams) { sess_opt.graph_optimization_level = TransformerLevel::Default; InferenceSession sess(sess_opt, GetEnvironment(), ORT_TSTR("./testdata/multi_stream_models/issue_19480.onnx")); + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } auto status = sess.RegisterExecutionProvider(DefaultCudaExecutionProvider()); status = sess.Load(); status = sess.Initialize(); diff --git a/onnxruntime/test/framework/cuda/fence_cuda_test.cc b/onnxruntime/test/framework/cuda/fence_cuda_test.cc index 
e28327941dda4..3e5ef30e7ebef 100644 --- a/onnxruntime/test/framework/cuda/fence_cuda_test.cc +++ b/onnxruntime/test/framework/cuda/fence_cuda_test.cc @@ -115,6 +115,9 @@ TEST(CUDAFenceTests, DISABLED_PartOnCPU) { SessionOptions so; FenceCudaTestInferenceSession session(so, GetEnvironment()); ASSERT_STATUS_OK(LoadInferenceSessionFromModel(session, *model)); + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_TRUE(session.Initialize().IsOK()); ASSERT_TRUE(1 == CountCopyNodes(graph)); @@ -164,6 +167,9 @@ TEST(CUDAFenceTests, TileWithInitializer) { SessionOptions so; FenceCudaTestInferenceSession session(so, GetEnvironment()); ASSERT_STATUS_OK(LoadInferenceSessionFromModel(session, *model)); + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_STATUS_OK(session.Initialize()); @@ -224,6 +230,9 @@ TEST(CUDAFenceTests, TileWithComputedInput) { SessionOptions so; FenceCudaTestInferenceSession session(so, GetEnvironment()); ASSERT_STATUS_OK(LoadInferenceSessionFromModel(session, *model)); + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_TRUE(session.Initialize().IsOK()); diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index 0e1bf3f8c6965..ac5a5933cc107 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -635,6 +635,9 @@ TEST(InferenceSessionTests, CheckRunProfilerWithSessionOptions) { InferenceSession session_object(so, GetEnvironment()); #ifdef USE_CUDA + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); #endif 
#ifdef USE_ROCM @@ -689,6 +692,9 @@ TEST(InferenceSessionTests, CheckRunProfilerWithSessionOptions2) { InferenceSession session_object(so, GetEnvironment()); #ifdef USE_CUDA + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); #endif #ifdef USE_ROCM @@ -1594,6 +1600,9 @@ TEST(InferenceSessionTests, Test3LayerNestedSubgraph) { #if USE_TENSORRT ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider())); #elif USE_CUDA + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); #elif USE_ROCM ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultRocmExecutionProvider())); @@ -1746,6 +1755,9 @@ TEST(InferenceSessionTests, Test2LayerNestedSubgraph) { #if USE_TENSORRT ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultTensorrtExecutionProvider())); #elif USE_CUDA + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); #elif USE_ROCM ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultRocmExecutionProvider())); @@ -2108,6 +2120,9 @@ TEST(InferenceSessionTests, DISABLED_TestParallelExecutionWithCudaProvider) { so.session_logid = "InferenceSessionTests.TestParallelExecutionWithCudaProvider"; InferenceSession session_object{so, GetEnvironment()}; + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_STATUS_OK(session_object.Load(model_uri)); diff --git a/onnxruntime/test/framework/sparse_kernels_test.cc b/onnxruntime/test/framework/sparse_kernels_test.cc index 7bd6b47f52b7d..db9592c293fd0 100644 --- a/onnxruntime/test/framework/sparse_kernels_test.cc +++ 
b/onnxruntime/test/framework/sparse_kernels_test.cc @@ -1457,6 +1457,9 @@ TEST(SparseTensorConversionTests, CsrConversion) { #ifdef USE_CUDA auto cuda_provider = DefaultCudaExecutionProvider(); + if (cuda_provider == nullptr) { + return; + } auto cuda_allocator = cuda_provider->CreatePreferredAllocators()[0]; { auto cuda_transfer = cuda_provider->GetDataTransfer(); @@ -1684,6 +1687,9 @@ TEST(SparseTensorConversionTests, CooConversion) { #ifdef USE_CUDA auto cuda_provider = DefaultCudaExecutionProvider(); + if (cuda_provider == nullptr) { + return; + } auto cuda_allocator = cuda_provider->CreatePreferredAllocators()[0]; { auto cuda_transfer = cuda_provider->GetDataTransfer(); diff --git a/onnxruntime/test/providers/base_tester.cc b/onnxruntime/test/providers/base_tester.cc index ba6546b791baf..9d83c789c5124 100644 --- a/onnxruntime/test/providers/base_tester.cc +++ b/onnxruntime/test/providers/base_tester.cc @@ -532,8 +532,12 @@ void BaseTester::Run(ExpectResult expect_result, const std::string& expected_fai // remove nullptr in execution_providers. // it's a little ugly but we need to do this because DefaultXXXExecutionProvider() can return nullptr in Runtime. // And there're many places adding DefaultXXXExecutionProvider() to execution_providers directly. 
- if (execution_providers != nullptr || execution_providers->empty()) { + if (execution_providers != nullptr) { execution_providers->erase(std::remove(execution_providers->begin(), execution_providers->end(), nullptr), execution_providers->end()); + if (execution_providers->size() == 0) { + // In fact, no ep is needed to run + return; + } } Run(so, expect_result, expected_failure_string, excluded_provider_types, run_options, execution_providers, options); diff --git a/onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc b/onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc index 05cfb5c13d689..7e1a2384d7fc6 100644 --- a/onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/grid_sample_test.cc @@ -15,11 +15,13 @@ std::vector> GetExecutionProviders(int opset execution_providers.emplace_back(DefaultCpuExecutionProvider()); #ifdef USE_CUDA - if (opset_version < 20) { - execution_providers.emplace_back(DefaultCudaExecutionProvider()); + if (DefaultCudaExecutionProvider() != nullptr) { + if (opset_version < 20) { + execution_providers.emplace_back(DefaultCudaExecutionProvider()); #ifdef ENABLE_CUDA_NHWC_OPS - execution_providers.push_back(DefaultCudaNHWCExecutionProvider()); + execution_providers.push_back(DefaultCudaNHWCExecutionProvider()); #endif + } } #endif From 839dcbf4332546aae37d997800842a99c33d3c86 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 00:06:03 +0800 Subject: [PATCH 41/65] check cudaep 2 --- onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc b/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc index ce474cc75431b..4f5967c23cb37 100644 --- a/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc +++ b/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc @@ -61,10 +61,9 @@ void RunTestForInference(const std::vector& input_dims, bool 
has_ratio std::vector> test_eps; #ifdef USE_CUDA - if (DefaultCudaExecutionProvider() == nullptr) { - return; + if (DefaultCudaExecutionProvider() != nullptr) { + test_eps.emplace_back(DefaultCudaExecutionProvider()); } - test_eps.emplace_back(DefaultCudaExecutionProvider()); #elif USE_ROCM test_eps.emplace_back(DefaultRocmExecutionProvider()); #endif @@ -125,7 +124,9 @@ void RunTestForTraining(const std::vector& input_dims) { std::vector> dropout_eps; #ifdef USE_CUDA - dropout_eps.emplace_back(DefaultCudaExecutionProvider()); + if (DefaultCudaExecutionProvider() != nullptr) { + dropout_eps.emplace_back(DefaultCudaExecutionProvider()); + } #elif USE_ROCM dropout_eps.emplace_back(DefaultRocmExecutionProvider()); #endif From 08064f2e280c4d5bb797af77ff4900dc4882e578 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 09:45:52 +0800 Subject: [PATCH 42/65] update tests --- .../contrib_ops/bitmask_dropout_op_test.cc | 5 ++- .../test/contrib_ops/tensor_op_test.cc | 20 +++++++++- .../test/framework/memcpy_transformer_test.cc | 37 +++++++++++++++++++ .../providers/compare_provider_test_utils.cc | 5 +++ .../providers/cpu/tensor/gather_op_test.cc | 13 ++++--- 5 files changed, 72 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc b/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc index 4f5967c23cb37..26b0e3a4dd7a9 100644 --- a/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc +++ b/onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc @@ -124,9 +124,10 @@ void RunTestForTraining(const std::vector& input_dims) { std::vector> dropout_eps; #ifdef USE_CUDA - if (DefaultCudaExecutionProvider() != nullptr) { - dropout_eps.emplace_back(DefaultCudaExecutionProvider()); + if (DefaultCudaExecutionProvider() == nullptr) { + return; } + dropout_eps.emplace_back(DefaultCudaExecutionProvider()); #elif USE_ROCM dropout_eps.emplace_back(DefaultRocmExecutionProvider()); #endif diff --git 
a/onnxruntime/test/contrib_ops/tensor_op_test.cc b/onnxruntime/test/contrib_ops/tensor_op_test.cc index bc2ff5f4f724d..a86860ceffa41 100644 --- a/onnxruntime/test/contrib_ops/tensor_op_test.cc +++ b/onnxruntime/test/contrib_ops/tensor_op_test.cc @@ -121,7 +121,15 @@ void MeanVarianceNormalizationAcrossChannels(bool across_channels, bool normaliz test.AddAttribute("normalize_variance", normalize_variance ? one : zero); test.AddInput("input", {N, C, H, W}, X); test.AddOutput("output", {N, C, H, W}, result); +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider}); + } else if (DefaultDmlExecutionProvider() == nullptr) { + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider}); + } +#else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator. +#endif } void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) { @@ -188,7 +196,15 @@ void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_va test.AddAttribute("normalize_variance", normalize_variance ? 
one : zero); test.AddInput("input", {N, C, H, W}, X); test.AddOutput("output", {N, C, H, W}, result); +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider}); + } else if (DefaultDmlExecutionProvider() == nullptr) { + test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider}); + } +#else test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator. +#endif } TEST(MVNContribOpTest, MeanVarianceNormalizationCPUTest_Version1_TO_8) { @@ -230,7 +246,9 @@ TEST(UnfoldTensorOpTest, LastDim) { std::vector> execution_providers; #ifdef USE_CUDA - execution_providers.push_back(DefaultCudaExecutionProvider()); + if (DefaultCudaExecutionProvider() != nullptr) { + execution_providers.push_back(DefaultCudaExecutionProvider()); + } #endif execution_providers.push_back(DefaultCpuExecutionProvider()); tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); diff --git a/onnxruntime/test/framework/memcpy_transformer_test.cc b/onnxruntime/test/framework/memcpy_transformer_test.cc index 6e86e5b58aead..3bb6bb2ffd097 100644 --- a/onnxruntime/test/framework/memcpy_transformer_test.cc +++ b/onnxruntime/test/framework/memcpy_transformer_test.cc @@ -106,12 +106,24 @@ TEST(TransformerTest, MemcpyTransformerTest) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() != nullptr) { + ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); + } +#else 
ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); +#endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo()))); KernelRegistryManager test_registry_manager; ASSERT_STATUS_OK(test_registry_manager.RegisterKernels(execution_providers)); +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif + MemcpyTransformer transformer({onnxruntime::kCudaExecutionProvider}, test_registry_manager); bool modified = false; @@ -161,7 +173,14 @@ TEST(TransformerTest, MemcpyTransformerTestCudaFirst) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif + ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); + ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo()))); KernelRegistryManager test_registry_manager; @@ -281,7 +300,13 @@ TEST(TransformerTest, TestInitializerDuplicationInSubgraph) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); + ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo()))); KernelRegistryManager test_registry_manager; @@ -323,7 +348,13 @@ TEST(TransformerTest, MemcpyTransformerTestGraphInputConsumedOnMultipleDevices) KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; +#if defined(USE_CUDA) && defined(USE_DML) + if 
(DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); + ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo()))); KernelRegistryManager test_registry_manager; @@ -425,7 +456,13 @@ TEST(TransformerTest, MemcpyTransformerTestImplicitInputConsumedOnMultipleDevice KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); + ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCpuExecutionProvider, std::make_unique(CPUExecutionProviderInfo()))); KernelRegistryManager test_registry_manager; diff --git a/onnxruntime/test/providers/compare_provider_test_utils.cc b/onnxruntime/test/providers/compare_provider_test_utils.cc index 386a5656d8a01..9acb37c24ddd0 100644 --- a/onnxruntime/test/providers/compare_provider_test_utils.cc +++ b/onnxruntime/test/providers/compare_provider_test_utils.cc @@ -53,6 +53,11 @@ void CompareOpTester::CompareWithCPU(const std::string& target_provider_type, SetTestFunctionCalled(); std::unique_ptr target_execution_provider = GetExecutionProvider(target_provider_type); +#if defined(USE_CUDA) && defined(USE_DML) + if (target_execution_provider == nullptr) { + return; + } +#endif ASSERT_TRUE(target_execution_provider != nullptr) << "provider_type " << target_provider_type << " is not supported."; diff --git a/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc b/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc index be79a6d29d539..ef5ab61e2eb01 100644 --- a/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc @@ -123,13 +123,16 @@ 
TEST(GatherOpTest, Gather_invalid_index_gpu) { 0.0f, 0.0f, 0.0f, 0.0f}); // On GPU, just set the value to 0 instead of report error. exclude all other providers - test #if defined(USE_CUDA) - .ConfigEp(DefaultCudaExecutionProvider()) -#else - .ConfigEp(DefaultRocmExecutionProvider()) + if (DefaultCudaExecutionProvider() != nullptr) { + test.ConfigEp(DefaultCudaExecutionProvider()) + .ConfigEp(DefaultRocmExecutionProvider()) + .RunWithConfig(); + } else { + test.ConfigEp(DefaultRocmExecutionProvider()) + .RunWithConfig(); + } #endif - .RunWithConfig(); } #endif From be93bd966a594bf713019c2d31e9c115e21071b2 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 09:46:51 +0800 Subject: [PATCH 43/65] lint --- onnxruntime/test/contrib_ops/tensor_op_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/contrib_ops/tensor_op_test.cc b/onnxruntime/test/contrib_ops/tensor_op_test.cc index a86860ceffa41..d5e2ddebfe67f 100644 --- a/onnxruntime/test/contrib_ops/tensor_op_test.cc +++ b/onnxruntime/test/contrib_ops/tensor_op_test.cc @@ -248,7 +248,7 @@ TEST(UnfoldTensorOpTest, LastDim) { #ifdef USE_CUDA if (DefaultCudaExecutionProvider() != nullptr) { execution_providers.push_back(DefaultCudaExecutionProvider()); - } + } #endif execution_providers.push_back(DefaultCpuExecutionProvider()); tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers); From 8e77a6c051cadb7f98395e305e0551d6e81b8e76 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 10:51:24 +0800 Subject: [PATCH 44/65] update tests --- .../test/contrib_ops/beam_search_test.cc | 10 ++++ .../test/framework/allocation_planner_test.cc | 55 +++++++++++++++++++ .../test/framework/inference_session_test.cc | 14 ++++- 3 files changed, 76 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/contrib_ops/beam_search_test.cc b/onnxruntime/test/contrib_ops/beam_search_test.cc index 5f94d30112f0e..ee02c5f1078fe 100644 --- 
a/onnxruntime/test/contrib_ops/beam_search_test.cc +++ b/onnxruntime/test/contrib_ops/beam_search_test.cc @@ -73,6 +73,11 @@ TEST(BeamSearchTest, GptBeamSearchFp32) { const char* const output_names[] = {"sequences"}; Ort::SessionOptions session_options; +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() != nullptr) { + return; + } +#endif #ifdef USE_CUDA OrtCUDAProviderOptionsV2 cuda_options; cuda_options.use_tf32 = false; @@ -166,6 +171,11 @@ TEST(BeamSearchTest, GptBeamSearchFp16) { bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get()); if (enable_cuda || enable_rocm) { Ort::SessionOptions session_options; +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() != nullptr) { + return; + } +#endif #ifdef USE_CUDA OrtCUDAProviderOptionsV2 cuda_options; cuda_options.use_tf32 = false; diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc index 407f61b8eb1ef..9d9e3c825b05f 100644 --- a/onnxruntime/test/framework/allocation_planner_test.cc +++ b/onnxruntime/test/framework/allocation_planner_test.cc @@ -1279,6 +1279,12 @@ TEST_F(PlannerTest, LocationPlanningForImplicitInputsWithoutExplicitConsumersInM // Test MultiStream scenario for the graph: // node1(CPU ep)->node2(CPU ep)->node3(CUDA ep)->node4(CPU ep) TEST_F(PlannerTest, MultiStream) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif + ONNX_NAMESPACE::TensorProto tensor; tensor.add_dims(1); tensor.add_float_data(1.0f); @@ -1297,6 +1303,7 @@ TEST_F(PlannerTest, MultiStream) { onnxruntime::ProviderInfo_CUDA& ep = onnxruntime::GetProviderInfo_CUDA(); auto epFactory = ep.CreateExecutionProviderFactory(epi); std::unique_ptr execution_provider = epFactory->CreateProvider(); + ORT_THROW_IF_ERROR(GetExecutionProviders().Add("CUDAExecutionProvider", std::move(execution_provider))); CreatePlan({}, false); @@ -1324,6 +1331,11 
@@ TEST_F(PlannerTest, MultiStream) { // node3 // All 3 nodes are CUDA EP, node1 is in stream0, node2 is in stream1, node3 is in stream2 TEST_F(PlannerTest, MultiStream1StreamWaitFor2Streams) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif std::unique_ptr<::onnxruntime::KernelDef> cudaKernel = KernelDefBuilder().SetName("Transpose").Provider(kCudaExecutionProvider).SinceVersion(1, 10).Build(); std::unique_ptr<::onnxruntime::KernelDef> cudaKernelAdd = KernelDefBuilder().SetName("Add").Provider(kCudaExecutionProvider).SinceVersion(1, 10).Build(); std::string Graph_input("Graph_input"), Arg1("Arg1"), Arg2("Arg2"), Arg3("Arg3"), node1("node1"), node2("node2"), node3("node3"); @@ -1365,6 +1377,11 @@ TEST_F(PlannerTest, MultiStream1StreamWaitFor2Streams) { // stream 1: node2 (CPU EP) // node1's output, which is consumed by both node2 and node3, is in CPU. TEST_F(PlannerTest, MultiStreamCudaEPNodeCPUOutput) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif MemcpyToHostInCuda_TransposeInCudaAndCpu("./testdata/multi_stream_models/memcpyToHost_same_stream_with_transpose.json"); EXPECT_EQ(GetState().GetExecutionPlan()->execution_plan.size(), 2) << "2 logic streams"; EXPECT_EQ(GetState().GetExecutionPlan()->execution_plan[0]->steps_.size(), 5) << "stream 0 has 5 steps"; @@ -1386,6 +1403,11 @@ TEST_F(PlannerTest, MultiStreamCudaEPNodeCPUOutput) { // TODO(leca): there is a bug in the corresponding graph that node2 will be visited twice when traversing node1's output nodes // (see: for (auto it = node->OutputNodesBegin(); it != node->OutputNodesEnd(); ++it) in BuildExecutionPlan()). 
We can just break the loop and don't need the extra variables once it is fixed TEST_F(PlannerTest, MultiStreamMultiOutput) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif std::unique_ptr<::onnxruntime::KernelDef> cudaKernel = KernelDefBuilder().SetName("RNN").Provider(kCudaExecutionProvider).SinceVersion(7).Build(); std::string Graph_input1("Graph_input1"), Graph_input2("Graph_input2"), Graph_input3("Graph_input3"), Arg1("Arg1"), Arg2("Arg2"), Arg3("Arg3"), node1("node1"), node2("node2"); std::vector input1{Arg(Graph_input1), Arg(Graph_input2), Arg(Graph_input3)}, output1{Arg(Arg1), Arg(Arg2)}, input2{Arg(Arg1), Arg(Arg2)}, output2{Arg(Arg3)}; @@ -1423,6 +1445,11 @@ TEST_F(PlannerTest, MultiStreamMultiOutput) { // TODO(leca): the ideal case is there is only 1 wait step before launching node3, // as there is a specific order between node1 and node2 if they are in the same stream, thus node3 will only need to wait the latter one TEST_F(PlannerTest, MultiStream2NodesSameStreamConsumedBy1NodeInDifferentStream) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif std::unique_ptr<::onnxruntime::KernelDef> cudaKernel = KernelDefBuilder().SetName("Transpose").Provider(kCudaExecutionProvider).SinceVersion(1, 10).Build(); std::string Graph_input1("Graph_input1"), Graph_input2("Graph_input2"), Graph_input3("Graph_input3"), Arg1("Arg1"), Arg2("Arg2"), Arg3("Arg3"), node1("node1"), node2("node2"), node3("node3"); std::vector input1{Arg(Graph_input1)}, input2{Arg(Graph_input2)}, output1{Arg(Arg1)}, output2{Arg(Arg2)}, input3{Arg(Arg1), Arg(Arg2)}, output3{Arg(Arg3)}; @@ -1460,6 +1487,11 @@ TEST_F(PlannerTest, MultiStream2NodesSameStreamConsumedBy1NodeInDifferentStream) #if !defined(__wasm__) && defined(ORT_ENABLE_STREAM) TEST_F(PlannerTest, ParaPlanCreation) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == 
nullptr) { + return; + } +#endif TypeProto graph_in_type; graph_in_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); auto* graph_in_shape = graph_in_type.mutable_tensor_type()->mutable_shape(); @@ -1901,6 +1933,12 @@ TEST_F(PlannerTest, ParaPlanCreation) { } TEST_F(PlannerTest, TestMultiStreamConfig) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif + const char* type = "DeviceBasedPartitioner"; constexpr size_t type_len = 22; @@ -1974,6 +2012,12 @@ TEST_F(PlannerTest, TestMultiStreamSaveConfig) { // Load with partition config where a node is missing, session load expected to fail. TEST_F(PlannerTest, TestMultiStreamMissingNodeConfig) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif + const char* config_file_path = "./testdata/multi_stream_models/conv_add_relu_single_stream_missing_node.json"; SessionOptions sess_opt; sess_opt.graph_optimization_level = TransformerLevel::Default; @@ -1994,6 +2038,11 @@ TEST_F(PlannerTest, TestMultiStreamMissingNodeConfig) { // Load with partition config where streams and devices has mismatch TEST_F(PlannerTest, TestMultiStreamMismatchDevice) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif const char* config_file_path = "./testdata/multi_stream_models/conv_add_relu_single_stream_mismatch_device.json"; SessionOptions sess_opt; sess_opt.graph_optimization_level = TransformerLevel::Default; @@ -2082,6 +2131,12 @@ TEST_F(PlannerTest, TestCpuIf) { // onnx.save(model, 'issue_19480.onnx') // TEST(AllocationPlannerTest, ReusedInputCrossDifferentStreams) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif + SessionOptions sess_opt; sess_opt.graph_optimization_level = TransformerLevel::Default; diff --git 
a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index ac5a5933cc107..06ab4e3ece099 100644 --- a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -2113,6 +2113,11 @@ TEST(InferenceSessionTests, TestStrictShapeInference) { #ifdef USE_CUDA // disable it, since we are going to enable parallel execution with cuda ep TEST(InferenceSessionTests, DISABLED_TestParallelExecutionWithCudaProvider) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif string model_uri = "testdata/transform/fusion/fuse-conv-bn-mul-add-unsqueeze.onnx"; SessionOptions so; @@ -2120,9 +2125,6 @@ TEST(InferenceSessionTests, DISABLED_TestParallelExecutionWithCudaProvider) { so.session_logid = "InferenceSessionTests.TestParallelExecutionWithCudaProvider"; InferenceSession session_object{so, GetEnvironment()}; - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } ASSERT_STATUS_OK(session_object.RegisterExecutionProvider(DefaultCudaExecutionProvider())); ASSERT_STATUS_OK(session_object.Load(model_uri)); @@ -2139,6 +2141,12 @@ TEST(InferenceSessionTests, DISABLED_TestParallelExecutionWithCudaProvider) { } TEST(InferenceSessionTests, TestArenaShrinkageAfterRun) { +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif + OrtArenaCfg arena_cfg; arena_cfg.arena_extend_strategy = 1; // kSameAsRequested From ff784463a6d4aa11d0a2cf461c55f8c0975b29ad Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 11:05:21 +0800 Subject: [PATCH 45/65] typo --- onnxruntime/test/contrib_ops/beam_search_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/contrib_ops/beam_search_test.cc b/onnxruntime/test/contrib_ops/beam_search_test.cc index ee02c5f1078fe..09d4fd470affd 100644 --- 
a/onnxruntime/test/contrib_ops/beam_search_test.cc +++ b/onnxruntime/test/contrib_ops/beam_search_test.cc @@ -74,7 +74,7 @@ TEST(BeamSearchTest, GptBeamSearchFp32) { Ort::SessionOptions session_options; #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() != nullptr) { + if (DefaultCudaExecutionProvider() == nullptr) { return; } #endif @@ -172,7 +172,7 @@ TEST(BeamSearchTest, GptBeamSearchFp16) { if (enable_cuda || enable_rocm) { Ort::SessionOptions session_options; #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() != nullptr) { + if (DefaultCudaExecutionProvider() == nullptr) { return; } #endif From 9e1bafc8ff17b031cb019ae884f316875cc2b05a Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 15:38:13 +0800 Subject: [PATCH 46/65] cuda log --- onnxruntime/core/providers/cuda/cuda_allocator.cc | 3 +++ onnxruntime/core/providers/cuda/cuda_execution_provider.cc | 7 +++++++ onnxruntime/core/providers/cuda/cuda_provider_factory.cc | 7 ++++++- onnxruntime/core/providers/cuda/cuda_stream_handle.cc | 3 +++ 4 files changed, 19 insertions(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/cuda/cuda_allocator.cc b/onnxruntime/core/providers/cuda/cuda_allocator.cc index 8c96d8f57a0ba..557f449799052 100644 --- a/onnxruntime/core/providers/cuda/cuda_allocator.cc +++ b/onnxruntime/core/providers/cuda/cuda_allocator.cc @@ -24,6 +24,9 @@ void CUDAAllocator::CheckDevice(bool throw_when_fail) const { } void CUDAAllocator::SetDevice(bool throw_when_fail) const { +#if defined(USE_CUDA) && defined(USE_DML) + LOGS_DEFAULT(WARNING) << "CUDA SetDevice is called"; +#endif int current_device; auto cuda_err = cudaGetDevice(¤t_device); if (cuda_err == cudaSuccess) { diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index d3f01c1f7adc1..fef5dfe93097b 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ 
b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -173,6 +173,9 @@ AllocatorPtr CUDAExecutionProvider::CreateCudaAllocator(OrtDevice::DeviceId devi CUDAExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId device_id, cudaStream_t stream, size_t /*gpu_mem_limit*/, ArenaExtendStrategy /*arena_extend_strategy*/, CUDAExecutionProviderExternalAllocatorInfo /*external_allocator_info*/, OrtArenaCfg* /*default_memory_arena_cfg*/) { +#if defined(USE_CUDA) && defined(USE_DML) + LOGS_DEFAULT(WARNING) << "CUDA PerThreadContext is called"; +#endif CUDA_CALL_THROW(cudaSetDevice(device_id)); #ifndef USE_CUDA_MINIMAL CUBLAS_CALL_THROW(cublasCreate(&cublas_handle_)); @@ -279,6 +282,10 @@ CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in ORT_ENFORCE(info_.prefer_nhwc == 0, "This build does not support NHWC layout"); #endif +#if defined(USE_CUDA) && defined(USE_DML) + LOGS_DEFAULT(WARNING) << "CUDA ctor is called"; +#endif + CUDA_CALL_THROW(cudaSetDevice(info_.device_id)); // must wait GPU idle, otherwise cudaGetDeviceProperties might fail diff --git a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc index 83a5d02d16c6c..24d4daa336817 100644 --- a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc +++ b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc @@ -52,6 +52,9 @@ std::unique_ptr CUDAProviderFactory::CreateProvider() { struct ProviderInfo_CUDA_Impl final : ProviderInfo_CUDA { OrtStatus* SetCurrentGpuDeviceId(_In_ int device_id) override { +#if defined(USE_CUDA) && defined(USE_DML) + LOGS_DEFAULT(WARNING) << "CUDA SetCurrentGpuDeviceId is called"; +#endif int num_devices; auto cuda_err = ::cudaGetDeviceCount(&num_devices); if (cuda_err != cudaSuccess) { @@ -112,7 +115,9 @@ struct ProviderInfo_CUDA_Impl final : ProviderInfo_CUDA { void CopyGpuToCpu(void* dst_ptr, const void* src_ptr, const size_t size, const OrtMemoryInfo& dst_location, 
const OrtMemoryInfo& src_location) override { ORT_ENFORCE(dst_location.device.Type() == OrtDevice::CPU); - +#if defined(USE_CUDA) && defined(USE_DML) + LOGS_DEFAULT(WARNING) << "CUDA CopyGpuToCpu is called"; +#endif // Current CUDA device. int device; CUDA_CALL_THROW(cudaGetDevice(&device)); diff --git a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc index e9b159516dad9..9be2f9ab3199c 100644 --- a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc +++ b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc @@ -245,6 +245,9 @@ void RegisterCudaStreamHandles(IStreamCommandHandleRegistry& stream_handle_regis cudnnHandle_t external_cudnn_handle, cublasHandle_t external_cublas_handle, const CUDAExecutionProviderInfo& ep_info) { +#if defined(USE_CUDA) && defined(USE_DML) + LOGS_DEFAULT(WARNING) << "CUDA RegisterCudaStreamHandles is called"; +#endif // wait cuda notification on cuda ep stream_handle_registry.RegisterWaitFn(device_type, device_type, WaitCudaNotificationOnDevice); // wait cuda notification on cpu ep From 64ade9ba0efd862fc9a87f8ca1229ec7c2b26e05 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 17:35:45 +0800 Subject: [PATCH 47/65] update 1 test case --- .../providers/cpu/tensor/gather_op_test.cc | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc b/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc index ef5ab61e2eb01..ae838f10b4153 100644 --- a/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc @@ -122,17 +122,19 @@ TEST(GatherOpTest, Gather_invalid_index_gpu) { 4.0f, 5.0f, 6.0f, 7.0f, 0.0f, 0.0f, 0.0f, 0.0f}); +#if defined(USE_CUDA) && defined(USE_DML) + if (DefaultCudaExecutionProvider() == nullptr) { + return; + } +#endif // On GPU, just set the value to 0 instead of report error. 
exclude all other providers + test #if defined(USE_CUDA) - if (DefaultCudaExecutionProvider() != nullptr) { - test.ConfigEp(DefaultCudaExecutionProvider()) - .ConfigEp(DefaultRocmExecutionProvider()) - .RunWithConfig(); - } else { - test.ConfigEp(DefaultRocmExecutionProvider()) - .RunWithConfig(); - } + .ConfigEp(DefaultCudaExecutionProvider()) +#else + .ConfigEp(DefaultRocmExecutionProvider()) #endif + .RunWithConfig(); } #endif From aee739253012c152afffce5d2af2e468c74e4624 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 17:39:10 +0800 Subject: [PATCH 48/65] update --- tools/ci_build/github/azure-pipelines/templates/win-ci.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml index 8e38604f90fce..88bcdf3927248 100644 --- a/tools/ci_build/github/azure-pipelines/templates/win-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/win-ci.yml @@ -401,7 +401,7 @@ stages: arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' workingDirectory: '$(Build.BinariesDirectory)' env: - NO_CUDA_TEST: 1 + NO_CUDA_TEST: '1' - task: PythonScript@0 displayName: 'test excludes DML' condition: and(succeeded(), eq('${{ parameters.runTests}}', true)) @@ -410,7 +410,7 @@ stages: arguments: '--config RelWithDebInfo --use_binskim_compliant_compile_flags --enable_lto --disable_rtti --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --enable_onnx_tests $(TelemetryOption) ' workingDirectory: '$(Build.BinariesDirectory)' env: - NO_DML_TEST: 1 + NO_DML_TEST: '1' - ${{ else }}: - task: PythonScript@0 displayName: 'test' From a8c6e929cb9da44fa78e6196344f739fa8041fb7 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 
20:17:50 +0800 Subject: [PATCH 49/65] Revert "cuda log" This reverts commit 9e1bafc8ff17b031cb019ae884f316875cc2b05a. --- onnxruntime/core/providers/cuda/cuda_allocator.cc | 3 --- onnxruntime/core/providers/cuda/cuda_execution_provider.cc | 7 ------- onnxruntime/core/providers/cuda/cuda_provider_factory.cc | 7 +------ onnxruntime/core/providers/cuda/cuda_stream_handle.cc | 3 --- 4 files changed, 1 insertion(+), 19 deletions(-) diff --git a/onnxruntime/core/providers/cuda/cuda_allocator.cc b/onnxruntime/core/providers/cuda/cuda_allocator.cc index 557f449799052..8c96d8f57a0ba 100644 --- a/onnxruntime/core/providers/cuda/cuda_allocator.cc +++ b/onnxruntime/core/providers/cuda/cuda_allocator.cc @@ -24,9 +24,6 @@ void CUDAAllocator::CheckDevice(bool throw_when_fail) const { } void CUDAAllocator::SetDevice(bool throw_when_fail) const { -#if defined(USE_CUDA) && defined(USE_DML) - LOGS_DEFAULT(WARNING) << "CUDA SetDevice is called"; -#endif int current_device; auto cuda_err = cudaGetDevice(¤t_device); if (cuda_err == cudaSuccess) { diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc index fef5dfe93097b..d3f01c1f7adc1 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider.cc @@ -173,9 +173,6 @@ AllocatorPtr CUDAExecutionProvider::CreateCudaAllocator(OrtDevice::DeviceId devi CUDAExecutionProvider::PerThreadContext::PerThreadContext(OrtDevice::DeviceId device_id, cudaStream_t stream, size_t /*gpu_mem_limit*/, ArenaExtendStrategy /*arena_extend_strategy*/, CUDAExecutionProviderExternalAllocatorInfo /*external_allocator_info*/, OrtArenaCfg* /*default_memory_arena_cfg*/) { -#if defined(USE_CUDA) && defined(USE_DML) - LOGS_DEFAULT(WARNING) << "CUDA PerThreadContext is called"; -#endif CUDA_CALL_THROW(cudaSetDevice(device_id)); #ifndef USE_CUDA_MINIMAL CUBLAS_CALL_THROW(cublasCreate(&cublas_handle_)); @@ -282,10 +279,6 @@ 
CUDAExecutionProvider::CUDAExecutionProvider(const CUDAExecutionProviderInfo& in ORT_ENFORCE(info_.prefer_nhwc == 0, "This build does not support NHWC layout"); #endif -#if defined(USE_CUDA) && defined(USE_DML) - LOGS_DEFAULT(WARNING) << "CUDA ctor is called"; -#endif - CUDA_CALL_THROW(cudaSetDevice(info_.device_id)); // must wait GPU idle, otherwise cudaGetDeviceProperties might fail diff --git a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc index 24d4daa336817..83a5d02d16c6c 100644 --- a/onnxruntime/core/providers/cuda/cuda_provider_factory.cc +++ b/onnxruntime/core/providers/cuda/cuda_provider_factory.cc @@ -52,9 +52,6 @@ std::unique_ptr CUDAProviderFactory::CreateProvider() { struct ProviderInfo_CUDA_Impl final : ProviderInfo_CUDA { OrtStatus* SetCurrentGpuDeviceId(_In_ int device_id) override { -#if defined(USE_CUDA) && defined(USE_DML) - LOGS_DEFAULT(WARNING) << "CUDA SetCurrentGpuDeviceId is called"; -#endif int num_devices; auto cuda_err = ::cudaGetDeviceCount(&num_devices); if (cuda_err != cudaSuccess) { @@ -115,9 +112,7 @@ struct ProviderInfo_CUDA_Impl final : ProviderInfo_CUDA { void CopyGpuToCpu(void* dst_ptr, const void* src_ptr, const size_t size, const OrtMemoryInfo& dst_location, const OrtMemoryInfo& src_location) override { ORT_ENFORCE(dst_location.device.Type() == OrtDevice::CPU); -#if defined(USE_CUDA) && defined(USE_DML) - LOGS_DEFAULT(WARNING) << "CUDA CopyGpuToCpu is called"; -#endif + // Current CUDA device. 
int device; CUDA_CALL_THROW(cudaGetDevice(&device)); diff --git a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc index 9be2f9ab3199c..e9b159516dad9 100644 --- a/onnxruntime/core/providers/cuda/cuda_stream_handle.cc +++ b/onnxruntime/core/providers/cuda/cuda_stream_handle.cc @@ -245,9 +245,6 @@ void RegisterCudaStreamHandles(IStreamCommandHandleRegistry& stream_handle_regis cudnnHandle_t external_cudnn_handle, cublasHandle_t external_cublas_handle, const CUDAExecutionProviderInfo& ep_info) { -#if defined(USE_CUDA) && defined(USE_DML) - LOGS_DEFAULT(WARNING) << "CUDA RegisterCudaStreamHandles is called"; -#endif // wait cuda notification on cuda ep stream_handle_registry.RegisterWaitFn(device_type, device_type, WaitCudaNotificationOnDevice); // wait cuda notification on cpu ep From 603e0c20893daf7d3e49725e563aedbddb00a27d Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 20:35:40 +0800 Subject: [PATCH 50/65] update java test --- .../java/ai/onnxruntime/InferenceTest.java | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index 4b391987cc826..81f483fdb865f 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -690,7 +690,12 @@ public void testSymbolicDimensionAssignment() throws OrtException { @Test @EnabledIfSystemProperty(named = "USE_CUDA", matches = "1") public void testCUDA() throws OrtException { - runProvider(OrtProvider.CUDA); + String no_cuda_test = Optional.ofNullable(System.getenv("NO_CUDA_TEST")).orElse("0"); + if (!no_cuda_test.equals("1")) { + runProvider(OrtProvider.CUDA); + } else { + System.out.println("Skipping CUDA test because NO_CUDA_TEST is set."); + } } @Test @@ -740,14 +745,11 @@ public void testCoreML() throws OrtException { @Test 
@EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { - String no_cuda_test = System.getenv("NO_CUDA_TEST"); - if (no_cuda_test == null || no_cuda_test.isEmpty() || !no_cuda_test.equals("1")) { - if (System.getProperty("USE_CUDA").equals("1")) { - System.out.println("Skipping DirectML test because CUDA EP test is enabled."); - return; - } + String no_dml_test = Optional.ofNullable(System.getenv("NO_DML_TEST")).orElse("0");; + if (!no_dml_test.equals("1")) { + runProvider(OrtProvider.DIRECT_ML); } else { - runProvider(OrtProvider.CORE_ML); + System.out.println("Skipping DML test because NO_DML_TEST is set."); } } From 31fb04ba5536751f774e42a681a3de415db6b206 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 20:45:24 +0800 Subject: [PATCH 51/65] typo --- java/src/test/java/ai/onnxruntime/InferenceTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index 81f483fdb865f..23b1dda1f73d6 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -745,7 +745,7 @@ public void testCoreML() throws OrtException { @Test @EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { - String no_dml_test = Optional.ofNullable(System.getenv("NO_DML_TEST")).orElse("0");; + String no_dml_test = Optional.ofNullable(System.getenv("NO_DML_TEST")).orElse("0"); if (!no_dml_test.equals("1")) { runProvider(OrtProvider.DIRECT_ML); } else { From 659131f57c2f5afb8bf2bc57d4f1c1b44fad1b02 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 22:27:14 +0800 Subject: [PATCH 52/65] java test --- java/src/test/java/ai/onnxruntime/InferenceTest.java | 9 ++++++++- java/src/test/java/ai/onnxruntime/OnnxTensorTest.java | 1 + .../ai/onnxruntime/providers/ProviderOptionsTest.java | 6 
++++++ 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index 23b1dda1f73d6..5fe1f69c8a0da 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -650,7 +650,14 @@ public void testProviders() { int providersSize = providers.size(); assertTrue(providersSize > 0); assertTrue(providers.contains(OrtProvider.CPU)); - + String no_cuda_test = Optional.ofNullable(System.getenv("NO_CUDA_TEST")).orElse("0"); + if (no_cuda_test.equals("1") && providers.contains(OrtProvider.CUDA)) { + providers.remove(OrtProvider.CUDA); + } + String no_dml_test = Optional.ofNullable(System.getenv("NO_DML_TEST")).orElse("0"); + if (no_dml_test.equals("1") && providers.contains(OrtProvider.DIRECT_ML)) { + providers.remove(OrtProvider.DIRECT_ML); + } // Check that the providers are a copy of the original, note this does not enable the DNNL // provider providers.add(OrtProvider.DNNL); diff --git a/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java b/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java index 11af2726bd904..a2d9532ce93b0 100644 --- a/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java +++ b/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java @@ -17,6 +17,7 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; +import java.util.Optional; import java.util.SplittableRandom; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; diff --git a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java index 57c4eb3577fd0..d58bc87247d91 100644 --- a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java +++ b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java @@ -26,6 +26,8 @@ import java.util.EnumSet; import java.util.HashMap; 
import java.util.Map; +import java.util.Optional; + import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfSystemProperty; @@ -35,6 +37,10 @@ public class ProviderOptionsTest { @Test @EnabledIfSystemProperty(named = "USE_CUDA", matches = "1") public void testCUDAOptions() throws OrtException { + String no_cuda_test = Optional.ofNullable(System.getenv("NO_CUDA_TEST")).orElse("0"); + if (no_cuda_test.equals("1")) { + return; + } // Test standard options OrtCUDAProviderOptions cudaOpts = new OrtCUDAProviderOptions(0); cudaOpts.add("gpu_mem_limit", "" + (512 * 1024 * 1024)); From f8f3ac1d43fb9d662a004df52c51f3e0aff14bcd Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 22:27:48 +0800 Subject: [PATCH 53/65] java lint --- java/src/test/java/ai/onnxruntime/OnnxTensorTest.java | 1 - .../test/java/ai/onnxruntime/providers/ProviderOptionsTest.java | 1 - 2 files changed, 2 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java b/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java index a2d9532ce93b0..11af2726bd904 100644 --- a/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java +++ b/java/src/test/java/ai/onnxruntime/OnnxTensorTest.java @@ -17,7 +17,6 @@ import java.util.ArrayList; import java.util.Collections; import java.util.List; -import java.util.Optional; import java.util.SplittableRandom; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.Test; diff --git a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java index d58bc87247d91..e6baaa9cb0c60 100644 --- a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java +++ b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java @@ -27,7 +27,6 @@ import java.util.HashMap; import java.util.Map; import java.util.Optional; - import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfSystemProperty; From 
fe2f0a5928c556700508e2c5e92bcacd55c821a7 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Tue, 29 Oct 2024 23:42:57 +0800 Subject: [PATCH 54/65] split java test --- cmake/onnxruntime_java_unittests.cmake | 46 +++++++++++++++++--------- 1 file changed, 31 insertions(+), 15 deletions(-) diff --git a/cmake/onnxruntime_java_unittests.cmake b/cmake/onnxruntime_java_unittests.cmake index 7b57cf71e5ef0..0c7e2c3b1330e 100644 --- a/cmake/onnxruntime_java_unittests.cmake +++ b/cmake/onnxruntime_java_unittests.cmake @@ -6,26 +6,42 @@ FILE(TO_NATIVE_PATH ${GRADLE_EXECUTABLE} GRADLE_NATIVE_PATH) FILE(TO_NATIVE_PATH ${BIN_DIR} BINDIR_NATIVE_PATH) +function(run_java_unit_test SYSTEM_PROPERTY_DEFINITION) + set(GRADLE_TEST_ARGS + ${GRADLE_NATIVE_PATH} + test --rerun + cmakeCheck + --console=plain + -DcmakeBuildDir=${BINDIR_NATIVE_PATH} + -Dorg.gradle.daemon=false + ${SYSTEM_PROPERTY_DEFINITIONS}) + + if(WIN32) + list(PREPEND GRADLE_TEST_ARGS cmd /C) + endif() + + message(STATUS "gradle test command args: ${GRADLE_TEST_ARGS}") + + execute_process(COMMAND ${GRADLE_TEST_ARGS} + WORKING_DIRECTORY ${REPO_ROOT}/java + RESULT_VARIABLE HAD_ERROR) +endfunction() + message(STATUS "gradle additional system property definitions: ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}") -set(GRADLE_TEST_ARGS - ${GRADLE_NATIVE_PATH} - test --rerun - cmakeCheck - --console=plain - -DcmakeBuildDir=${BINDIR_NATIVE_PATH} - -Dorg.gradle.daemon=false - ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) +string(FIND "${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}" "-DUSE_CUDA=1" INDEX_CUDA) +string(FIND "${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}" "-DUSE_DML=1" INDEX_DML) -if(WIN32) - list(PREPEND GRADLE_TEST_ARGS cmd /C) -endif() -message(STATUS "gradle test command args: ${GRADLE_TEST_ARGS}") +if((INDEX_CUDA GREATER -1) AND (INDEX_DML GREATER -1)) + run_java_unit_test(${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) +else() + string(REPLACE "-DUSE_CUDA=1" "" GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) + 
run_java_unit_test(${GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS}) -execute_process(COMMAND ${GRADLE_TEST_ARGS} - WORKING_DIRECTORY ${REPO_ROOT}/java - RESULT_VARIABLE HAD_ERROR) + string(REPLACE "-DUSE_DML=1" "" GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) + run_java_unit_test(${GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS}) +endif() if(HAD_ERROR) message(FATAL_ERROR "Java Unitests failed") From b84eba7c06c4d84a9e28cda38a4deef6ec8466bc Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 30 Oct 2024 00:11:12 +0800 Subject: [PATCH 55/65] update --- cmake/onnxruntime_java_unittests.cmake | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cmake/onnxruntime_java_unittests.cmake b/cmake/onnxruntime_java_unittests.cmake index 0c7e2c3b1330e..e3ff4322ddb00 100644 --- a/cmake/onnxruntime_java_unittests.cmake +++ b/cmake/onnxruntime_java_unittests.cmake @@ -34,13 +34,13 @@ string(FIND "${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}" "-DUSE_DML=1" INDEX_DML) if((INDEX_CUDA GREATER -1) AND (INDEX_DML GREATER -1)) - run_java_unit_test(${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) -else() string(REPLACE "-DUSE_CUDA=1" "" GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) run_java_unit_test(${GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS}) string(REPLACE "-DUSE_DML=1" "" GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) run_java_unit_test(${GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS}) +else() + run_java_unit_test(${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) endif() if(HAD_ERROR) From 02a981330435030c091c526cfe06cd26362411de Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 30 Oct 2024 10:13:15 +0800 Subject: [PATCH 56/65] fix onnxruntime4j --- cmake/onnxruntime_java_unittests.cmake | 6 +++--- onnxruntime/test/util/default_providers.cc | 8 ++++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cmake/onnxruntime_java_unittests.cmake b/cmake/onnxruntime_java_unittests.cmake index e3ff4322ddb00..dfd6ad91c99fa 
100644 --- a/cmake/onnxruntime_java_unittests.cmake +++ b/cmake/onnxruntime_java_unittests.cmake @@ -35,12 +35,12 @@ string(FIND "${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}" "-DUSE_DML=1" INDEX_DML) if((INDEX_CUDA GREATER -1) AND (INDEX_DML GREATER -1)) string(REPLACE "-DUSE_CUDA=1" "" GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) - run_java_unit_test(${GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS}) + run_java_unit_test("${GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS}") string(REPLACE "-DUSE_DML=1" "" GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) - run_java_unit_test(${GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS}) + run_java_unit_test("${GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS}") else() - run_java_unit_test(${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) + run_java_unit_test("${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}") endif() if(HAD_ERROR) diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index ec391c397abac..4aeaf670f3484 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -123,8 +123,8 @@ std::unique_ptr DefaultOpenVINOExecutionProvider() { std::unique_ptr DefaultCudaExecutionProvider() { #ifdef USE_CUDA #ifdef USE_DML - const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); - if (no_dml_ep_test != "1") { + const std::string no_cuda_ep_test = Env::Default().GetEnvironmentVar("NO_CUDA_TEST"); + if (no_cuda_ep_test == "1") { return nullptr; } #endif @@ -331,8 +331,8 @@ std::unique_ptr DefaultCannExecutionProvider() { std::unique_ptr DefaultDmlExecutionProvider() { #ifdef USE_DML #ifdef USE_CUDA - const std::string no_cuda_ep_test = Env::Default().GetEnvironmentVar("NO_CUDA_TEST"); - if (no_cuda_ep_test != "1") { + const std::string no_dml_ep_test = Env::Default().GetEnvironmentVar("NO_DML_TEST"); + if (no_dml_ep_test == "1") { return nullptr; } #endif From 171c36fca2ea4c589ef21e640b48bdc07244a13d Mon Sep 
17 00:00:00 2001 From: Yi Zhang Date: Wed, 30 Oct 2024 11:12:19 +0800 Subject: [PATCH 57/65] not using predefined marco for EP --- onnxruntime/core/session/lora_adapters.cc | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/onnxruntime/core/session/lora_adapters.cc b/onnxruntime/core/session/lora_adapters.cc index a095027a1d417..ef368d94b31c8 100644 --- a/onnxruntime/core/session/lora_adapters.cc +++ b/onnxruntime/core/session/lora_adapters.cc @@ -62,15 +62,16 @@ namespace { struct DataTransfer { std::unique_ptr ep; std::unique_ptr data_transfer; + bool is_dml = false; Status CopyTensor(const Tensor& src, Tensor& dst) const { return data_transfer->CopyTensor(src, dst); } Status Sync() const { -#if USE_DML - return ep->Sync(); -#else - return Status::OK(); -#endif + if (is_dml) { + return ep->Sync(); + } else { + return Status::OK(); + } } }; } // namespace @@ -94,6 +95,7 @@ static Status GetDataTransfer(const OrtMemoryInfo& mem_info, [[maybe_unused]] Da #ifdef USE_DML auto ep_factory = onnxruntime::DMLProviderFactoryCreator::Create(ConfigOptions{}, 0, false, false, false); dt.ep = ep_factory->CreateProvider(); + dt.is_dml = true; dt.data_transfer = dt.ep->GetDataTransfer(); #else status = ORT_MAKE_STATUS(ONNXRUNTIME, FAIL, "DML provider is not enabled in this build"); From c1e01441f7d3ccef88661d69637e0b30bff3215d Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Wed, 30 Oct 2024 11:12:58 +0800 Subject: [PATCH 58/65] update --- .../azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml index c2d44f4d249aa..d6b25c98936f0 100644 --- a/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml @@ -50,7 +50,7 @@ 
stages: msbuildPlatform: x64 packageName: x64-cuda CudaVersion: ${{ parameters.CudaVersion }} - buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --build_csharp --parallel + buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=60;61;70;75;80" --use_dml --build_csharp --parallel runTests: ${{ parameters.RunOnnxRuntimeTests }} buildJava: ${{ parameters.buildJava }} java_artifact_id: onnxruntime_gpu From 19c4a054a3e88120e97d63ab13b46bbab4eafd1f Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 31 Oct 2024 08:55:45 +0800 Subject: [PATCH 59/65] revert onnxruntime_java_unittests.cmake --- cmake/onnxruntime_java_unittests.cmake | 46 +++++++++----------------- 1 file changed, 15 insertions(+), 31 deletions(-) diff --git a/cmake/onnxruntime_java_unittests.cmake b/cmake/onnxruntime_java_unittests.cmake index dfd6ad91c99fa..7b57cf71e5ef0 100644 --- a/cmake/onnxruntime_java_unittests.cmake +++ b/cmake/onnxruntime_java_unittests.cmake @@ -6,42 +6,26 @@ FILE(TO_NATIVE_PATH ${GRADLE_EXECUTABLE} GRADLE_NATIVE_PATH) FILE(TO_NATIVE_PATH ${BIN_DIR} BINDIR_NATIVE_PATH) -function(run_java_unit_test SYSTEM_PROPERTY_DEFINITION) - set(GRADLE_TEST_ARGS - ${GRADLE_NATIVE_PATH} - test --rerun - cmakeCheck - --console=plain - -DcmakeBuildDir=${BINDIR_NATIVE_PATH} - -Dorg.gradle.daemon=false - ${SYSTEM_PROPERTY_DEFINITIONS}) - - if(WIN32) - list(PREPEND GRADLE_TEST_ARGS cmd /C) - endif() - - message(STATUS "gradle test command args: ${GRADLE_TEST_ARGS}") - - execute_process(COMMAND ${GRADLE_TEST_ARGS} - WORKING_DIRECTORY ${REPO_ROOT}/java - RESULT_VARIABLE HAD_ERROR) -endfunction() - message(STATUS "gradle additional system property definitions: ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}") -string(FIND "${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}" "-DUSE_CUDA=1" INDEX_CUDA) -string(FIND 
"${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}" "-DUSE_DML=1" INDEX_DML) +set(GRADLE_TEST_ARGS + ${GRADLE_NATIVE_PATH} + test --rerun + cmakeCheck + --console=plain + -DcmakeBuildDir=${BINDIR_NATIVE_PATH} + -Dorg.gradle.daemon=false + ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) +if(WIN32) + list(PREPEND GRADLE_TEST_ARGS cmd /C) +endif() -if((INDEX_CUDA GREATER -1) AND (INDEX_DML GREATER -1)) - string(REPLACE "-DUSE_CUDA=1" "" GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) - run_java_unit_test("${GRADLE_DML_YSTEM_PROPERTY_DEFINITIONS}") +message(STATUS "gradle test command args: ${GRADLE_TEST_ARGS}") - string(REPLACE "-DUSE_DML=1" "" GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS ${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}) - run_java_unit_test("${GRADLE_CUDA_YSTEM_PROPERTY_DEFINITIONS}") -else() - run_java_unit_test("${GRADLE_SYSTEM_PROPERTY_DEFINITIONS}") -endif() +execute_process(COMMAND ${GRADLE_TEST_ARGS} + WORKING_DIRECTORY ${REPO_ROOT}/java + RESULT_VARIABLE HAD_ERROR) if(HAD_ERROR) message(FATAL_ERROR "Java Unitests failed") From 29a9a60c835e7e9bef515056033ef5b8742db167 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 31 Oct 2024 09:01:52 +0800 Subject: [PATCH 60/65] restore java test and disable testDML --- .../java/ai/onnxruntime/InferenceTest.java | 24 ++++--------------- 1 file changed, 4 insertions(+), 20 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/InferenceTest.java b/java/src/test/java/ai/onnxruntime/InferenceTest.java index 5fe1f69c8a0da..15d89b536b39a 100644 --- a/java/src/test/java/ai/onnxruntime/InferenceTest.java +++ b/java/src/test/java/ai/onnxruntime/InferenceTest.java @@ -650,14 +650,7 @@ public void testProviders() { int providersSize = providers.size(); assertTrue(providersSize > 0); assertTrue(providers.contains(OrtProvider.CPU)); - String no_cuda_test = Optional.ofNullable(System.getenv("NO_CUDA_TEST")).orElse("0"); - if (no_cuda_test.equals("1") && providers.contains(OrtProvider.CUDA)) { - 
providers.remove(OrtProvider.CUDA); - } - String no_dml_test = Optional.ofNullable(System.getenv("NO_DML_TEST")).orElse("0"); - if (no_dml_test.equals("1") && providers.contains(OrtProvider.DIRECT_ML)) { - providers.remove(OrtProvider.DIRECT_ML); - } + // Check that the providers are a copy of the original, note this does not enable the DNNL // provider providers.add(OrtProvider.DNNL); @@ -697,12 +690,7 @@ public void testSymbolicDimensionAssignment() throws OrtException { @Test @EnabledIfSystemProperty(named = "USE_CUDA", matches = "1") public void testCUDA() throws OrtException { - String no_cuda_test = Optional.ofNullable(System.getenv("NO_CUDA_TEST")).orElse("0"); - if (!no_cuda_test.equals("1")) { - runProvider(OrtProvider.CUDA); - } else { - System.out.println("Skipping CUDA test because NO_CUDA_TEST is set."); - } + runProvider(OrtProvider.CUDA); } @Test @@ -749,15 +737,11 @@ public void testCoreML() throws OrtException { runProvider(OrtProvider.CORE_ML); } + @Disabled("DirectML Java API hasn't been supported yet") @Test @EnabledIfSystemProperty(named = "USE_DML", matches = "1") public void testDirectML() throws OrtException { - String no_dml_test = Optional.ofNullable(System.getenv("NO_DML_TEST")).orElse("0"); - if (!no_dml_test.equals("1")) { - runProvider(OrtProvider.DIRECT_ML); - } else { - System.out.println("Skipping DML test because NO_DML_TEST is set."); - } + runProvider(OrtProvider.DIRECT_ML); } @Test From c5b1fc40d641592db823deac350623770249abca Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 31 Oct 2024 10:16:54 +0800 Subject: [PATCH 61/65] revert one change --- .../java/ai/onnxruntime/providers/ProviderOptionsTest.java | 5 ----- 1 file changed, 5 deletions(-) diff --git a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java index e6baaa9cb0c60..57c4eb3577fd0 100644 --- a/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java +++ 
b/java/src/test/java/ai/onnxruntime/providers/ProviderOptionsTest.java @@ -26,7 +26,6 @@ import java.util.EnumSet; import java.util.HashMap; import java.util.Map; -import java.util.Optional; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfSystemProperty; @@ -36,10 +35,6 @@ public class ProviderOptionsTest { @Test @EnabledIfSystemProperty(named = "USE_CUDA", matches = "1") public void testCUDAOptions() throws OrtException { - String no_cuda_test = Optional.ofNullable(System.getenv("NO_CUDA_TEST")).orElse("0"); - if (no_cuda_test.equals("1")) { - return; - } // Test standard options OrtCUDAProviderOptions cudaOpts = new OrtCUDAProviderOptions(0); cudaOpts.add("gpu_mem_limit", "" + (512 * 1024 * 1024)); From c8b24ce4fe483577d731235cd893fb9c7c513735 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 31 Oct 2024 15:40:56 +0800 Subject: [PATCH 62/65] code learn --- onnxruntime/test/common/cuda_op_test_utils.h | 6 +++ .../test/contrib_ops/beam_search_test.cc | 8 +--- .../test/framework/allocation_planner_test.cc | 37 +++++------------- .../test/framework/inference_session_test.cc | 9 ++--- .../test/framework/memcpy_transformer_test.cc | 38 +++++++------------ .../providers/cpu/tensor/gather_op_test.cc | 7 ++-- 6 files changed, 38 insertions(+), 67 deletions(-) diff --git a/onnxruntime/test/common/cuda_op_test_utils.h b/onnxruntime/test/common/cuda_op_test_utils.h index 6833c3785466d..5700b936dca50 100644 --- a/onnxruntime/test/common/cuda_op_test_utils.h +++ b/onnxruntime/test/common/cuda_op_test_utils.h @@ -5,6 +5,12 @@ #include "test/util/include/default_providers.h" +#define SKIP_CUDA_TEST_WITH_DML \ + if (DefaultCudaExecutionProvider() == nullptr) { \ + std::cout << "Skip cuda ep test in " << ::testing::UnitTest::GetInstance()->current_test_info()->name() << std::endl; \ + return; \ + } + namespace onnxruntime { namespace test { diff --git a/onnxruntime/test/contrib_ops/beam_search_test.cc 
b/onnxruntime/test/contrib_ops/beam_search_test.cc index 09d4fd470affd..f6fc9ea7662cb 100644 --- a/onnxruntime/test/contrib_ops/beam_search_test.cc +++ b/onnxruntime/test/contrib_ops/beam_search_test.cc @@ -74,9 +74,7 @@ TEST(BeamSearchTest, GptBeamSearchFp32) { Ort::SessionOptions session_options; #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif #ifdef USE_CUDA OrtCUDAProviderOptionsV2 cuda_options; @@ -172,9 +170,7 @@ TEST(BeamSearchTest, GptBeamSearchFp16) { if (enable_cuda || enable_rocm) { Ort::SessionOptions session_options; #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif #ifdef USE_CUDA OrtCUDAProviderOptionsV2 cuda_options; diff --git a/onnxruntime/test/framework/allocation_planner_test.cc b/onnxruntime/test/framework/allocation_planner_test.cc index 9d9e3c825b05f..a7f8a6424aa50 100644 --- a/onnxruntime/test/framework/allocation_planner_test.cc +++ b/onnxruntime/test/framework/allocation_planner_test.cc @@ -28,6 +28,7 @@ using json = nlohmann::json; #ifdef USE_CUDA #include "core/providers/cuda/cuda_execution_provider.h" #include "core/providers/cuda/cuda_provider_factory.h" +#include "test/common/cuda_op_test_utils.h" #endif // USE_CUDA #include "core/session/onnxruntime_session_options_config_keys.h" using namespace ONNX_NAMESPACE; @@ -1280,9 +1281,7 @@ TEST_F(PlannerTest, LocationPlanningForImplicitInputsWithoutExplicitConsumersInM // node1(CPU ep)->node2(CPU ep)->node3(CUDA ep)->node4(CPU ep) TEST_F(PlannerTest, MultiStream) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif ONNX_NAMESPACE::TensorProto tensor; @@ -1332,9 +1331,7 @@ TEST_F(PlannerTest, MultiStream) { // All 3 nodes are CUDA EP, node1 is in stream0, node2 is in stream1, node3 is in stream2 TEST_F(PlannerTest, 
MultiStream1StreamWaitFor2Streams) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif std::unique_ptr<::onnxruntime::KernelDef> cudaKernel = KernelDefBuilder().SetName("Transpose").Provider(kCudaExecutionProvider).SinceVersion(1, 10).Build(); std::unique_ptr<::onnxruntime::KernelDef> cudaKernelAdd = KernelDefBuilder().SetName("Add").Provider(kCudaExecutionProvider).SinceVersion(1, 10).Build(); @@ -1378,9 +1375,7 @@ TEST_F(PlannerTest, MultiStream1StreamWaitFor2Streams) { // node1's output, which is consumed by both node2 and node3, is in CPU. TEST_F(PlannerTest, MultiStreamCudaEPNodeCPUOutput) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif MemcpyToHostInCuda_TransposeInCudaAndCpu("./testdata/multi_stream_models/memcpyToHost_same_stream_with_transpose.json"); EXPECT_EQ(GetState().GetExecutionPlan()->execution_plan.size(), 2) << "2 logic streams"; @@ -1446,9 +1441,7 @@ TEST_F(PlannerTest, MultiStreamMultiOutput) { // as there is a specific order between node1 and node2 if they are in the same stream, thus node3 will only need to wait the latter one TEST_F(PlannerTest, MultiStream2NodesSameStreamConsumedBy1NodeInDifferentStream) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif std::unique_ptr<::onnxruntime::KernelDef> cudaKernel = KernelDefBuilder().SetName("Transpose").Provider(kCudaExecutionProvider).SinceVersion(1, 10).Build(); std::string Graph_input1("Graph_input1"), Graph_input2("Graph_input2"), Graph_input3("Graph_input3"), Arg1("Arg1"), Arg2("Arg2"), Arg3("Arg3"), node1("node1"), node2("node2"), node3("node3"); @@ -1488,9 +1481,7 @@ TEST_F(PlannerTest, MultiStream2NodesSameStreamConsumedBy1NodeInDifferentStream) #if !defined(__wasm__) && defined(ORT_ENABLE_STREAM) TEST_F(PlannerTest, 
ParaPlanCreation) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif TypeProto graph_in_type; graph_in_type.mutable_tensor_type()->set_elem_type(TensorProto_DataType_FLOAT); @@ -1934,9 +1925,7 @@ TEST_F(PlannerTest, ParaPlanCreation) { TEST_F(PlannerTest, TestMultiStreamConfig) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif const char* type = "DeviceBasedPartitioner"; @@ -2013,9 +2002,7 @@ TEST_F(PlannerTest, TestMultiStreamSaveConfig) { // Load with partition config where a node is missing, session load expected to fail. TEST_F(PlannerTest, TestMultiStreamMissingNodeConfig) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif const char* config_file_path = "./testdata/multi_stream_models/conv_add_relu_single_stream_missing_node.json"; @@ -2039,9 +2026,7 @@ TEST_F(PlannerTest, TestMultiStreamMissingNodeConfig) { // Load with partition config where streams and devices has mismatch TEST_F(PlannerTest, TestMultiStreamMismatchDevice) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif const char* config_file_path = "./testdata/multi_stream_models/conv_add_relu_single_stream_mismatch_device.json"; SessionOptions sess_opt; @@ -2132,9 +2117,7 @@ TEST_F(PlannerTest, TestCpuIf) { // TEST(AllocationPlannerTest, ReusedInputCrossDifferentStreams) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif SessionOptions sess_opt; diff --git a/onnxruntime/test/framework/inference_session_test.cc b/onnxruntime/test/framework/inference_session_test.cc index e4f9697d5242e..9c7e6e9761728 100644 --- 
a/onnxruntime/test/framework/inference_session_test.cc +++ b/onnxruntime/test/framework/inference_session_test.cc @@ -34,6 +34,7 @@ #ifdef USE_CUDA #include "core/providers/cuda/cuda_provider_factory.h" #include "core/providers/cuda/gpu_data_transfer.h" +#include "test/common/cuda_op_test_utils.h" #endif #ifdef USE_TENSORRT #include "core/providers/tensorrt/tensorrt_provider_options.h" #endif @@ -2173,9 +2174,7 @@ TEST(InferenceSessionTests, TestStrictShapeInference) { // disable it, since we are going to enable parallel execution with cuda ep TEST(InferenceSessionTests, DISABLED_TestParallelExecutionWithCudaProvider) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif string model_uri = "testdata/transform/fusion/fuse-conv-bn-mul-add-unsqueeze.onnx"; @@ -2201,9 +2200,7 @@ TEST(InferenceSessionTests, DISABLED_TestParallelExecutionWithCudaProvider) { TEST(InferenceSessionTests, TestArenaShrinkageAfterRun) { #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif OrtArenaCfg arena_cfg; diff --git a/onnxruntime/test/framework/memcpy_transformer_test.cc b/onnxruntime/test/framework/memcpy_transformer_test.cc index 3bb6bb2ffd097..aae6a3f8ab15f 100644 --- a/onnxruntime/test/framework/memcpy_transformer_test.cc +++ b/onnxruntime/test/framework/memcpy_transformer_test.cc @@ -9,6 +9,9 @@ #include "default_providers.h" #include "gtest/gtest.h" #include "test_utils.h" +#ifdef USE_CUDA +#include "test/common/cuda_op_test_utils.h" +#endif #include "test/test_environment.h" #include "asserts.h" @@ -74,6 +77,9 @@ void ExpectCopy(const onnxruntime::Node& source, const std::string copy_op, #ifdef USE_CUDA TEST(TransformerTest, MemcpyTransformerTest) { +#if defined(USE_CUDA) && defined(USE_DML) + SKIP_CUDA_TEST_WITH_DMLCUDA; +#endif std::unordered_map<std::string, int> domain_to_version; domain_to_version[kOnnxDomain] = 7; auto model 
= std::make_shared<onnxruntime::Model>("test", false, ModelMetaData(), PathString(), @@ -106,11 +112,7 @@ TEST(TransformerTest, MemcpyTransformerTest) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; -#if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() != nullptr) { - ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); - } -#else +#if defined(USE_CUDA) ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); #endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCpuExecutionProvider, @@ -118,12 +120,6 @@ TEST(TransformerTest, MemcpyTransformerTest) { KernelRegistryManager test_registry_manager; ASSERT_STATUS_OK(test_registry_manager.RegisterKernels(execution_providers)); -#if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } -#endif - MemcpyTransformer transformer({onnxruntime::kCudaExecutionProvider}, test_registry_manager); bool modified = false; @@ -141,6 +137,9 @@ TEST(TransformerTest, MemcpyTransformerTest) { } TEST(TransformerTest, MemcpyTransformerTestCudaFirst) { +#if defined(USE_CUDA) && defined(USE_DML) + SKIP_CUDA_TEST_WITH_DML; +#endif std::unordered_map<std::string, int> domain_to_version; domain_to_version[kOnnxDomain] = 7; auto model = std::make_shared<onnxruntime::Model>("test", false, ModelMetaData(), PathString(), @@ -173,11 +172,6 @@ KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; -#if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } -#endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); @@ -301,9 +295,7 @@ TEST(TransformerTest, TestInitializerDuplicationInSubgraph) { KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; 
#if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); @@ -349,9 +341,7 @@ TEST(TransformerTest, MemcpyTransformerTestGraphInputConsumedOnMultipleDevices) KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); @@ -457,9 +447,7 @@ TEST(TransformerTest, MemcpyTransformerTestImplicitInputConsumedOnMultipleDevice KernelRegistryManager kernel_registry_manager; ExecutionProviders execution_providers; #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif ASSERT_STATUS_OK(execution_providers.Add(onnxruntime::kCudaExecutionProvider, DefaultCudaExecutionProvider())); diff --git a/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc b/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc index ae838f10b4153..0f23e4c39d7e2 100644 --- a/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc +++ b/onnxruntime/test/providers/cpu/tensor/gather_op_test.cc @@ -3,6 +3,9 @@ #include "core/session/onnxruntime_session_options_config_keys.h" #include "gtest/gtest.h" +#if USE_CUDA +#include "test/common/cuda_op_test_utils.h" +#endif #include "test/providers/provider_test_utils.h" #include "test/util/include/default_providers.h" @@ -123,9 +126,7 @@ TEST(GatherOpTest, Gather_invalid_index_gpu) { 0.0f, 0.0f, 0.0f, 0.0f}); #if defined(USE_CUDA) && defined(USE_DML) - if (DefaultCudaExecutionProvider() == nullptr) { - return; - } + SKIP_CUDA_TEST_WITH_DML; #endif // On GPU, just set the value to 0 instead of report error. 
exclude all other providers test From 04856f4a0e61bc2ea33645ccd67841ef80978d4e Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 31 Oct 2024 18:24:46 +0800 Subject: [PATCH 63/65] gtest_skip --- onnxruntime/test/common/cuda_op_test_utils.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/onnxruntime/test/common/cuda_op_test_utils.h b/onnxruntime/test/common/cuda_op_test_utils.h index 5700b936dca50..773369f35850b 100644 --- a/onnxruntime/test/common/cuda_op_test_utils.h +++ b/onnxruntime/test/common/cuda_op_test_utils.h @@ -7,8 +7,7 @@ #define SKIP_CUDA_TEST_WITH_DML \ if (DefaultCudaExecutionProvider() == nullptr) { \ - std::cout << "Skip cuda ep test in " << ::testing::UnitTest::GetInstance()->current_test_info()->name() << std::endl; \ - return; \ + GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled"; \ } namespace onnxruntime { From 9630aebfe63c7f796522bfc4c993717115a4f353 Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 31 Oct 2024 18:40:02 +0800 Subject: [PATCH 64/65] lint --- onnxruntime/test/common/cuda_op_test_utils.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/test/common/cuda_op_test_utils.h b/onnxruntime/test/common/cuda_op_test_utils.h index 773369f35850b..d3e069237217e 100644 --- a/onnxruntime/test/common/cuda_op_test_utils.h +++ b/onnxruntime/test/common/cuda_op_test_utils.h @@ -5,9 +5,9 @@ #include "test/util/include/default_providers.h" -#define SKIP_CUDA_TEST_WITH_DML \ - if (DefaultCudaExecutionProvider() == nullptr) { \ - GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled"; \ +#define SKIP_CUDA_TEST_WITH_DML \ + if (DefaultCudaExecutionProvider() == nullptr) { \ + GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled"; \ } namespace onnxruntime { From 526133ad9b82af208ee164351ed2473cd23b548a Mon Sep 17 00:00:00 2001 From: Yi Zhang Date: Thu, 31 Oct 2024 20:16:16 +0800 Subject: [PATCH 65/65] typo --- 
onnxruntime/test/framework/memcpy_transformer_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/test/framework/memcpy_transformer_test.cc b/onnxruntime/test/framework/memcpy_transformer_test.cc index aae6a3f8ab15f..2313f00e4d123 100644 --- a/onnxruntime/test/framework/memcpy_transformer_test.cc +++ b/onnxruntime/test/framework/memcpy_transformer_test.cc @@ -78,7 +78,7 @@ void ExpectCopy(const onnxruntime::Node& source, const std::string copy_op, TEST(TransformerTest, MemcpyTransformerTest) { #if defined(USE_CUDA) && defined(USE_DML) - SKIP_CUDA_TEST_WITH_DMLCUDA; + SKIP_CUDA_TEST_WITH_DML; #endif std::unordered_map<std::string, int> domain_to_version; domain_to_version[kOnnxDomain] = 7;