Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build DML in Windows GPU CI pipeline #22869

Draft
wants to merge 21 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,15 @@
import java.util.HashMap;
import java.util.Map;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.DisabledIfSystemProperty;
import org.junit.jupiter.api.condition.EnabledIfSystemProperty;

public class ProviderOptionsTest {
private static final OrtEnvironment env = TestHelpers.getOrtEnvironment();

@Test
@EnabledIfSystemProperty(named = "USE_CUDA", matches = "1")
@DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
public void testCUDAOptions() throws OrtException {
// Test standard options
OrtCUDAProviderOptions cudaOpts = new OrtCUDAProviderOptions(0);
Expand Down Expand Up @@ -61,6 +63,7 @@ public void testCUDAOptions() throws OrtException {

@Test
@EnabledIfSystemProperty(named = "USE_TENSORRT", matches = "1")
@DisabledIfSystemProperty(named = "NO_CUDA_TEST", matches = "1")
public void testTensorRT() throws OrtException {
// Test standard options
OrtTensorRTProviderOptions rtOpts = new OrtTensorRTProviderOptions(0);
Expand Down
2 changes: 1 addition & 1 deletion onnxruntime/test/providers/cuda/cuda_provider_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ ProviderInfo_CUDA& GetProviderInfo_CUDA_Test();

namespace test {
namespace cuda {
TEST(CUDA_EP_Unittest, All) {
TEST(CudaEpUnittest, All) {
onnxruntime::ProviderInfo_CUDA& ep = onnxruntime::GetProviderInfo_CUDA_Test();
ep.TestAll();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
namespace onnxruntime {
namespace test {

TEST(AllocatorTest, CUDAAllocatorTest) {
TEST(CudaEpAllocatorTest, CUDAAllocatorTest) {
OrtDevice::DeviceId cuda_device_id = 0;

// ensure CUDA device is available.
Expand Down Expand Up @@ -77,7 +77,7 @@ TEST(AllocatorTest, CUDAAllocatorTest) {
}

// Test that we fall back to smaller allocations if the growth of the arena exceeds the available memory.
TEST(AllocatorTest, CUDAAllocatorFallbackTest) {
TEST(CudaEpAllocatorTest, CUDAAllocatorFallbackTest) {
OrtDevice::DeviceId cuda_device_id = 0;

size_t free = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ using onnxruntime::contrib::attention::AttentionBackend;
namespace onnxruntime {
namespace test {

TEST(AttentionKernelOptionsTest, NonZeroValue) {
TEST(CudaEpAttentionKernelOptionsTest, NonZeroValue) {
{
AttentionKernelOptions options;
int value = static_cast<int>(AttentionBackend::FLASH_ATTENTION) | static_cast<int>(AttentionBackend::EFFICIENT_ATTENTION);
Expand Down Expand Up @@ -156,7 +156,7 @@ TEST(AttentionKernelOptionsTest, NonZeroValue) {
}

// Test all environment variables take effect when option value is 0.
TEST(AttentionKernelOptionsTest, DefaultOptionWithEnvVar) {
TEST(CudaEpAttentionKernelOptionsTest, DefaultOptionWithEnvVar) {
constexpr int value = 0;
ScopedEnvironmentVariables scoped_env_vars{
EnvVarMap{
Expand Down Expand Up @@ -186,7 +186,7 @@ TEST(AttentionKernelOptionsTest, DefaultOptionWithEnvVar) {
}

// Test default min sequence lengths when environment variables are not set.
TEST(AttentionKernelOptionsTest, DefaultMinSeqLens) {
TEST(CudaEpAttentionKernelOptionsTest, DefaultMinSeqLens) {
constexpr int value = 0;
ScopedEnvironmentVariables scoped_env_vars{
EnvVarMap{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ void ComputeTopKReference(const std::vector<float>& values,
}
}

TEST(TestBeamSearch, TopK) {
TEST(CudaEpTestBeamSearch, TopK) {
int32_t batch_size = 4;
int32_t beam_size = 4;
int32_t vocab_size = 50257;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -230,7 +230,7 @@ void testPrepack(int rows, int columns) {
}

// TODO: this code runs on the CPU but targets sm80 only; consider enabling it only when testing on sm80.
TEST(BlkQ4_GEMM, PrepackSm80Test) {
TEST(CudaEpBlkQ4_GEMM, PrepackSm80Test) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand Down Expand Up @@ -263,7 +263,7 @@ TEST(BlkQ4_GEMM, PrepackSm80Test) {
testPrepack<true, false>(256, 256);
}

TEST(BlkQ4_GEMM, Sm80RowBlockingTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80RowBlockingTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand Down Expand Up @@ -292,7 +292,7 @@ TEST(BlkQ4_GEMM, Sm80RowBlockingTest) {
onnxruntime::cuda::test::run_blkq4_gemm<64, false, false, true>(256, 1024, 576);
}

TEST(BlkQ4_GEMM, Sm80ColBlockingTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80ColBlockingTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand All @@ -305,7 +305,7 @@ TEST(BlkQ4_GEMM, Sm80ColBlockingTest) {
onnxruntime::cuda::test::run_blkq4_gemm<64, true, false, true>(256, 1024, 576);
}

TEST(BlkQ4_GEMM, Sm80SmallMTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80SmallMTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand All @@ -326,7 +326,7 @@ TEST(BlkQ4_GEMM, Sm80SmallMTest) {
onnxruntime::cuda::test::run_blkq4_gemm<64, true, true, true>(16, 1024, 576);
}

TEST(BlkQ4_GEMM, Sm80SmallTileKernelTest) {
TEST(CudaEpBlkQ4_GEMM, Sm80SmallTileKernelTest) {
Status status = onnxruntime::cuda::test::sm80_supported();
if (!status.IsOK()) {
// skip the test if sm80 is not supported
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ namespace cuda {
namespace test {
// TODO: Since the "DeferredRelease" has been migrated to CudaStream class,
// we should migrate this test from CudaEP unit test to CudaStream unit test.
TEST(TestDeferredRelease, WithArena) {
TEST(CudaEpTestDeferredRelease, WithArena) {
// Create CUDA EP.
CUDAExecutionProviderInfo info;
CUDAExecutionProvider ep(info);
Expand Down Expand Up @@ -52,7 +52,7 @@ TEST(TestDeferredRelease, WithArena) {
ORT_THROW_IF_ERROR(ep.OnRunEnd(true, run_opts));
}

TEST(TestDeferredRelease, WithoutArena) {
TEST(CudaEpTestDeferredRelease, WithoutArena) {
// Create CUDA EP.
CUDAExecutionProviderInfo info;
CUDAExecutionProvider ep(info);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ void TestFillCorrectness(size_t num_elements, TElement value) {
}
} // namespace

TEST(CudaUtilsTest, FillCorrectness) {
TEST(CudaEpUnittest, FillCorrectness) {
TestFillCorrectness<int8_t>(1 << 20, 1);
TestFillCorrectness<int16_t>(1 << 20, 2);
TestFillCorrectness<int32_t>(1 << 20, 3);
Expand Down
12 changes: 6 additions & 6 deletions onnxruntime/test/providers/cuda/test_cases/gemm_options_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ namespace onnxruntime {
namespace cuda {
namespace test {

TEST(CudaGemmOptions, TestDefaultOptions) {
TEST(CudaEpGemmOptions, TestDefaultOptions) {
HalfGemmOptions gemm_options;
ASSERT_FALSE(gemm_options.IsCompute16F());
#if defined(USE_CUDA)
Expand All @@ -22,7 +22,7 @@ TEST(CudaGemmOptions, TestDefaultOptions) {
#endif
}

TEST(CudaGemmOptions, TestCompute16F) {
TEST(CudaEpGemmOptions, TestCompute16F) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(1);
ASSERT_TRUE(gemm_options.IsCompute16F());
Expand All @@ -35,7 +35,7 @@ TEST(CudaGemmOptions, TestCompute16F) {
#endif
}

TEST(CudaGemmOptions, NoReducedPrecision) {
TEST(CudaEpGemmOptions, NoReducedPrecision) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(2);
ASSERT_FALSE(gemm_options.IsCompute16F());
Expand All @@ -48,7 +48,7 @@ TEST(CudaGemmOptions, NoReducedPrecision) {
#endif
}

TEST(CudaGemmOptions, Pedantic) {
TEST(CudaEpGemmOptions, Pedantic) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(4);
ASSERT_FALSE(gemm_options.IsCompute16F());
Expand All @@ -61,7 +61,7 @@ TEST(CudaGemmOptions, Pedantic) {
#endif
}

TEST(CudaGemmOptions, Compute16F_Pedantic) {
TEST(CudaEpGemmOptions, Compute16F_Pedantic) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(5);
ASSERT_TRUE(gemm_options.IsCompute16F());
Expand All @@ -74,7 +74,7 @@ TEST(CudaGemmOptions, Compute16F_Pedantic) {
#endif
}

TEST(CudaGemmOptions, Compute16F_NoReducedPrecision) {
TEST(CudaEpGemmOptions, Compute16F_NoReducedPrecision) {
HalfGemmOptions gemm_options;
gemm_options.Initialize(3);
ASSERT_TRUE(gemm_options.IsCompute16F());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ void ComputeTop1Reference(const std::vector<float>& values,
}
}

TEST(TestGreedySearch, TopOne) {
TEST(CudaEpTestGreedySearch, TopOne) {
int32_t batch_size = 4;
int32_t vocab_size = 50257;
int32_t batch_x_vocab = batch_size * vocab_size;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -179,7 +179,7 @@ void TestReduceColumnsToColumn(int m, int n, float relative_error_tolerance = 1e
}
} // namespace

TEST(ReductionFunctionsTest, ReduceRowToScalar) {
TEST(CudaEpReductionFunctionsTest, ReduceRowToScalar) {
TestReduceRowToScalarApis(3);
TestReduceRowToScalarApis(19);
TestReduceRowToScalarApis(123);
Expand All @@ -188,7 +188,7 @@ TEST(ReductionFunctionsTest, ReduceRowToScalar) {
TestReduceRowToScalarApis(941736, 2e-4f);
}

TEST(ReductionFunctionsTest, ReduceRowsToRow) {
TEST(CudaEpReductionFunctionsTest, ReduceRowsToRow) {
for (int m : {3, 193, 2945}) {
for (int n : {3, 193, 2945}) {
TestReduceRowsToRow(m, n, true);
Expand All @@ -197,15 +197,15 @@ TEST(ReductionFunctionsTest, ReduceRowsToRow) {
}
}

TEST(ReductionFunctionsTest, ReduceColumnsToColumn) {
TEST(CudaEpReductionFunctionsTest, ReduceColumnsToColumn) {
for (int m : {3, 193, 2945}) {
for (int n : {3, 193, 2945}) {
TestReduceColumnsToColumn(m, n);
}
}
}

TEST(ReductionFunctionsTest, BufferOffsets) {
TEST(CudaEpReductionFunctionsTest, BufferOffsets) {
const int m = 2048;
const int n = 1024;
const TensorShape shape{m, n};
Expand Down Expand Up @@ -240,7 +240,7 @@ TEST(ReductionFunctionsTest, BufferOffsets) {
}
}

TEST(ReductionFunctionsTest, InvalidBufferSize) {
TEST(CudaEpReductionFunctionsTest, InvalidBufferSize) {
const int m = 2048;
const int n = 1024;
const TensorShape shape{m, n};
Expand All @@ -262,7 +262,7 @@ TEST(ReductionFunctionsTest, InvalidBufferSize) {
ASSERT_FALSE(status.IsOK());
}

TEST(ReductionFunctionsTest, GetApplicableMatrixReduction) {
TEST(CudaEpReductionFunctionsTest, GetApplicableMatrixReduction) {
auto test_get_applicable_matrix_reduction =
[](cudnnReduceTensorOp_t cudnn_op,
const std::vector<int64_t>& dims, const std::vector<int64_t>& axes,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -155,6 +155,28 @@ jobs:
Platform: ${{ parameters.msbuildPlatform }}
BuildConfig: ${{ parameters.BuildConfig }}

# Run the unit tests through build.py when the RunOnnxRuntimeTests parameter is true.
# When the build flags contain both 'use_cuda' and 'use_dml', the tests are run in two
# passes, each pass setting an environment variable that opts one provider's tests out.
# Otherwise a single plain test pass is run.
- ${{ if eq(parameters.RunOnnxRuntimeTests, true) }}:
  - ${{ if and(contains(parameters.additionalBuildFlags, 'use_cuda'), contains(parameters.additionalBuildFlags, 'use_dml')) }}:
    # Pass 1: CUDA tests opted out.
    # GTEST_FILTER starts with '-', so every ':'-separated pattern after it is an exclusion:
    # the CudaEp* suites (CUDA EP tests under providers/cuda/), CudaNhwcTypedTest*, and the
    # cpu models tests.
    - powershell: |
        python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }}
      workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
      displayName: 'Run tests excluding CUDA tests'
      env:
        NO_CUDA_TEST: '1'
        GTEST_FILTER: '-CudaEp*:CudaNhwcTypedTest*:*cpu_*models*'
    # Pass 2: DML tests opted out; only the cpu models tests are filtered out by name.
    # The variable is spelled NO_DML_TEST to mirror NO_CUDA_TEST above (it was NO_DMLTEST).
    # NOTE(review): confirm the test harness reads exactly NO_DML_TEST; a misspelled name
    # would be silently ignored and this pass would not exclude the DML tests.
    - powershell: |
        python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }}
      workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
      displayName: 'Run tests excluding DML tests'
      env:
        NO_DML_TEST: '1'
        GTEST_FILTER: '-*cpu_*models*'
  - ${{ else }}:
    # Single-provider builds: one unfiltered test pass.
    - powershell: |
        python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --enable_onnx_tests ${{ parameters.additionalBuildFlags }}
      workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
      displayName: 'Run tests'

- powershell: |
Get-Volume $("$(Build.BinariesDirectory)")[0]
displayName: check disk size
Expand Down Expand Up @@ -222,13 +244,6 @@ jobs:
workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
displayName: 'Install onnxruntime wheel'

- ${{ if eq(parameters.RunOnnxRuntimeTests, true) }}:
- powershell: |
python $(Build.SourcesDirectory)\tools\ci_build\build.py --config ${{ parameters.BuildConfig }} --build_dir $(Build.BinariesDirectory) --skip_submodule_sync --build_shared_lib --test --cmake_generator "Visual Studio 17 2022" --build_shared_lib --enable_onnx_tests ${{ parameters.additionalBuildFlags }}

workingDirectory: '$(Build.BinariesDirectory)\${{ parameters.BuildConfig }}\${{ parameters.BuildConfig }}'
displayName: 'Run tests'

- ${{ if eq(parameters.GenerateDocumentation, true) }}:
- task: PythonScript@0
displayName: 'Generate documentation'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -413,7 +413,7 @@ stages:
workingDirectory: '$(Build.BinariesDirectory)'
env:
NO_CUDA_TEST: '1'
GTEST_FILTER: -*CudaNhwcTypedTest*
GTEST_FILTER: '-CudaEp*:CudaNhwcTypedTest*' # Exclude CUDA EP tests under providers/cuda/
- task: PythonScript@0
displayName: 'test excludes DML'
condition: and(succeeded(), eq('${{ parameters.runTests}}', true))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ stages:
additionalBuildFlags: >-
--enable_pybind --build_java --build_nodejs --use_cuda --cuda_home="$(Agent.TempDirectory)\v${{ parameters.CudaVersion }}"
--enable_cuda_profiling --enable_transformers_tool_test
--use_dml --enable_wcos --use_winml
--cmake_extra_defines CMAKE_CUDA_ARCHITECTURES=86
--cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=ON
--cmake_extra_defines onnxruntime_ENABLE_CUDA_EP_INTERNAL_TESTS=ON
Expand All @@ -62,4 +63,4 @@ stages:
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
ORT_EP_NAME: CUDA
WITH_CACHE: true
MachinePool: onnxruntime-Win2022-GPU-A10
MachinePool: onnxruntime-Win2022-GPU-A10-8GB
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,13 @@ pr:
#### end trigger ####

parameters:
- name: CudaVersion
displayName: CUDA version
type: string
default: '12.2'
values:
- 11.8
- 12.2
- name: RunOnnxRuntimeTests
displayName: Run Tests?
type: boolean
Expand All @@ -43,11 +50,12 @@ stages:
BuildConfig: 'RelWithDebInfo'
EnvSetupScript: setup_env.bat
buildArch: x64
additionalBuildFlags: --enable_pybind --use_dml --enable_wcos --use_winml
additionalBuildFlags: >-
--enable_pybind --use_dml --enable_wcos --use_winml
msbuildPlatform: x64
isX86: false
job_name_suffix: x64_RelWithDebInfo
RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }}
ORT_EP_NAME: DML
WITH_CACHE: false
MachinePool: onnxruntime-Win2022-GPU-dml-A10
MachinePool: onnxruntime-Win2022-GPU-dml-A10
Loading