Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Build CUDA and DML together #22602

Merged
merged 70 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
70 commits
Select commit Hold shift + click to select a range
54bd87b
rename cuda to gpu
jchen351 Oct 8, 2024
df06c82
rename cuda to gpu
jchen351 Oct 8, 2024
ac5b738
Merge remote-tracking branch 'refs/remotes/origin/main' into Cjian/dm…
jchen351 Oct 10, 2024
984e123
Use Cuda with use dml
jchen351 Oct 11, 2024
e5315a4
revert c-api-noopenmp-packaging-pipelines.yml
jchen351 Oct 11, 2024
bd5eb99
revert c-api-noopenmp-packaging-pipelines.yml
jchen351 Oct 11, 2024
c162c88
adding --use_winml and --parallel
jchen351 Oct 11, 2024
e229292
Remove --use_winml
jchen351 Oct 11, 2024
fb68a59
remove --test from build
jchen351 Oct 22, 2024
322c6bc
Merge branch 'refs/heads/main' into Cjian/dml-ng
jchen351 Oct 22, 2024
04717a8
Update
jchen351 Oct 22, 2024
2401b80
Merge with main
jchen351 Oct 22, 2024
a9e47fb
parallel
jchen351 Oct 22, 2024
4d0ce6d
--use_dml --build_csharp --parallel
jchen351 Oct 22, 2024
24750ac
--use_dml --build_csharp --parallel
jchen351 Oct 22, 2024
3ddf44b
verify image
invalid-email-address Oct 24, 2024
fc00b0b
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
invalid-email-address Oct 24, 2024
c16ded2
update
invalid-email-address Oct 24, 2024
5a13349
typo
invalid-email-address Oct 24, 2024
e21c387
update1
invalid-email-address Oct 24, 2024
1e45aaf
update 2
invalid-email-address Oct 24, 2024
73ecd60
print log
invalid-email-address Oct 24, 2024
9fd99e4
update 3
invalid-email-address Oct 24, 2024
ffa9c2b
test filter
invalid-email-address Oct 24, 2024
7490e44
test filter1
invalid-email-address Oct 24, 2024
09fc7ec
complete A10
invalid-email-address Oct 24, 2024
652950e
add --use-winml
invalid-email-address Oct 24, 2024
982a674
split cuda and dml test
invalid-email-address Oct 25, 2024
e6c18de
update
invalid-email-address Oct 25, 2024
6a5118e
update 1
invalid-email-address Oct 25, 2024
6522327
update 3
invalid-email-address Oct 25, 2024
974ee3a
update tests
invalid-email-address Oct 25, 2024
9494656
onnxruntime4j_test
invalid-email-address Oct 28, 2024
1b213cb
typo
invalid-email-address Oct 28, 2024
4019016
update
invalid-email-address Oct 28, 2024
abe4326
update
invalid-email-address Oct 28, 2024
f24e621
update
invalid-email-address Oct 28, 2024
1598875
update java code
invalid-email-address Oct 28, 2024
0a28ba5
update pool image
invalid-email-address Oct 28, 2024
e81aea8
fail condition
invalid-email-address Oct 28, 2024
5e976d3
Float16Larget test
invalid-email-address Oct 28, 2024
d5cf61f
remove nullptr in eps
invalid-email-address Oct 28, 2024
e3b25cf
skip cuda tests 1
invalid-email-address Oct 28, 2024
839dcbf
check cudaep 2
invalid-email-address Oct 28, 2024
08064f2
update tests
invalid-email-address Oct 29, 2024
be93bd9
lint
invalid-email-address Oct 29, 2024
8e77a6c
update tests
invalid-email-address Oct 29, 2024
ff78446
typo
invalid-email-address Oct 29, 2024
9e1bafc
cuda log
invalid-email-address Oct 29, 2024
64ade9b
update 1 test case
invalid-email-address Oct 29, 2024
aee7392
update
invalid-email-address Oct 29, 2024
a8c6e92
Revert "cuda log"
invalid-email-address Oct 29, 2024
603e0c2
update java test
invalid-email-address Oct 29, 2024
31fb04b
typo
invalid-email-address Oct 29, 2024
659131f
java test
invalid-email-address Oct 29, 2024
f8f3ac1
java lint
invalid-email-address Oct 29, 2024
fe2f0a5
split java test
invalid-email-address Oct 29, 2024
b84eba7
update
invalid-email-address Oct 29, 2024
02a9813
fix onnxruntime4j
invalid-email-address Oct 30, 2024
171c36f
not using predefined marco for EP
invalid-email-address Oct 30, 2024
c1e0144
update
invalid-email-address Oct 30, 2024
27a8e68
merge with main
invalid-email-address Oct 30, 2024
4643a7a
Merge branch 'main' of https://github.com/microsoft/onnxruntime into …
invalid-email-address Oct 31, 2024
19c4a05
revert onnxruntime_java_unittests.cmake
invalid-email-address Oct 31, 2024
29a9a60
restore java test and disable testDML
invalid-email-address Oct 31, 2024
c5b1fc4
revert one change
invalid-email-address Oct 31, 2024
c8b24ce
code learn
invalid-email-address Oct 31, 2024
04856f4
gtest_skip
invalid-email-address Oct 31, 2024
9630aeb
lint
invalid-email-address Oct 31, 2024
526133a
typo
invalid-email-address Oct 31, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions java/src/test/java/ai/onnxruntime/InferenceTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -737,6 +737,7 @@ public void testCoreML() throws OrtException {
runProvider(OrtProvider.CORE_ML);
}

@Disabled("DirectML Java API hasn't been supported yet")
@Test
@EnabledIfSystemProperty(named = "USE_DML", matches = "1")
public void testDirectML() throws OrtException {
Expand Down
9 changes: 9 additions & 0 deletions onnxruntime/test/common/cuda_op_test_utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,11 @@

#include "test/util/include/default_providers.h"

// Skips the current GoogleTest test when DefaultCudaExecutionProvider() returns
// nullptr. Use it as a statement at the top of a test body, followed by a
// semicolon:
//
//   SKIP_CUDA_TEST_WITH_DML;
//
// The body is wrapped in do { ... } while (0) so the macro expands to exactly
// one statement; a bare `if` block would mis-parse when the macro is placed in
// front of an `else`. GTEST_SKIP() returns from the enclosing function, which
// still works from inside the do/while wrapper.
#define SKIP_CUDA_TEST_WITH_DML                                            \
  do {                                                                     \
    if (DefaultCudaExecutionProvider() == nullptr) {                       \
      GTEST_SKIP() << "CUDA Tests are not supported while DML is enabled"; \
    }                                                                      \
  } while (0)

namespace onnxruntime {
namespace test {

Expand All @@ -13,6 +18,10 @@ namespace test {
int GetCudaArchitecture();

inline bool HasCudaEnvironment(int min_cuda_architecture) {
if (DefaultCudaExecutionProvider() == nullptr) {
return false;
}

if (DefaultCudaExecutionProvider().get() == nullptr) {
return false;
}
Expand Down
6 changes: 6 additions & 0 deletions onnxruntime/test/contrib_ops/beam_search_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,9 @@ TEST(BeamSearchTest, GptBeamSearchFp32) {
const char* const output_names[] = {"sequences"};

Ort::SessionOptions session_options;
#if defined(USE_CUDA) && defined(USE_DML)
SKIP_CUDA_TEST_WITH_DML;
#endif
#ifdef USE_CUDA
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
Expand Down Expand Up @@ -166,6 +169,9 @@ TEST(BeamSearchTest, GptBeamSearchFp16) {
bool enable_rocm = (nullptr != DefaultRocmExecutionProvider().get());
if (enable_cuda || enable_rocm) {
Ort::SessionOptions session_options;
#if defined(USE_CUDA) && defined(USE_DML)
SKIP_CUDA_TEST_WITH_DML;
#endif
#ifdef USE_CUDA
OrtCUDAProviderOptionsV2 cuda_options;
cuda_options.use_tf32 = false;
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/test/contrib_ops/bias_dropout_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,9 @@ void RunBiasDropoutTest(const bool use_mask, const std::vector<int64_t>& input_s
t.SetCustomOutputVerifier(output_verifier);
std::vector<std::unique_ptr<IExecutionProvider>> t_eps;
#ifdef USE_CUDA
if (DefaultCudaExecutionProvider() == nullptr) {
return;
}
t_eps.emplace_back(DefaultCudaExecutionProvider());
#elif USE_ROCM
t_eps.emplace_back(DefaultRocmExecutionProvider());
Expand Down
7 changes: 6 additions & 1 deletion onnxruntime/test/contrib_ops/bitmask_dropout_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,9 @@ void RunTestForInference(const std::vector<int64_t>& input_dims, bool has_ratio

std::vector<std::unique_ptr<IExecutionProvider>> test_eps;
#ifdef USE_CUDA
test_eps.emplace_back(DefaultCudaExecutionProvider());
if (DefaultCudaExecutionProvider() != nullptr) {
test_eps.emplace_back(DefaultCudaExecutionProvider());
}
#elif USE_ROCM
test_eps.emplace_back(DefaultRocmExecutionProvider());
#endif
Expand Down Expand Up @@ -122,6 +124,9 @@ void RunTestForTraining(const std::vector<int64_t>& input_dims) {

std::vector<std::unique_ptr<IExecutionProvider>> dropout_eps;
#ifdef USE_CUDA
if (DefaultCudaExecutionProvider() == nullptr) {
return;
}
dropout_eps.emplace_back(DefaultCudaExecutionProvider());
#elif USE_ROCM
dropout_eps.emplace_back(DefaultRocmExecutionProvider());
Expand Down
13 changes: 10 additions & 3 deletions onnxruntime/test/contrib_ops/layer_norm_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// Licensed under the MIT License.

#include "test/providers/compare_provider_test_utils.h"
#include "test/util/include/default_providers.h"

namespace onnxruntime {
namespace test {
Expand Down Expand Up @@ -79,14 +80,20 @@ static void TestLayerNorm(const std::vector<int64_t>& x_dims,
#endif

#ifdef USE_CUDA
test.CompareWithCPU(kCudaExecutionProvider);
if (DefaultCudaExecutionProvider() != nullptr) {
test.CompareWithCPU(kCudaExecutionProvider);
}
#elif USE_ROCM
test.CompareWithCPU(kRocmExecutionProvider);
#elif USE_DML
test.CompareWithCPU(kDmlExecutionProvider);
#elif USE_WEBGPU
test.CompareWithCPU(kWebGpuExecutionProvider);
#endif

#ifdef USE_DML
if (DefaultDmlExecutionProvider() != nullptr) {
test.CompareWithCPU(kDmlExecutionProvider);
}
#endif
}

TEST(CudaKernelTest, LayerNorm_NullInput) {
Expand Down
28 changes: 20 additions & 8 deletions onnxruntime/test/contrib_ops/matmul_4bits_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -489,13 +489,17 @@ void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, int64_t accura
std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
if (use_float16) {
#ifdef USE_CUDA
execution_providers.push_back(DefaultCudaExecutionProvider());
if (DefaultCudaExecutionProvider() != nullptr) {
execution_providers.push_back(DefaultCudaExecutionProvider());
}
#endif
#ifdef USE_ROCM
execution_providers.push_back(DefaultRocmExecutionProvider());
#endif
#ifdef USE_DML
execution_providers.push_back(DefaultDmlExecutionProvider());
if (DefaultDmlExecutionProvider() != nullptr) {
execution_providers.push_back(DefaultDmlExecutionProvider());
}
#endif
#ifdef USE_WEBGPU
execution_providers.push_back(DefaultWebGpuExecutionProvider());
Expand All @@ -513,8 +517,11 @@ void RunTest(int64_t M, int64_t N, int64_t K, int64_t block_size, int64_t accura
} // namespace

TEST(MatMulNBits, Float16Cuda) {
#if defined(USE_CUDA) || defined(USE_ROCM)
auto has_gidx_options = {true, false};
#if defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
std::vector<bool> has_gidx_options = {true, false};
if (DefaultDmlExecutionProvider() != nullptr) {
has_gidx_options.assign(1, false);
}
#else
auto has_gidx_options = {false};
#endif
Expand All @@ -525,7 +532,9 @@ TEST(MatMulNBits, Float16Cuda) {
for (auto block_size : {16, 32, 64, 128}) {
for (auto has_gidx : has_gidx_options) {
#ifdef USE_DML
RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f);
if (DefaultDmlExecutionProvider() != nullptr) {
RunTest(M, N, K, block_size, 0, false, true, has_gidx, true, 0.04f);
}
#else
RunTest(M, N, K, block_size, 0, false, true, has_gidx);
RunTest(M, N, K, block_size, 0, true, true, has_gidx, false);
Expand All @@ -538,12 +547,16 @@ TEST(MatMulNBits, Float16Cuda) {
}

TEST(MatMulNBits, Float16Large) {
#ifdef USE_DML
#if defined(USE_CUDA) || defined(USE_DML)
// For some reason, the A10 machine that runs these tests during CI has a much bigger error than all retail
// machines we tested on. All consumer-grade machines from Nvidia/AMD/Intel seem to pass these tests with an
// absolute error of 0.08, but the A10 has errors going as high as 0.22. Ultimately, given the large number
// of elements in this test, ULPs should probably be used instead of absolute/relative tolerances.
float abs_error = 0.3f;
float abs_error = 0.05f;
if (DefaultDmlExecutionProvider() != nullptr) {
// DML is the execution provider in use at runtime, so relax abs_error to 0.3f.
abs_error = 0.3f;
}
#elif USE_WEBGPU
// See Intel A770 to pass these tests with an absolute error of 0.08.
float abs_error = 0.08f;
Expand All @@ -559,7 +572,6 @@ TEST(MatMulNBits, Float16Large) {
}
}
}

#endif // defined(USE_CUDA) || defined(USE_ROCM) || defined(USE_DML)
} // namespace test
} // namespace onnxruntime
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ TEST(MatMulIntegerToFloat, HasZeroPoint_HasBias_test_U8S8) {
}

// DML EP supports Float16 output type and Signed A Matrix and Unsigned B Matrix for Float32 output
#if defined(USE_DML)
#if defined(USE_DML) && !defined(USE_CUDA)

TEST(MatMulIntegerToFloat, HasZeroPoint_NoBias_test_S8U8) {
RunMatMulIntegerToFloatTest<int8_t, uint8_t, float, true, false>();
Expand Down
20 changes: 19 additions & 1 deletion onnxruntime/test/contrib_ops/tensor_op_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,15 @@ void MeanVarianceNormalizationAcrossChannels(bool across_channels, bool normaliz
test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
test.AddInput<float>("input", {N, C, H, W}, X);
test.AddOutput<float>("output", {N, C, H, W}, result);
#if defined(USE_CUDA) && defined(USE_DML)
if (DefaultCudaExecutionProvider() == nullptr) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider});
} else if (DefaultDmlExecutionProvider() == nullptr) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider});
}
#else
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
#endif
}

void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_variance) {
Expand Down Expand Up @@ -188,7 +196,15 @@ void MeanVarianceNormalizationPerChannel(bool across_channels, bool normalize_va
test.AddAttribute("normalize_variance", normalize_variance ? one : zero);
test.AddInput<float>("input", {N, C, H, W}, X);
test.AddOutput<float>("output", {N, C, H, W}, result);
#if defined(USE_CUDA) && defined(USE_DML)
if (DefaultCudaExecutionProvider() == nullptr) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kCudaExecutionProvider, kTensorrtExecutionProvider});
} else if (DefaultDmlExecutionProvider() == nullptr) {
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kDmlExecutionProvider, kTensorrtExecutionProvider});
}
#else
test.Run(OpTester::ExpectResult::kExpectSuccess, "", {kOpenVINOExecutionProvider, kTensorrtExecutionProvider}); // OpenVINO doesn't support MVN operator below opset 9. TensorRT doesn't support opset 8 of MVN operator.
#endif
}

TEST(MVNContribOpTest, MeanVarianceNormalizationCPUTest_Version1_TO_8) {
Expand Down Expand Up @@ -230,7 +246,9 @@ TEST(UnfoldTensorOpTest, LastDim) {

std::vector<std::unique_ptr<IExecutionProvider>> execution_providers;
#ifdef USE_CUDA
execution_providers.push_back(DefaultCudaExecutionProvider());
if (DefaultCudaExecutionProvider() != nullptr) {
execution_providers.push_back(DefaultCudaExecutionProvider());
}
#endif
execution_providers.push_back(DefaultCpuExecutionProvider());
tester.Run(OpTester::ExpectResult::kExpectSuccess, "", {}, nullptr, &execution_providers);
Expand Down
Loading
Loading