diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc index 7b507296d5982..8ee1d12d8ccf6 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc @@ -10,6 +10,7 @@ #include "core/common/parse_string.h" #include "core/framework/provider_options_utils.h" #include "core/providers/cuda/cuda_common.h" +#include "core/common/hash_combine.h" namespace onnxruntime { namespace cuda { @@ -31,8 +32,9 @@ constexpr const char* kTunableOpEnable = "tunable_op_enable"; constexpr const char* kTunableOpTuningEnable = "tunable_op_tuning_enable"; constexpr const char* kTunableOpMaxTuningDurationMs = "tunable_op_max_tuning_duration_ms"; constexpr const char* kEnableSkipLayerNormStrictMode = "enable_skip_layer_norm_strict_mode"; -constexpr const char* kPreferNCHWMode = "prefer_nhwc"; -constexpr const char* KUseEPLevelUnifiedStream = "use_ep_level_unified_stream"; +constexpr const char* kPreferNHWCMode = "prefer_nhwc"; +constexpr const char* kUseEPLevelUnifiedStream = "use_ep_level_unified_stream"; + } // namespace provider_option_names } // namespace cuda @@ -112,8 +114,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P .AddAssignmentToReference(cuda::provider_option_names::kEnableCudaGraph, info.enable_cuda_graph) .AddAssignmentToReference(cuda::provider_option_names::kCudnnConv1dPadToNc1d, info.cudnn_conv1d_pad_to_nc1d) .AddAssignmentToReference(cuda::provider_option_names::kEnableSkipLayerNormStrictMode, info.enable_skip_layer_norm_strict_mode) - .AddAssignmentToReference(cuda::provider_option_names::kPreferNCHWMode, info.prefer_nhwc) - .AddAssignmentToReference(cuda::provider_option_names::KUseEPLevelUnifiedStream, info.use_ep_level_unified_stream) + .AddAssignmentToReference(cuda::provider_option_names::kPreferNHWCMode, info.prefer_nhwc) + 
.AddAssignmentToReference(cuda::provider_option_names::kUseEPLevelUnifiedStream, info.use_ep_level_unified_stream) .AddValueParser( cuda::provider_option_names::kTunableOpEnable, [&info](const std::string& value_str) -> Status { @@ -164,8 +166,8 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const CUDAExecution {cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op.tuning_enable)}, {cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op.max_tuning_duration_ms)}, {cuda::provider_option_names::kEnableSkipLayerNormStrictMode, MakeStringWithClassicLocale(info.enable_skip_layer_norm_strict_mode)}, - {cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)}, - {cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)}, + {cuda::provider_option_names::kPreferNHWCMode, MakeStringWithClassicLocale(info.prefer_nhwc)}, + {cuda::provider_option_names::kUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)}, }; return options; @@ -185,11 +187,43 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const OrtCUDAProvid {cuda::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op_enable)}, {cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op_tuning_enable)}, {cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op_max_tuning_duration_ms)}, - {cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)}, - {cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)}, + {cuda::provider_option_names::kPreferNHWCMode, MakeStringWithClassicLocale(info.prefer_nhwc)}, + {cuda::provider_option_names::kUseEPLevelUnifiedStream, 
MakeStringWithClassicLocale(info.use_ep_level_unified_stream)}, }; return options; } +size_t CUDAExecutionProviderInfo::ToHash(const CUDAExecutionProviderInfo& info) { + size_t value{0xbc9f1d34}; // seed + + // Bits: device_id (16), arena_extend_strategy/cudnn_conv_algo_search (reserved 2), boolean options (1 each) + size_t data = static_cast<size_t>(info.device_id) ^ + (static_cast<size_t>(info.arena_extend_strategy) << 16) ^ + (static_cast<size_t>(info.cudnn_conv_algo_search) << 18) ^ + (static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^ + (static_cast<size_t>(info.has_user_compute_stream) << 21) ^ + (static_cast<size_t>(info.cudnn_conv_use_max_workspace) << 22) ^ + (static_cast<size_t>(info.enable_cuda_graph) << 23) ^ + (static_cast<size_t>(info.tunable_op.enable) << 24) ^ + (static_cast<size_t>(info.tunable_op.tuning_enable) << 25) ^ + (static_cast<size_t>(info.cudnn_conv1d_pad_to_nc1d) << 26) ^ + (static_cast<size_t>(info.enable_skip_layer_norm_strict_mode) << 27) ^ + (static_cast<size_t>(info.prefer_nhwc) << 28) ^ + (static_cast<size_t>(info.use_ep_level_unified_stream) << 29); + HashCombine(data, value); + + HashCombine(info.gpu_mem_limit, value); + HashCombine(info.tunable_op.max_tuning_duration_ms, value); + + // Memory pointers + HashCombine(reinterpret_cast<size_t>(info.user_compute_stream), value); + HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.alloc), value); + HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.free), value); + HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.empty_cache), value); + + // The default memory arena cfg is not used in hashing right now. 
+ return value; +} + } // namespace onnxruntime diff --git a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h index b286f5a9161b0..1f35bc9f3e03d 100644 --- a/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h +++ b/onnxruntime/core/providers/cuda/cuda_execution_provider_info.h @@ -7,7 +7,6 @@ #include #include -#include "core/common/hash_combine.h" #include "core/framework/arena_extend_strategy.h" #include "core/framework/ortdevice.h" #include "core/framework/provider_options.h" @@ -79,16 +78,6 @@ struct CUDAExecutionProviderInfo { static CUDAExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const CUDAExecutionProviderInfo& info); static ProviderOptions ToProviderOptions(const OrtCUDAProviderOptionsV2& info); + static size_t ToHash(const CUDAExecutionProviderInfo& info); }; } // namespace onnxruntime - -template <> -struct std::hash<::onnxruntime::cuda::TunableOpInfo> { - size_t operator()(const ::onnxruntime::cuda::TunableOpInfo& info) const { - size_t seed_and_value{0xbc9f1d34}; - onnxruntime::HashCombine(info.enable, seed_and_value); - onnxruntime::HashCombine(info.tuning_enable, seed_and_value); - onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value); - return seed_and_value; - } -}; diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc index b557f92287f2b..b708e5c0a5e3d 100644 --- a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc +++ b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc @@ -8,6 +8,7 @@ #include "core/common/parse_string.h" #include "core/framework/provider_options_utils.h" #include "core/providers/rocm/rocm_common.h" +#include "core/common/hash_combine.h" namespace onnxruntime { namespace rocm { @@ -147,4 +148,32 @@ ProviderOptions 
ROCMExecutionProviderInfo::ToProviderOptions(const OrtROCMProvid return options; } +size_t ROCMExecutionProviderInfo::ToHash(const ROCMExecutionProviderInfo& info) { + size_t value{0xbc9f1d34}; // seed + + // Bits: device_id (16), arena_extend_strategy/miopen_conv_exhaustive_search (reserved 2), boolean options (1 each) + size_t data = static_cast<size_t>(info.device_id) ^ + (static_cast<size_t>(info.arena_extend_strategy) << 16) ^ + (static_cast<size_t>(info.miopen_conv_exhaustive_search) << 18) ^ + (static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^ + (static_cast<size_t>(info.has_user_compute_stream) << 21) ^ + (static_cast<size_t>(info.miopen_conv_use_max_workspace) << 22) ^ + (static_cast<size_t>(info.enable_hip_graph) << 23) ^ + (static_cast<size_t>(info.tunable_op.enable) << 24) ^ + (static_cast<size_t>(info.tunable_op.tuning_enable) << 25); + HashCombine(data, value); + + HashCombine(info.gpu_mem_limit, value); + HashCombine(info.tunable_op.max_tuning_duration_ms, value); + + // Memory pointers + HashCombine(reinterpret_cast<size_t>(info.user_compute_stream), value); + HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.alloc), value); + HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.free), value); + HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.empty_cache), value); + + // The default memory arena cfg is not used in hashing right now. 
+ return value; +} + } // namespace onnxruntime diff --git a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h index 2f549cc1ac143..6fdf03de4722b 100644 --- a/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h +++ b/onnxruntime/core/providers/rocm/rocm_execution_provider_info.h @@ -6,7 +6,6 @@ #include #include -#include "core/common/hash_combine.h" #include "core/framework/arena_extend_strategy.h" #include "core/framework/ortdevice.h" #include "core/framework/provider_options.h" @@ -70,16 +69,6 @@ struct ROCMExecutionProviderInfo { static ROCMExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const ROCMExecutionProviderInfo& info); static ProviderOptions ToProviderOptions(const OrtROCMProviderOptions& info); + static size_t ToHash(const ROCMExecutionProviderInfo& info); }; } // namespace onnxruntime - -template <> -struct std::hash<::onnxruntime::rocm::TunableOpInfo> { - size_t operator()(const ::onnxruntime::rocm::TunableOpInfo& info) const { - size_t seed_and_value{0xbc9f1d34}; - onnxruntime::HashCombine(info.enable, seed_and_value); - onnxruntime::HashCombine(info.tuning_enable, seed_and_value); - onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value); - return seed_and_value; - } -}; diff --git a/onnxruntime/test/python/onnxruntime_test_python.py b/onnxruntime/test/python/onnxruntime_test_python.py index 68e441c87860e..dbe8156b055fc 100644 --- a/onnxruntime/test/python/onnxruntime_test_python.py +++ b/onnxruntime/test/python/onnxruntime_test_python.py @@ -414,6 +414,10 @@ def test_get_and_set_option_with_values(option_name, option_values): str(option_value), ) + test_get_and_set_option_with_values("prefer_nhwc", ["1", "0"]) + + test_get_and_set_option_with_values("enable_cuda_graph", ["1", "0"]) + test_get_and_set_option_with_values("arena_extend_strategy", ["kNextPowerOfTwo", 
"kSameAsRequested"]) test_get_and_set_option_with_values("cudnn_conv_algo_search", ["DEFAULT", "EXHAUSTIVE", "HEURISTIC"]) @@ -553,6 +557,8 @@ def test_get_and_set_option_with_values(option_name, option_values): test_get_and_set_option_with_values("tunable_op_max_tuning_duration_ms", ["-1", "1"]) + test_get_and_set_option_with_values("enable_hip_graph", ["1", "0"]) + run_rocm_options_test() def test_invalid_set_providers(self): diff --git a/orttraining/orttraining/python/orttraining_python_module.cc b/orttraining/orttraining/python/orttraining_python_module.cc index 55cd2af2d0219..7ca3376d4f8d6 100644 --- a/orttraining/orttraining/python/orttraining_python_module.cc +++ b/orttraining/orttraining/python/orttraining_python_module.cc @@ -47,7 +47,7 @@ void addObjectMethodsForLazyTensor(py::module& m); #endif bool InitArray(); -bool GetDyanmicExecutionProviderHash( +bool GetDynamicExecutionProviderHash( const std::string& ep_shared_lib_path, const ProviderOptions& provider_options, size_t& hash, @@ -87,13 +87,7 @@ bool GetProviderInstanceHash(const std::string& type, if (auto* cuda_provider_info = TryGetProviderInfo_CUDA()) { const CUDAExecutionProviderInfo info = GetCudaExecutionProviderInfo(cuda_provider_info, provider_options_map); - hash = static_cast(info.device_id) ^ - info.gpu_mem_limit ^ - (static_cast(info.arena_extend_strategy) << 16) ^ - (static_cast(info.cudnn_conv_algo_search) << 18) ^ - (static_cast(info.do_copy_in_default_stream) << 20) ^ - (static_cast(info.has_user_compute_stream) << 22) ^ - std::hash{}(info.tunable_op); + hash = CUDAExecutionProviderInfo::ToHash(info); return true; } #endif @@ -102,13 +96,7 @@ bool GetProviderInstanceHash(const std::string& type, if (auto* rocm_provider_info = TryGetProviderInfo_ROCM()) { const ROCMExecutionProviderInfo info = GetRocmExecutionProviderInfo(rocm_provider_info, provider_options_map); - hash = static_cast(info.device_id) ^ - info.gpu_mem_limit ^ - (static_cast(info.arena_extend_strategy) << 16) ^ - 
(static_cast(info.miopen_conv_exhaustive_search) << 18) ^ - (static_cast(info.do_copy_in_default_stream) << 20) ^ - (static_cast(info.has_user_compute_stream) << 22) ^ - std::hash{}(info.tunable_op); + hash = ROCMExecutionProviderInfo::ToHash(info); return true; } #endif @@ -128,7 +116,7 @@ bool GetProviderInstanceHash(const std::string& type, provider_options.insert(option); } } - return GetDyanmicExecutionProviderHash(shared_lib_path_it->second, provider_options, hash); + return GetDynamicExecutionProviderHash(shared_lib_path_it->second, provider_options, hash); } } }