Commit 9b8baf2

fix cuda/rocm provider info hash

tianleiwu committed Feb 3, 2024
1 parent 18c3acb commit 9b8baf2
Showing 6 changed files with 83 additions and 48 deletions.
50 changes: 42 additions & 8 deletions onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc
@@ -10,6 +10,7 @@
#include "core/common/parse_string.h"
#include "core/framework/provider_options_utils.h"
#include "core/providers/cuda/cuda_common.h"
#include "core/common/hash_combine.h"

namespace onnxruntime {
namespace cuda {
@@ -31,8 +32,9 @@ constexpr const char* kTunableOpEnable = "tunable_op_enable";
constexpr const char* kTunableOpTuningEnable = "tunable_op_tuning_enable";
constexpr const char* kTunableOpMaxTuningDurationMs = "tunable_op_max_tuning_duration_ms";
constexpr const char* kEnableSkipLayerNormStrictMode = "enable_skip_layer_norm_strict_mode";
constexpr const char* kPreferNCHWMode = "prefer_nhwc";
constexpr const char* KUseEPLevelUnifiedStream = "use_ep_level_unified_stream";
constexpr const char* kPreferNHWCMode = "prefer_nhwc";
constexpr const char* kUseEPLevelUnifiedStream = "use_ep_level_unified_stream";

} // namespace provider_option_names
} // namespace cuda

@@ -112,8 +114,8 @@ CUDAExecutionProviderInfo CUDAExecutionProviderInfo::FromProviderOptions(const P
.AddAssignmentToReference(cuda::provider_option_names::kEnableCudaGraph, info.enable_cuda_graph)
.AddAssignmentToReference(cuda::provider_option_names::kCudnnConv1dPadToNc1d, info.cudnn_conv1d_pad_to_nc1d)
.AddAssignmentToReference(cuda::provider_option_names::kEnableSkipLayerNormStrictMode, info.enable_skip_layer_norm_strict_mode)
.AddAssignmentToReference(cuda::provider_option_names::kPreferNCHWMode, info.prefer_nhwc)
.AddAssignmentToReference(cuda::provider_option_names::KUseEPLevelUnifiedStream, info.use_ep_level_unified_stream)
.AddAssignmentToReference(cuda::provider_option_names::kPreferNHWCMode, info.prefer_nhwc)
.AddAssignmentToReference(cuda::provider_option_names::kUseEPLevelUnifiedStream, info.use_ep_level_unified_stream)

[cpplint] Lines should be <= 120 characters long [whitespace/line_length] [2] (onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc:118)
.AddValueParser(
cuda::provider_option_names::kTunableOpEnable,
[&info](const std::string& value_str) -> Status {
@@ -164,8 +166,8 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const CUDAExecution
{cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op.tuning_enable)},
{cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op.max_tuning_duration_ms)},
{cuda::provider_option_names::kEnableSkipLayerNormStrictMode, MakeStringWithClassicLocale(info.enable_skip_layer_norm_strict_mode)},
{cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
{cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
{cuda::provider_option_names::kPreferNHWCMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
{cuda::provider_option_names::kUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},

[cpplint] Lines should be <= 120 characters long [whitespace/line_length] [2] (onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc:170)
};

return options;
@@ -185,11 +187,43 @@ ProviderOptions CUDAExecutionProviderInfo::ToProviderOptions(const OrtCUDAProvid
{cuda::provider_option_names::kTunableOpEnable, MakeStringWithClassicLocale(info.tunable_op_enable)},
{cuda::provider_option_names::kTunableOpTuningEnable, MakeStringWithClassicLocale(info.tunable_op_tuning_enable)},
{cuda::provider_option_names::kTunableOpMaxTuningDurationMs, MakeStringWithClassicLocale(info.tunable_op_max_tuning_duration_ms)},
{cuda::provider_option_names::kPreferNCHWMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
{cuda::provider_option_names::KUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},
{cuda::provider_option_names::kPreferNHWCMode, MakeStringWithClassicLocale(info.prefer_nhwc)},
{cuda::provider_option_names::kUseEPLevelUnifiedStream, MakeStringWithClassicLocale(info.use_ep_level_unified_stream)},

[cpplint] Lines should be <= 120 characters long [whitespace/line_length] [2] (onnxruntime/core/providers/cuda/cuda_execution_provider_info.cc:191)
};

return options;
}

size_t CUDAExecutionProviderInfo::ToHash(const CUDAExecutionProviderInfo& info) {
size_t value{0xbc9f1d34}; // seed

// Bits: device_id (16), arena_extend_strategy/cudnn_conv_algo_search (reserved 2), boolean options (1 each)
size_t data = static_cast<size_t>(info.device_id) ^
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
(static_cast<size_t>(info.cudnn_conv_algo_search) << 18) ^
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
(static_cast<size_t>(info.has_user_compute_stream) << 21) ^
(static_cast<size_t>(info.cudnn_conv_use_max_workspace) << 22) ^
(static_cast<size_t>(info.enable_cuda_graph) << 23) ^
(static_cast<size_t>(info.tunable_op.enable) << 24) ^
(static_cast<size_t>(info.tunable_op.tuning_enable) << 25) ^
(static_cast<size_t>(info.cudnn_conv1d_pad_to_nc1d) << 26) ^
(static_cast<size_t>(info.enable_skip_layer_norm_strict_mode) << 27) ^
(static_cast<size_t>(info.prefer_nhwc) << 28) ^
(static_cast<size_t>(info.use_ep_level_unified_stream) << 29);
HashCombine(data, value);

HashCombine(info.gpu_mem_limit, value);
HashCombine(info.tunable_op.max_tuning_duration_ms, value);

// Memory pointers
HashCombine(reinterpret_cast<size_t>(info.user_compute_stream), value);
HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.alloc), value);
HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.free), value);
HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.empty_cache), value);

// The default memory arena cfg is not used in hashing right now.
return value;
}

} // namespace onnxruntime
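For context: the new ToHash packs the small flags into one word and then folds that word, the remaining fields, and the raw pointers into a single seed via HashCombine from core/common/hash_combine.h. That helper is not shown in this diff; the sketch below is a minimal, self-contained approximation that assumes the usual boost-style combine formula and uses a toy struct in place of CUDAExecutionProviderInfo.

// Sketch only: assumes HashCombine follows the common boost-style formula;
// the real helper lives in core/common/hash_combine.h and may differ in detail.
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>

template <typename T>
void HashCombine(const T& value, std::size_t& seed) {
  seed ^= std::hash<T>{}(value) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
}

// Toy stand-in for CUDAExecutionProviderInfo with just a few of its fields.
struct ToyProviderInfo {
  int device_id = 0;
  bool prefer_nhwc = false;
  std::size_t gpu_mem_limit = SIZE_MAX;
};

std::size_t ToyToHash(const ToyProviderInfo& info) {
  std::size_t value{0xbc9f1d34};  // same seed constant the commit uses
  // Pack the small fields into one word so each option occupies its own bits.
  std::size_t data = static_cast<std::size_t>(info.device_id) ^
                     (static_cast<std::size_t>(info.prefer_nhwc) << 16);
  HashCombine(data, value);
  HashCombine(info.gpu_mem_limit, value);
  return value;
}

int main() {
  ToyProviderInfo a, b;
  b.prefer_nhwc = true;  // the two configs differ in a single boolean option
  std::cout << (ToyToHash(a) != ToyToHash(b)) << "\n";  // expected output: 1
}

With this pattern, flipping any packed option changes the packed word and therefore, with overwhelming probability, the final hash.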
13 changes: 1 addition & 12 deletions onnxruntime/core/providers/cuda/cuda_execution_provider_info.h
@@ -7,7 +7,6 @@
#include <functional>
#include <limits>

#include "core/common/hash_combine.h"
#include "core/framework/arena_extend_strategy.h"
#include "core/framework/ortdevice.h"
#include "core/framework/provider_options.h"
@@ -79,16 +78,6 @@ struct CUDAExecutionProviderInfo {
static CUDAExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
static ProviderOptions ToProviderOptions(const CUDAExecutionProviderInfo& info);
static ProviderOptions ToProviderOptions(const OrtCUDAProviderOptionsV2& info);
static size_t ToHash(const CUDAExecutionProviderInfo& info);
};
} // namespace onnxruntime

template <>
struct std::hash<::onnxruntime::cuda::TunableOpInfo> {
size_t operator()(const ::onnxruntime::cuda::TunableOpInfo& info) const {
size_t seed_and_value{0xbc9f1d34};
onnxruntime::HashCombine(info.enable, seed_and_value);
onnxruntime::HashCombine(info.tuning_enable, seed_and_value);
onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value);
return seed_and_value;
}
};
29 changes: 29 additions & 0 deletions onnxruntime/core/providers/rocm/rocm_execution_provider_info.cc
@@ -8,6 +8,7 @@
#include "core/common/parse_string.h"
#include "core/framework/provider_options_utils.h"
#include "core/providers/rocm/rocm_common.h"
#include "core/common/hash_combine.h"

namespace onnxruntime {
namespace rocm {
@@ -147,4 +148,32 @@ ProviderOptions ROCMExecutionProviderInfo::ToProviderOptions(const OrtROCMProvid
return options;
}

size_t ROCMExecutionProviderInfo::ToHash(const ROCMExecutionProviderInfo& info) {
size_t value{0xbc9f1d34}; // seed

// Bits: device_id (16), arena_extend_strategy/miopen_conv_exhaustive_search (reserved 2), boolean options (1 each)
size_t data = static_cast<size_t>(info.device_id) ^
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
(static_cast<size_t>(info.miopen_conv_exhaustive_search) << 18) ^
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
(static_cast<size_t>(info.has_user_compute_stream) << 21) ^
(static_cast<size_t>(info.miopen_conv_use_max_workspace) << 22) ^
(static_cast<size_t>(info.enable_hip_graph) << 23) ^
(static_cast<size_t>(info.tunable_op.enable) << 24) ^
(static_cast<size_t>(info.tunable_op.tuning_enable) << 25);
HashCombine(data, value);

HashCombine(info.gpu_mem_limit, value);
HashCombine(info.tunable_op.max_tuning_duration_ms, value);

// Memory pointers
HashCombine(reinterpret_cast<size_t>(info.user_compute_stream), value);
HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.alloc), value);
HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.free), value);
HashCombine(reinterpret_cast<size_t>(info.external_allocator_info.empty_cache), value);

// The default memory arena cfg is not used in hashing right now.
return value;
}

} // namespace onnxruntime
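A usage note, not part of the commit: the property the new function is meant to provide is that configurations differing in any hashed option produce different hashes. A hypothetical gtest-style check (the test name and setup below are invented here) could exercise that:

// Hypothetical check, not present in this commit.
#include "core/providers/rocm/rocm_execution_provider_info.h"
#include "gtest/gtest.h"

TEST(ROCMExecutionProviderInfoTest, HashChangesWhenAnOptionChanges) {
  onnxruntime::ROCMExecutionProviderInfo a{};
  onnxruntime::ROCMExecutionProviderInfo b{};
  b.enable_hip_graph = true;  // one of the options the new hash now covers
  EXPECT_NE(onnxruntime::ROCMExecutionProviderInfo::ToHash(a),
            onnxruntime::ROCMExecutionProviderInfo::ToHash(b));
}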
13 changes: 1 addition & 12 deletions onnxruntime/core/providers/rocm/rocm_execution_provider_info.h
@@ -6,7 +6,6 @@
#include <functional>
#include <limits>

#include "core/common/hash_combine.h"
#include "core/framework/arena_extend_strategy.h"
#include "core/framework/ortdevice.h"
#include "core/framework/provider_options.h"
@@ -70,16 +69,6 @@ struct ROCMExecutionProviderInfo {
static ROCMExecutionProviderInfo FromProviderOptions(const ProviderOptions& options);
static ProviderOptions ToProviderOptions(const ROCMExecutionProviderInfo& info);
static ProviderOptions ToProviderOptions(const OrtROCMProviderOptions& info);
static size_t ToHash(const ROCMExecutionProviderInfo& info);
};
} // namespace onnxruntime

template <>
struct std::hash<::onnxruntime::rocm::TunableOpInfo> {
size_t operator()(const ::onnxruntime::rocm::TunableOpInfo& info) const {
size_t seed_and_value{0xbc9f1d34};
onnxruntime::HashCombine(info.enable, seed_and_value);
onnxruntime::HashCombine(info.tuning_enable, seed_and_value);
onnxruntime::HashCombine(info.max_tuning_duration_ms, seed_and_value);
return seed_and_value;
}
};
6 changes: 6 additions & 0 deletions onnxruntime/test/python/onnxruntime_test_python.py
@@ -414,6 +414,10 @@ def test_get_and_set_option_with_values(option_name, option_values):
str(option_value),
)

test_get_and_set_option_with_values("prefer_nhwc", ["1", "0"])

test_get_and_set_option_with_values("enable_cuda_graph", ["1", "0"])

test_get_and_set_option_with_values("arena_extend_strategy", ["kNextPowerOfTwo", "kSameAsRequested"])

test_get_and_set_option_with_values("cudnn_conv_algo_search", ["DEFAULT", "EXHAUSTIVE", "HEURISTIC"])
@@ -553,6 +557,8 @@ def test_get_and_set_option_with_values(option_name, option_values):

test_get_and_set_option_with_values("tunable_op_max_tuning_duration_ms", ["-1", "1"])

test_get_and_set_option_with_values("enable_hip_graph", ["1", "0"])

run_rocm_options_test()

def test_invalid_set_providers(self):
20 changes: 4 additions & 16 deletions orttraining/orttraining/python/orttraining_python_module.cc
@@ -47,7 +47,7 @@ void addObjectMethodsForLazyTensor(py::module& m);
#endif
bool InitArray();

bool GetDyanmicExecutionProviderHash(
bool GetDynamicExecutionProviderHash(
const std::string& ep_shared_lib_path,
const ProviderOptions& provider_options,
size_t& hash,
@@ -87,13 +87,7 @@ bool GetProviderInstanceHash(const std::string& type,
if (auto* cuda_provider_info = TryGetProviderInfo_CUDA()) {
const CUDAExecutionProviderInfo info = GetCudaExecutionProviderInfo(cuda_provider_info,
provider_options_map);
hash = static_cast<size_t>(info.device_id) ^
info.gpu_mem_limit ^
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
(static_cast<size_t>(info.cudnn_conv_algo_search) << 18) ^
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
(static_cast<size_t>(info.has_user_compute_stream) << 22) ^
std::hash<cuda::TunableOpInfo>{}(info.tunable_op);
hash = CUDAExecutionProviderInfo::ToHash(info);
return true;
}
#endif
@@ -102,13 +96,7 @@ bool GetProviderInstanceHash(const std::string& type,
if (auto* rocm_provider_info = TryGetProviderInfo_ROCM()) {
const ROCMExecutionProviderInfo info = GetRocmExecutionProviderInfo(rocm_provider_info,
provider_options_map);
hash = static_cast<size_t>(info.device_id) ^
info.gpu_mem_limit ^
(static_cast<size_t>(info.arena_extend_strategy) << 16) ^
(static_cast<size_t>(info.miopen_conv_exhaustive_search) << 18) ^
(static_cast<size_t>(info.do_copy_in_default_stream) << 20) ^
(static_cast<size_t>(info.has_user_compute_stream) << 22) ^
std::hash<rocm::TunableOpInfo>{}(info.tunable_op);
hash = ROCMExecutionProviderInfo::ToHash(info);
return true;
}
#endif
@@ -128,7 +116,7 @@ bool GetProviderInstanceHash(const std::string& type,
provider_options.insert(option);
}
}
return GetDyanmicExecutionProviderHash(shared_lib_path_it->second, provider_options, hash);
return GetDynamicExecutionProviderHash(shared_lib_path_it->second, provider_options, hash);
}
}
}
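Why this change matters: the inline hash removed above only folded device_id, gpu_mem_limit, arena_extend_strategy, the conv search mode, do_copy_in_default_stream, has_user_compute_stream, and the tunable_op settings, so two provider configurations that differed only in an option outside that set (prefer_nhwc, enable_cuda_graph, and so on) collapsed to the same hash. A minimal, self-contained illustration of that blind spot, using a toy struct in place of the real info type:

// Illustration only: toy reproduction of the old hash's blind spot.
#include <cstddef>
#include <iostream>

struct ToyProviderInfo {
  int device_id = 0;
  std::size_t gpu_mem_limit = 1u << 30;
  bool do_copy_in_default_stream = true;
  bool prefer_nhwc = false;  // never entered the old hash
};

std::size_t OldStyleHash(const ToyProviderInfo& info) {
  // Mirrors the shape of the removed code; prefer_nhwc is simply ignored.
  return static_cast<std::size_t>(info.device_id) ^
         info.gpu_mem_limit ^
         (static_cast<std::size_t>(info.do_copy_in_default_stream) << 20);
}

int main() {
  ToyProviderInfo a, b;
  b.prefer_nhwc = true;
  std::cout << (OldStyleHash(a) == OldStyleHash(b)) << "\n";  // prints 1: a collision
}

Routing both the CUDA and ROCM paths through the centralized ToHash keeps the hash in sync with the option set as new provider options are added.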