From 22858066cad7503d538e063a15bc20eacc1ca547 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 15:19:55 +0100 Subject: [PATCH 01/10] Adding ourtvalue support for MGX EP --- .../core/session/onnxruntime_c_api.h | 15 +++++ .../migraphx/migraphx_execution_provider.cc | 41 ++++++++++++- .../migraphx/migraphx_execution_provider.h | 5 +- .../migraphx_execution_provider_info.cc | 52 ++++++++++++++++ .../migraphx_execution_provider_info.h | 61 +++++++++++++++++++ .../migraphx/migraphx_provider_factory.cc | 22 +++++++ .../migraphx/migraphx_provider_factory.h | 3 + .../python/onnxruntime_pybind_mlvalue.cc | 45 ++++++++++++++ .../python/onnxruntime_pybind_mlvalue.h | 12 ++++ .../python/onnxruntime_pybind_ortvalue.cc | 20 ++++++ .../python/onnxruntime_pybind_state.cc | 4 +- .../python/onnxruntime_pybind_state_common.cc | 6 ++ .../python/onnxruntime_pybind_state_common.h | 12 ++++ onnxruntime/test/util/default_providers.cc | 4 +- 14 files changed, 298 insertions(+), 4 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 4ca2791e26ab9..e50e257c87eb1 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -622,6 +622,21 @@ typedef struct OrtMIGraphXProviderOptions { int migraphx_load_compiled_model; // migraphx int8 cal table. Default 0 = false, noznero = true const char* migraphx_load_model_path; // migraphx model path name bool migraphx_exhaustive_tune; // migraphx tuned compile Default = false + + /** \brief MIGraphX memory limit (To use all possible memory pass in maximum size_t) + * Defaults to SIZE_MAX. + * \note If a ::OrtArenaCfg has been applied, it will override this field + */ + size_t gpu_mem_limit; + + /** \brief Strategy used to grow the memory arena + * 0 = kNextPowerOfTwo
+ * 1 = kSameAsRequested<br>
+ * Defaults to 0. + * \note If a ::OrtArenaCfg has been applied, it will override this field + */ + int arena_extend_strategy; + } OrtMIGraphXProviderOptions; /** \brief OpenVINO Provider Options diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index fd36b8ae5f678..7f934d69c48ab 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -13,10 +13,11 @@ #include "core/common/safeint.h" #include "core/common/logging/severity.h" #include "migraphx_execution_provider.h" +#include "migraphx_execution_provider_info.h" #include "migraphx_execution_provider_utils.h" #include "migraphx_allocator.h" #include "gpu_data_transfer.h" -#include "migraphx_inc.h" +#include "migraphx_call.h" #include "migraphx_stream_handle.h" @@ -208,6 +209,44 @@ MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProv MIGraphXExecutionProvider::~MIGraphXExecutionProvider() { } +AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, + size_t gpu_mem_limit, + ArenaExtendStrategy arena_extend_strategy, + MIGraphXExecutionProviderExternalAllocatorInfo + external_allocator_info, + const OrtArenaCfg* default_memory_arena_cfg) { + if (external_allocator_info.UseExternalAllocator()) { + AllocatorCreationInfo default_memory_info( + [external_allocator_info](OrtDevice::DeviceId id) { + return std::make_unique(id, HIP, + external_allocator_info.alloc, + external_allocator_info.free, + external_allocator_info.empty_cache); + }, + device_id, + false); + + return CreateAllocator(default_memory_info); + } else { + AllocatorCreationInfo default_memory_info( + [](OrtDevice::DeviceId id) { + return std::make_unique(id, HIP); + }, + device_id, + true, + {default_memory_arena_cfg ? *default_memory_arena_cfg + : OrtArenaCfg(gpu_mem_limit, static_cast(arena_extend_strategy), + -1, -1, -1, -1L)}, + // make it stream aware + true, + // enable cross stream sharing? 
+ false); + + // ROCM malloc/free is expensive so always use an arena + return CreateAllocator(default_memory_info); + } +} + std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() { AllocatorCreationInfo default_memory_info( [](OrtDevice::DeviceId device_id) { return CreateMIGraphXAllocator(device_id, onnxruntime::CUDA); }, info_.device_id); diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h index 2be6c09551a71..b20d534be66a2 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h @@ -7,7 +7,7 @@ #include "core/framework/execution_provider.h" #include "core/platform/ort_mutex.h" #include "core/providers/migraphx/migraphx_execution_provider_info.h" -#include "core/providers/migraphx/migraphx_inc.h" +#include "core/providers/migraphx/migraphx_call.h" #include #include @@ -76,6 +76,9 @@ class MIGraphXExecutionProvider : public IExecutionProvider { virtual std::shared_ptr GetKernelRegistry() const override; std::unique_ptr GetDataTransfer() const override; + static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t rocm_mem_limit, ArenaExtendStrategy arena_extend_strategy, + MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg); + std::unique_ptr GetSubGraph(const std::vector& graph_nodes_index, const GraphViewer& graph) const; void RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry, AllocatorMap& allocators) const override; OrtDevice GetOrtDeviceByMemType(OrtMemType mem_type) const override; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc index 1f9a47d3ad87d..1feec9ae2e3e1 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include "core/providers/shared_library/provider_api.h" #include "core/providers/migraphx/migraphx_execution_provider_info.h" #include "core/common/make_string.h" @@ -10,6 +11,12 @@ #include "migraphx_call.h" namespace onnxruntime { + +const EnumNameMapping arena_extend_strategy_mapping{ + {ArenaExtendStrategy::kNextPowerOfTwo, "kNextPowerOfTwo"}, + {ArenaExtendStrategy::kSameAsRequested, "kSameAsRequested"}, +}; + namespace migraphx { namespace provider_option_names { constexpr const char* kDeviceId = "device_id"; @@ -22,12 +29,20 @@ constexpr const char* kSaveModelPath = "migx_save_model_name"; constexpr const char* kLoadCompiledModel = "migx_load_compiled_model"; constexpr const char* kLoadModelPath = "migx_load_model_name"; constexpr const char* kExhaustiveTune = "migx_exhaustive_tune"; +constexpr const char* kMemLimit = "gpu_mem_limit"; +constexpr const char* kArenaExtendStrategy = "arena_extend_strategy"; +constexpr const char* kGpuExternalAlloc = "gpu_external_alloc"; +constexpr const char* kGpuExternalFree = "gpu_external_free"; +constexpr const char* kGpuExternalEmptyCache = "gpu_external_empty_cache"; } // namespace provider_option_names } // namespace migraphx MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) { MIGraphXExecutionProviderInfo info{}; + void* alloc = nullptr; + void* free = nullptr; + void* empty_cache = nullptr; ORT_THROW_IF_ERROR( ProviderOptionsParser{} .AddValueParser( @@ -42,13 +57,42 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions ", must be between 0 (inclusive) and ", num_devices, " (exclusive)."); return Status::OK(); }) + .AddValueParser( + migraphx_provider_option::kGpuExternalAlloc, + [&alloc](const std::string& value_str) -> Status { + size_t address; + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); + alloc = reinterpret_cast(address); + return Status::OK(); + }) + .AddValueParser( + migraphx_provider_option::kGpuExternalFree, + [&free](const std::string& value_str) -> Status { + size_t address; + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); + free = reinterpret_cast(address); + return Status::OK(); + }) + .AddValueParser( + migraphx_provider_option::kGpuExternalEmptyCache, + [&empty_cache](const std::string& value_str) -> Status { + size_t address; + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); + empty_cache = reinterpret_cast(address); + return Status::OK(); + }) .AddAssignmentToReference(migraphx::provider_option_names::kFp16Enable, info.fp16_enable) .AddAssignmentToReference(migraphx::provider_option_names::kInt8Enable, info.int8_enable) .AddAssignmentToReference(migraphx::provider_option_names::kSaveCompiledModel, info.save_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kLoadCompiledModel, info.load_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kExhaustiveTune, info.exhaustive_tune) + .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.gpu_mem_limit) + .AddAssignmentToEnumReference(migraphx_provider_option::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) .Parse(options)); + MIGraphXExecutionProviderExternalAllocatorInfo alloc_info{alloc, free, empty_cache}; + info.external_allocator_info = alloc_info; + return info; } @@ -59,6 +103,12 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const MIGraphXE {migraphx::provider_option_names::kInt8Enable, 
MakeStringWithClassicLocale(info.int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.load_compiled_model)}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, + {migraphx_provider_option::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, + {migraphx_provider_option::kGpuExternalFree, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, + {migraphx_provider_option::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, + {migraphx_provider_option::kArenaExtendStrategy, + EnumToName(arena_extend_strategy_mapping, info.arena_extend_strategy)}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.exhaustive_tune)}, }; return options; @@ -71,6 +121,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const OrtMIGrap {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.migraphx_save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.migraphx_load_compiled_model)}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, + {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.arena_extend_strategy))}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.migraphx_exhaustive_tune)}, }; return options; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h index b8bf86580f03d..61bc0d871be7c 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h @@ -7,10 +7,36 @@ #include #include "core/framework/ortdevice.h" +#include "core/common/hash_combine.h" +#include "core/framework/arena_extend_strategy.h" #include "core/framework/provider_options.h" #include "core/session/onnxruntime_c_api.h" namespace onnxruntime { + +// Information needed to construct MIGraphX execution providers. +struct MIGraphXExecutionProviderExternalAllocatorInfo { + void* alloc{nullptr}; + void* free{nullptr}; + void* empty_cache{nullptr}; + + MIGraphXExecutionProviderExternalAllocatorInfo() { + alloc = nullptr; + free = nullptr; + empty_cache = nullptr; + } + + MIGraphXExecutionProviderExternalAllocatorInfo(void* a, void* f, void* e) { + alloc = a; + free = f; + empty_cache = e; + } + + bool UseExternalAllocator() const { + return (alloc != nullptr) && (free != nullptr); + } +}; + // Information needed to construct trt execution providers. 
struct MIGraphXExecutionProviderInfo { std::string target_device; @@ -25,8 +51,43 @@ struct MIGraphXExecutionProviderInfo { std::string load_model_file{"./compiled_model.mxr"}; bool exhaustive_tune{false}; + size_t gpu_mem_limit{std::numeric_limits::max()}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) + ArenaExtendStrategy arena_extend_strategy{ArenaExtendStrategy::kNextPowerOfTwo}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) + + OrtArenaCfg* default_memory_arena_cfg{nullptr}; + MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info{}; + static MIGraphXExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const MIGraphXExecutionProviderInfo& info); static ProviderOptions ToProviderOptions(const OrtMIGraphXProviderOptions& info); }; } // namespace onnxruntime + +template <> +struct std::hash<::onnxruntime::MIGraphXExecutionProviderInfo> { + size_t operator()(const ::onnxruntime::MIGraphXExecutionProviderInfo& info) const { + size_t value{0xbc9f1d34}; // seed + + // Bits: device_id (16), arena_extend_strategy (reserved 2), boolean options (1 each) + size_t data = static_cast(info.device_id) ^ + (static_cast(info.arena_extend_strategy) << 16) ^ + (static_cast(info.fp16_enable) << 18) ^ + (static_cast(info.int8_enable) << 19) ^ + (static_cast(info.int8_use_native_calibration_table) << 20) ^ + (static_cast(info.model_cache_enable) << 21) ^ + (static_cast(info.save_compiled_model) << 22) ^ + (static_cast(info.load_compiled_model) << 23) ^ + (static_cast(info.exhaustive_tune) << 24); + onnxruntime::HashCombine(data, value); + + onnxruntime::HashCombine(info.gpu_mem_limit, value); + + // Memory pointers + onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.alloc), value); + onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.free), value); + onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.empty_cache), value); + + // The default memory arena cfg is not used in hashing right now. + return value; + } +}; diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 7b192b657b7cc..64278bf6439f9 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -5,6 +5,7 @@ #include "core/providers/shared_library/provider_api.h" #include "core/providers/migraphx/migraphx_provider_factory.h" #include "migraphx_execution_provider.h" +#include "migraphx_execution_provider_info.h" #include "migraphx_provider_factory_creator.h" #include "migraphx_allocator.h" #include "gpu_data_transfer.h" @@ -42,6 +43,27 @@ struct ProviderInfo_MIGraphX_Impl final : ProviderInfo_MIGraphX { return std::make_unique(device_id, name); } + void MIGraphXMemcpy_HostToDevice(void* dst, const void* src, size_t count) override { + // hipMemcpy() operates on the default stream + HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyHostToDevice)); + + // To ensure that the copy has completed, invoke a stream sync for the default stream. + // For transfers from pageable host memory to device memory, a stream sync is performed before the copy is initiated. + // The function will return once the pageable buffer has been copied to the staging memory for DMA transfer + // to device memory, but the DMA to final destination may not have completed. 
+ + HIP_CALL_THROW(hipStreamSynchronize(0)); + } + + // Used by onnxruntime_pybind_state.cc + void MIGraphXMemcpy_DeviceToHost(void* dst, const void* src, size_t count) override { + // For transfers from device to either pageable or pinned host memory, the function returns only once the copy has completed. + HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost)); + } + + std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override { + return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg); + } } g_info; struct MIGraphX_Provider : Provider { diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h index b257a4318dc0e..7c26f9b9f0bb5 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h @@ -14,6 +14,9 @@ struct MIGraphXExecutionProviderExternalAllocatorInfo; struct ProviderInfo_MIGraphX { virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateMIGraphXPinnedAllocator(int16_t device_id, const char* name) = 0; + virtual void MIGraphXMemcpy_HostToDevice(void* dst, const void* src, size_t count) = 0; + virtual void MIGraphXMemcpy_DeviceToHost(void* dst, const void* src, size_t count) = 0; + virtual std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0; protected: ~ProviderInfo_MIGraphX() = default; // Can only be destroyed through a subclass instance diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index 8fdac257297c1..da5e942d67f21 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -115,6 +115,19 @@ OrtMemoryInfo GetMemoryInfoPerDeviceType(const OrtDevice& ort_device) { } mem_info = GetCudaAllocator(ort_device.Id())->Info(); } +#endif +#if USE_ROCM + else if (ort_device.Type() == OrtDevice::GPU) { + if (!IsRocmDeviceIdValid(logging::LoggingManager::DefaultLogger(), ort_device.Id())) { + ORT_THROW("The provided device id doesn't match any available GPUs on the machine: ", ort_device.Id()); + } + mem_info = GetRocmAllocator(ort_device.Id())->Info(); + } +#endif +#if USE_MIGRAPHX + else if (ort_device.Type() == OrtDevice::GPU) { + mem_info = GetMIGraphXAllocator(ort_device.Id())->Info(); + } #endif else { ORT_THROW("Unsupported OrtDevice type: ", ort_device.Type()); @@ -193,6 +206,38 @@ std::unique_ptr GetGPUDataTransfer() { #endif +#ifdef USE_MIGRAPHX +void CpuToMIGraphXMemCpy(void* dst, const void* src, size_t num_bytes) { + GetProviderInfo_MIGraphX().MIGraphXMemcpy_HostToDevice(dst, src, num_bytes); +} + +void MIGraphXToCpuMemCpy(void* dst, const void* src, size_t num_bytes) { + GetProviderInfo_MIGraphX().MIGraphXMemcpy_DeviceToHost(dst, src, num_bytes); +} + +const std::unordered_map* GetMIGraphXToHostMemCpyFunction() { + static std::unordered_map map{ + {OrtDevice::GPU, 
MIGraphXToCpuMemCpy}}; + + return ↦ +} + +AllocatorPtr GetMIGraphXAllocator(OrtDevice::DeviceId id) { + // Current approach is not thread-safe, but there are some bigger infra pieces to put together in order to make + // multi-threaded MIGraphX allocation work we need to maintain a per-thread MIGraphX allocator + + static auto* id_to_allocator_map = new std::unordered_map(); + + if (id_to_allocator_map->find(id) == id_to_allocator_map->end()) { + // TODO: Expose knobs so that users can set fields associated with OrtArenaCfg so that we can pass it to the following method + id_to_allocator_map->insert({id, GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, nullptr)}); + } + + return (*id_to_allocator_map)[id]; +} + +#endif + #ifdef USE_DML constexpr GUID dml_readback_heap_guid = {0x00d32df8, 0xea2d, 0x40bf, {0xa4, 0x47, 0x9c, 0xb4, 0xbc, 0xf1, 0x1d, 0x5e}}; diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.h b/onnxruntime/python/onnxruntime_pybind_mlvalue.h index c76292040b61b..6d45d150ed191 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.h +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.h @@ -89,6 +89,18 @@ const std::unordered_map* GetDmlToHostMemCpyF #endif +#ifdef USE_MIGRAPHX + +void CpuToMIGraphXMemCpy(void* dst, const void* src, size_t num_bytes); + +void MIGraphXToCpuMemCpy(void* dst, const void* src, size_t num_bytes); + +const std::unordered_map* GetMIGraphXToHostMemCpyFunction(); + +AllocatorPtr GetMIGraphXAllocator(OrtDevice::DeviceId id); + +#endif + #ifdef USE_CANN void CpuToCannMemCpy(void* dst, const void* src, size_t num_bytes); diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index 94235b3043bc7..2b77fa081b98f 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -60,6 +60,11 @@ void addOrtValueMethods(pybind11::module& m) { // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA CreateGenericMLValue(nullptr, GetRocmAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToRocmMemCpy); +#elif USE_MIGRAPHX + // InputDeflist is null because OrtValue creation is not tied to a specific model + // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) + // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA + CreateGenericMLValue(nullptr, GetMIGraphXAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToMIGraphXMemCpy); #elif USE_DML // InputDeflist is null because OrtValue creation is not tied to a specific model // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) @@ -128,6 +133,12 @@ void addOrtValueMethods(pybind11::module& m) { values_type, *(ml_value->GetMutable()), CpuToRocmMemCpy); +#elif USE_MIGRAPHX + onnxruntime::python::CopyDataToTensor( + py_values, + values_type, + *(ml_value->GetMutable()), + CpuToMIGraphXMemCpy); #elif USE_DML onnxruntime::python::CopyDataToTensor( py_values, @@ -166,6 +177,13 @@ void addOrtValueMethods(pybind11::module& m) { throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine."); } allocator = 
GetCudaAllocator(device.Id()); +#elif USE_MIGRAPHX + allocator = GetMIGraphXAllocator(device.Id()); +#elif USE_ROCM + if (!IsRocmDeviceIdValid(logging::LoggingManager::DefaultLogger(), device.Id())) { + throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine."); + } + allocator = GetRocmAllocator(device.Id()); #else throw std::runtime_error( "Can't allocate memory on the CUDA device using this package of OnnxRuntime. " @@ -307,6 +325,8 @@ void addOrtValueMethods(pybind11::module& m) { py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetCannToHostMemCpyFunction()); #elif USE_DML py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetDmlToHostMemCpyFunction()); +#elif USE_MIGRAPHX + py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetMIGraphXToHostMemCpyFunction()); #else py::object obj = GetPyObjFromTensor(*ml_value, nullptr, nullptr); #endif diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index f31734bdfb805..527bab9f70eba 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -836,7 +836,9 @@ std::unique_ptr CreateExecutionProviderInstance( "./compiled_model.mxr", 1, "./compiled_model.mxr", - 1}; + 1, + SIZE_MAX, + 0}; for (auto option : it->second) { if (option.first == "device_id") { if (!option.second.empty()) { diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.cc b/onnxruntime/python/onnxruntime_pybind_state_common.cc index cec4dfc14160c..824aa9979b02b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.cc +++ b/onnxruntime/python/onnxruntime_pybind_state_common.cc @@ -43,6 +43,12 @@ onnxruntime::ROCMExecutionProviderExternalAllocatorInfo external_allocator_info{ onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; #endif +#ifdef USE_MIGRAPHX +onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info{}; +// TODO remove deprecated global config +onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; +#endif + #ifdef ENABLE_TRAINING void DlpackCapsuleDestructor(PyObject* data) { diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 4d6e411defae3..9e8e28096d639 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -123,6 +123,7 @@ struct OrtStatus { #endif #ifdef USE_MIGRAPHX #include "core/providers/migraphx/migraphx_provider_factory.h" +#include "core/providers/migraphx/migraphx_execution_provider_info.h" #endif #ifdef USE_OPENVINO #include "core/providers/openvino/openvino_provider_factory.h" @@ -182,6 +183,17 @@ ProviderInfo_CANN& GetProviderInfo_CANN(); } // namespace onnxruntime #endif +#ifdef USE_MIGRAPHX +namespace onnxruntime { +ProviderInfo_MIGraphX* TryGetProviderInfo_MIGraphX(); +ProviderInfo_MIGraphX& GetProviderInfo_MIGraphX(); +namespace python { +extern onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info; +extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; +} // namespace python +} // namespace onnxruntime +#endif + #ifdef USE_ROCM namespace onnxruntime { ProviderInfo_ROCM* TryGetProviderInfo_ROCM(); diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 1feba20e32bbb..cd258294fb976 100644 --- 
a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -81,7 +81,9 @@ std::unique_ptr DefaultMIGraphXExecutionProvider() { "./compiled_model.mxr", 1, "./compiled_model.mxr", - 1}; + 1, + SIZE_MAX, + 0}; return MIGraphXProviderFactoryCreator::Create(¶ms)->CreateProvider(); #else return nullptr; From dbcb1e3745a7053c7ab1be688f7bbdd2ebc8dc50 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 15:58:17 +0100 Subject: [PATCH 02/10] Change variable names to migx_* and migraphx_* --- .../core/session/onnxruntime_c_api.h | 4 ++-- .../migraphx_execution_provider_info.cc | 18 +++++++++--------- .../migraphx_execution_provider_info.h | 4 ++-- .../migraphx/migraphx_provider_factory.cc | 4 ++++ 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e50e257c87eb1..d6b7e0f3df0ba 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -627,7 +627,7 @@ typedef struct OrtMIGraphXProviderOptions { * Defaults to SIZE_MAX. * \note If a ::OrtArenaCfg has been applied, it will override this field */ - size_t gpu_mem_limit; + size_t migraphx_mem_limit; /** \brief Strategy used to grow the memory arena * 0 = kNextPowerOfTwo
@@ -635,7 +635,7 @@ typedef struct OrtMIGraphXProviderOptions { * Defaults to 0. * \note If a ::OrtArenaCfg has been applied, it will override this field */ - int arena_extend_strategy; + int migraphx_arena_extend_strategy; } OrtMIGraphXProviderOptions; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc index 1feec9ae2e3e1..f5638c19e91e6 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc @@ -29,11 +29,11 @@ constexpr const char* kSaveModelPath = "migx_save_model_name"; constexpr const char* kLoadCompiledModel = "migx_load_compiled_model"; constexpr const char* kLoadModelPath = "migx_load_model_name"; constexpr const char* kExhaustiveTune = "migx_exhaustive_tune"; -constexpr const char* kMemLimit = "gpu_mem_limit"; -constexpr const char* kArenaExtendStrategy = "arena_extend_strategy"; -constexpr const char* kGpuExternalAlloc = "gpu_external_alloc"; -constexpr const char* kGpuExternalFree = "gpu_external_free"; -constexpr const char* kGpuExternalEmptyCache = "gpu_external_empty_cache"; +constexpr const char* kMemLimit = "migx_mem_limit"; +constexpr const char* kArenaExtendStrategy = "migx_arena_extend_strategy"; +constexpr const char* kGpuExternalAlloc = "migx_external_alloc"; +constexpr const char* kGpuExternalFree = "migx_external_free"; +constexpr const char* kGpuExternalEmptyCache = "migx_external_empty_cache"; } // namespace provider_option_names } // namespace migraphx @@ -86,7 +86,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(migraphx::provider_option_names::kSaveCompiledModel, info.save_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kLoadCompiledModel, info.load_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kExhaustiveTune, info.exhaustive_tune) - .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.gpu_mem_limit) + .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.mem_limit) .AddAssignmentToEnumReference(migraphx_provider_option::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) .Parse(options)); @@ -103,7 +103,7 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const MIGraphXE {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.mem_limit)}, {migraphx_provider_option::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, {migraphx_provider_option::kGpuExternalFree, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, {migraphx_provider_option::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, @@ -121,8 +121,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const OrtMIGrap {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)}, 
{migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.migraphx_save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.migraphx_load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, - {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.arena_extend_strategy))}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.migraphx_mem_limit)}, + {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.migraphx_arena_extend_strategy))}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.migraphx_exhaustive_tune)}, }; return options; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h index 61bc0d871be7c..9100d3efd47ba 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h @@ -51,7 +51,7 @@ struct MIGraphXExecutionProviderInfo { std::string load_model_file{"./compiled_model.mxr"}; bool exhaustive_tune{false}; - size_t gpu_mem_limit{std::numeric_limits::max()}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) + size_t mem_limit{std::numeric_limits::max()}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) ArenaExtendStrategy arena_extend_strategy{ArenaExtendStrategy::kNextPowerOfTwo}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) OrtArenaCfg* default_memory_arena_cfg{nullptr}; @@ -80,7 +80,7 @@ struct std::hash<::onnxruntime::MIGraphXExecutionProviderInfo> { (static_cast(info.exhaustive_tune) << 24); onnxruntime::HashCombine(data, value); - onnxruntime::HashCombine(info.gpu_mem_limit, value); + onnxruntime::HashCombine(info.mem_limit, value); // Memory pointers onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.alloc), value); diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 64278bf6439f9..6540c3cdfc591 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -99,6 +99,8 @@ struct MIGraphX_Provider : Provider { if (options.migraphx_load_model_path != nullptr) { info.load_model_file = options.migraphx_load_model_path; } + info.arena_extend_strategy = options.migraphx_arena_extend_strategy; + info.mem_limit = options.migraphx_mem_limit; return std::make_shared(info); } @@ -131,6 +133,8 @@ struct MIGraphX_Provider : Provider { migx_options.migraphx_save_model_path = internal_options.save_model_file.c_str(); migx_options.migraphx_load_compiled_model = internal_options.load_compiled_model; migx_options.migraphx_load_model_path = internal_options.load_model_file.c_str(); + migx_options.migraphx_arena_extend_strategy = internal_options.arena_extend_strategy; + migx_options.migraphx_mem_limit = internal_options.mem_limit; } ProviderOptions GetProviderOptions(const void* provider_options) override { From 0ceb1fd01dd3e40b83568ec71914e4f49930226a Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 16:34:18 +0100 Subject: [PATCH 03/10] Fixing issue with allocating extend arena --- 
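Note (illustration only, outside the diff): with the PATCH 02 renames and the casts fixed here, the two new options are plain fields on the public OrtMIGraphXProviderOptions struct, so a caller fills them in directly. A minimal caller-side sketch, assuming the existing SessionOptionsAppendExecutionProvider_MIGraphX entry point and an arbitrary example limit:

#include "onnxruntime_c_api.h"                    // public C API header (install-tree name)

OrtMIGraphXProviderOptions migx_options{};        // zero-initialize; unset path fields stay nullptr and are skipped by the EP
migx_options.device_id = 0;
migx_options.migraphx_fp16_enable = 1;
migx_options.migraphx_mem_limit = static_cast<size_t>(2) * 1024 * 1024 * 1024;  // example: cap the arena at 2 GiB instead of SIZE_MAX
migx_options.migraphx_arena_extend_strategy = 1;                                 // 0 = kNextPowerOfTwo (default), 1 = kSameAsRequested
// The filled struct is then handed to the session options through the usual
// SessionOptionsAppendExecutionProvider_MIGraphX call; note that an OrtArenaCfg applied to
// the session still overrides both fields, as the doc comments in onnxruntime_c_api.h state.
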
.../core/providers/migraphx/migraphx_provider_factory.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 6540c3cdfc591..6dd14e00e0bcf 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -99,7 +99,7 @@ struct MIGraphX_Provider : Provider { if (options.migraphx_load_model_path != nullptr) { info.load_model_file = options.migraphx_load_model_path; } - info.arena_extend_strategy = options.migraphx_arena_extend_strategy; + info.arena_extend_strategy = static_cast(options.migraphx_arena_extend_strategy); info.mem_limit = options.migraphx_mem_limit; return std::make_shared(info); } @@ -133,7 +133,7 @@ struct MIGraphX_Provider : Provider { migx_options.migraphx_save_model_path = internal_options.save_model_file.c_str(); migx_options.migraphx_load_compiled_model = internal_options.load_compiled_model; migx_options.migraphx_load_model_path = internal_options.load_model_file.c_str(); - migx_options.migraphx_arena_extend_strategy = internal_options.arena_extend_strategy; + migx_options.migraphx_arena_extend_strategy = static_cast(internal_options.arena_extend_strategy); migx_options.migraphx_mem_limit = internal_options.mem_limit; } From dc06f871012dc11ada2f9092933e609ca453f214 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 18:58:41 +0100 Subject: [PATCH 04/10] Fixing variables names --- .../core/providers/migraphx/migraphx_execution_provider.cc | 4 ++-- .../core/providers/migraphx/migraphx_execution_provider.h | 2 +- .../core/providers/migraphx/migraphx_provider_factory.cc | 4 ++-- .../core/providers/migraphx/migraphx_provider_factory.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 7f934d69c48ab..0a9c4b977b6cc 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -210,7 +210,7 @@ MIGraphXExecutionProvider::~MIGraphXExecutionProvider() { } AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, - size_t gpu_mem_limit, + size_t migx_mem_limit, ArenaExtendStrategy arena_extend_strategy, MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info, @@ -235,7 +235,7 @@ AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::Devic device_id, true, {default_memory_arena_cfg ? 
*default_memory_arena_cfg - : OrtArenaCfg(gpu_mem_limit, static_cast(arena_extend_strategy), + : OrtArenaCfg(migx_mem_limit, static_cast(arena_extend_strategy), -1, -1, -1, -1L)}, // make it stream aware true, diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h index b20d534be66a2..13c45fe35db75 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h @@ -76,7 +76,7 @@ class MIGraphXExecutionProvider : public IExecutionProvider { virtual std::shared_ptr GetKernelRegistry() const override; std::unique_ptr GetDataTransfer() const override; - static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t rocm_mem_limit, ArenaExtendStrategy arena_extend_strategy, + static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t migx_mem_limit, ArenaExtendStrategy arena_extend_strategy, MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg); std::unique_ptr GetSubGraph(const std::vector& graph_nodes_index, const GraphViewer& graph) const; diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 6dd14e00e0bcf..f8c962fa02d71 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -61,8 +61,8 @@ struct ProviderInfo_MIGraphX_Impl final : ProviderInfo_MIGraphX { HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost)); } - std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override { - return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg); + std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override { + return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, migx_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg); } } g_info; diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h index 7c26f9b9f0bb5..d1c9457bafa0f 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h @@ -16,7 +16,7 @@ struct ProviderInfo_MIGraphX { virtual std::unique_ptr CreateMIGraphXPinnedAllocator(int16_t device_id, const char* name) = 0; virtual void MIGraphXMemcpy_HostToDevice(void* dst, const void* src, size_t count) = 0; virtual void MIGraphXMemcpy_DeviceToHost(void* dst, const void* src, size_t count) = 0; - virtual std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0; + virtual std::shared_ptr 
CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0; protected: ~ProviderInfo_MIGraphX() = default; // Can only be destroyed through a subclass instance From 754603661f938ecb37323f63232e6f37b756bba1 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Mon, 23 Dec 2024 11:41:26 +0100 Subject: [PATCH 05/10] Updating comments --- onnxruntime/python/onnxruntime_pybind_ortvalue.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index 2b77fa081b98f..4d661cd8fb470 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -58,12 +58,12 @@ void addOrtValueMethods(pybind11::module& m) { // InputDeflist is null because OrtValue creation is not tied to a specific model // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) - // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA + // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in ROCm CreateGenericMLValue(nullptr, GetRocmAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToRocmMemCpy); #elif USE_MIGRAPHX // InputDeflist is null because OrtValue creation is not tied to a specific model // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) - // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA + // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in MIGraphX CreateGenericMLValue(nullptr, GetMIGraphXAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToMIGraphXMemCpy); #elif USE_DML // InputDeflist is null because OrtValue creation is not tied to a specific model From 78043ac0b792cd2a38ed39067faeccd723a2f083 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Tue, 31 Dec 2024 15:30:02 +0100 Subject: [PATCH 06/10] Fixing build errors on Linux when using rocm and migraphx both --- .../migraphx/migraphx_execution_provider.cc | 4 +-- .../migraphx_execution_provider_info.cc | 24 +++++++-------- .../migraphx_execution_provider_info.h | 7 ++--- .../python/onnxruntime_pybind_mlvalue.cc | 2 +- .../python/onnxruntime_pybind_state_common.cc | 7 +++-- .../python/onnxruntime_pybind_state_common.h | 29 ++++++++++++------- 6 files changed, 40 insertions(+), 33 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 0a9c4b977b6cc..6367475b5077d 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -249,10 +249,10 @@ AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::Devic std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() { AllocatorCreationInfo default_memory_info( - [](OrtDevice::DeviceId device_id) { return CreateMIGraphXAllocator(device_id, onnxruntime::CUDA); }, info_.device_id); + [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, 
CUDA); }, info_.device_id); AllocatorCreationInfo pinned_allocator_info( [](OrtDevice::DeviceId device_id) { - return CreateMIGraphXPinnedAllocator(device_id, onnxruntime::CUDA_PINNED); + return std::make_unique(device_id, CUDA_PINNED); }, 0); return std::vector{CreateAllocator(default_memory_info), CreateAllocator(pinned_allocator_info)}; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc index f5638c19e91e6..11270f2e64b82 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc @@ -58,7 +58,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions return Status::OK(); }) .AddValueParser( - migraphx_provider_option::kGpuExternalAlloc, + migraphx::provider_option_names::kGpuExternalAlloc, [&alloc](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); @@ -66,7 +66,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions return Status::OK(); }) .AddValueParser( - migraphx_provider_option::kGpuExternalFree, + migraphx::provider_option_names::kGpuExternalFree, [&free](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); @@ -74,7 +74,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions return Status::OK(); }) .AddValueParser( - migraphx_provider_option::kGpuExternalEmptyCache, + migraphx::provider_option_names::kGpuExternalEmptyCache, [&empty_cache](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); @@ -86,8 +86,8 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(migraphx::provider_option_names::kSaveCompiledModel, info.save_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kLoadCompiledModel, info.load_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kExhaustiveTune, info.exhaustive_tune) - .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.mem_limit) - .AddAssignmentToEnumReference(migraphx_provider_option::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) + .AddAssignmentToReference(migraphx::provider_option_names::kMemLimit, info.mem_limit) + .AddAssignmentToEnumReference(migraphx::provider_option_names::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) .Parse(options)); MIGraphXExecutionProviderExternalAllocatorInfo alloc_info{alloc, free, empty_cache}; @@ -103,11 +103,11 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const MIGraphXE {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.mem_limit)}, - {migraphx_provider_option::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, - {migraphx_provider_option::kGpuExternalFree, 
MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, - {migraphx_provider_option::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, - {migraphx_provider_option::kArenaExtendStrategy, + {migraphx::provider_option_names::kMemLimit, MakeStringWithClassicLocale(info.mem_limit)}, + {migraphx::provider_option_names::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, + {migraphx::provider_option_names::kGpuExternalFree, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, + {migraphx::provider_option_names::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, + {migraphx::provider_option_names::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, info.arena_extend_strategy)}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.exhaustive_tune)}, }; @@ -121,8 +121,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const OrtMIGrap {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.migraphx_save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.migraphx_load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.migraphx_mem_limit)}, - {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.migraphx_arena_extend_strategy))}, + {migraphx::provider_option_names::kMemLimit, MakeStringWithClassicLocale(info.migraphx_mem_limit)}, + {migraphx::provider_option_names::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.migraphx_arena_extend_strategy))}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.migraphx_exhaustive_tune)}, }; return options; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h index 9100d3efd47ba..35c44fe39b561 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h @@ -74,10 +74,9 @@ struct std::hash<::onnxruntime::MIGraphXExecutionProviderInfo> { (static_cast(info.fp16_enable) << 18) ^ (static_cast(info.int8_enable) << 19) ^ (static_cast(info.int8_use_native_calibration_table) << 20) ^ - (static_cast(info.model_cache_enable) << 21) ^ - (static_cast(info.save_compiled_model) << 22) ^ - (static_cast(info.load_compiled_model) << 23) ^ - (static_cast(info.exhaustive_tune) << 24); + (static_cast(info.save_compiled_model) << 21) ^ + (static_cast(info.load_compiled_model) << 22) ^ + (static_cast(info.exhaustive_tune) << 23); onnxruntime::HashCombine(data, value); onnxruntime::HashCombine(info.mem_limit, value); diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index da5e942d67f21..b69a1098fd8be 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -230,7 +230,7 @@ AllocatorPtr GetMIGraphXAllocator(OrtDevice::DeviceId id) { if (id_to_allocator_map->find(id) == id_to_allocator_map->end()) { // TODO: Expose knobs so that users 
can set fields associated with OrtArenaCfg so that we can pass it to the following method - id_to_allocator_map->insert({id, GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, nullptr)}); + id_to_allocator_map->insert({id, GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(id, gpu_mem_limit, arena_extend_strategy, migx_external_allocator_info, nullptr)}); } return (*id_to_allocator_map)[id]; diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.cc b/onnxruntime/python/onnxruntime_pybind_state_common.cc index 824aa9979b02b..8f35f88385095 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.cc +++ b/onnxruntime/python/onnxruntime_pybind_state_common.cc @@ -39,14 +39,15 @@ bool do_copy_in_default_stream = true; // TODO remove deprecated global config onnxruntime::rocm::TunableOpInfo tunable_op{}; onnxruntime::ROCMExecutionProviderExternalAllocatorInfo external_allocator_info{}; +#endif + +#if defined(USE_ROCM) || defined(USE_MIGRAPHX) // TODO remove deprecated global config onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; #endif #ifdef USE_MIGRAPHX -onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info{}; -// TODO remove deprecated global config -onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; +onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo migx_external_allocator_info{}; #endif #ifdef ENABLE_TRAINING diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 9e8e28096d639..e9db8aeb3f429 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -183,17 +183,6 @@ ProviderInfo_CANN& GetProviderInfo_CANN(); } // namespace onnxruntime #endif -#ifdef USE_MIGRAPHX -namespace onnxruntime { -ProviderInfo_MIGraphX* TryGetProviderInfo_MIGraphX(); -ProviderInfo_MIGraphX& GetProviderInfo_MIGraphX(); -namespace python { -extern onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info; -extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; -} // namespace python -} // namespace onnxruntime -#endif - #ifdef USE_ROCM namespace onnxruntime { ProviderInfo_ROCM* TryGetProviderInfo_ROCM(); @@ -206,11 +195,29 @@ extern bool do_copy_in_default_stream; // TODO remove deprecated global config extern onnxruntime::rocm::TunableOpInfo tunable_op; extern onnxruntime::ROCMExecutionProviderExternalAllocatorInfo external_allocator_info; +} // namespace python +} // namespace onnxruntime +#endif + +#if defined(USE_ROCM) || defined(USE_MIGRAPHX) +namespace onnxruntime { +namespace python { extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; } // namespace python } // namespace onnxruntime #endif +#ifdef USE_MIGRAPHX +namespace onnxruntime { +ProviderInfo_MIGraphX* TryGetProviderInfo_MIGraphX(); +ProviderInfo_MIGraphX& GetProviderInfo_MIGraphX(); +namespace python { +extern onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo migx_external_allocator_info; +} // namespace python +} // namespace onnxruntime + +#endif + #include "core/providers/dnnl/dnnl_provider_factory.h" #include "core/providers/shared_library/provider_host_api.h" From bedb363e0e9b27723201e70634c809b2aaf9c022 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Tue, 31 Dec 2024 15:35:18 +0100 Subject: [PATCH 07/10] Adding 
namespace onnxruntime --- .../core/providers/migraphx/migraphx_execution_provider.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 6367475b5077d..4e98377c80205 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -249,10 +249,10 @@ AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::Devic std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() { AllocatorCreationInfo default_memory_info( - [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, CUDA); }, info_.device_id); + [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, onnxruntime::CUDA); }, info_.device_id); AllocatorCreationInfo pinned_allocator_info( [](OrtDevice::DeviceId device_id) { - return std::make_unique(device_id, CUDA_PINNED); + return std::make_unique(device_id, onnxrunime::CUDA_PINNED); }, 0); return std::vector{CreateAllocator(default_memory_info), CreateAllocator(pinned_allocator_info)}; From b4b980cafad278f5e36279b548986b9d6c084385 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 3 Jan 2025 13:20:52 +0100 Subject: [PATCH 08/10] Fixing spelling error --- .../core/providers/migraphx/migraphx_execution_provider.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 4e98377c80205..7eef42ed5f7e2 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -252,7 +252,7 @@ std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, onnxruntime::CUDA); }, info_.device_id); AllocatorCreationInfo pinned_allocator_info( [](OrtDevice::DeviceId device_id) { - return std::make_unique(device_id, onnxrunime::CUDA_PINNED); + return std::make_unique(device_id, onnxruntime::CUDA_PINNED); }, 0); return std::vector{CreateAllocator(default_memory_info), CreateAllocator(pinned_allocator_info)}; From a3cdc74f5d1451e2fc782b24181063ea955ab496 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 3 Jan 2025 15:13:53 +0100 Subject: [PATCH 09/10] Fixing clang format issues --- .../migraphx/migraphx_execution_provider.cc | 16 +++--- .../migraphx/migraphx_execution_provider.h | 2 +- .../migraphx_execution_provider_info.h | 2 +- .../migraphx/migraphx_provider_factory.cc | 2 +- .../python/onnxruntime_pybind_state.cc | 51 +++++++++++-------- 5 files changed, 41 insertions(+), 32 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 7eef42ed5f7e2..1a7a3a27b755b 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -210,18 +210,18 @@ MIGraphXExecutionProvider::~MIGraphXExecutionProvider() { } AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, - size_t migx_mem_limit, - ArenaExtendStrategy arena_extend_strategy, - MIGraphXExecutionProviderExternalAllocatorInfo - external_allocator_info, - const OrtArenaCfg* default_memory_arena_cfg) { 
+                                                                size_t migx_mem_limit,
+                                                                ArenaExtendStrategy arena_extend_strategy,
+                                                                MIGraphXExecutionProviderExternalAllocatorInfo
+                                                                    external_allocator_info,
+                                                                const OrtArenaCfg* default_memory_arena_cfg) {
   if (external_allocator_info.UseExternalAllocator()) {
     AllocatorCreationInfo default_memory_info(
         [external_allocator_info](OrtDevice::DeviceId id) {
           return std::make_unique(id, HIP,
-                                                            external_allocator_info.alloc,
-                                                            external_allocator_info.free,
-                                                            external_allocator_info.empty_cache);
+                                  external_allocator_info.alloc,
+                                  external_allocator_info.free,
+                                  external_allocator_info.empty_cache);
         },
         device_id,
         false);
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
index 13c45fe35db75..f79b720e758e3 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
@@ -77,7 +77,7 @@ class MIGraphXExecutionProvider : public IExecutionProvider {
   std::unique_ptr GetDataTransfer() const override;
 
   static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t migx_mem_limit, ArenaExtendStrategy arena_extend_strategy,
-                                 MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg);
+                                              MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg);
 
   std::unique_ptr GetSubGraph(const std::vector& graph_nodes_index, const GraphViewer& graph) const;
   void RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry, AllocatorMap& allocators) const override;
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
index 35c44fe39b561..3dbde11ddc4a9 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
@@ -51,7 +51,7 @@ struct MIGraphXExecutionProviderInfo {
   std::string load_model_file{"./compiled_model.mxr"};
   bool exhaustive_tune{false};
 
-  size_t mem_limit{std::numeric_limits::max()};  // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified)
+  size_t mem_limit{std::numeric_limits::max()};                                     // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified)
   ArenaExtendStrategy arena_extend_strategy{ArenaExtendStrategy::kNextPowerOfTwo};  // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified)
   OrtArenaCfg* default_memory_arena_cfg{nullptr};
 
diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
index f8c962fa02d71..545b02a345830 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
+++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
@@ -61,7 +61,7 @@ struct ProviderInfo_MIGraphX_Impl final : ProviderInfo_MIGraphX {
     HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost));
   }
 
-  std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override {
+  std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override {
     return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, migx_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg);
   }
 } g_info;
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 527bab9f70eba..03dcb80c2f1d1 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -1432,7 +1432,7 @@ void addGlobalMethods(py::module& m) {
         ORT_UNUSED_PARAMETER(algo);
         ORT_THROW("set_cudnn_conv_algo_search is not supported in ROCM");
 #else
-    cudnn_conv_algo_search = algo;
+        cudnn_conv_algo_search = algo;
 #endif
       });
   // TODO remove deprecated global config
@@ -1443,7 +1443,7 @@ void addGlobalMethods(py::module& m) {
         ORT_UNUSED_PARAMETER(use_single_stream);
         ORT_THROW("set_do_copy_in_default_stream is not supported in ROCM");
 #else
-    do_copy_in_default_stream = use_single_stream;
+        do_copy_in_default_stream = use_single_stream;
 #endif
       });
   // TODO remove deprecated global config
@@ -1808,10 +1808,10 @@ Applies to session load, initialization, etc. Default is 0.)pbdoc")
         }
         ORT_THROW_IF_ERROR(options->value.AddExternalInitializers(names_ptrs, values_ptrs));
 #else
-    ORT_UNUSED_PARAMETER(options);
-    ORT_UNUSED_PARAMETER(names);
-    ORT_UNUSED_PARAMETER(ort_values);
-    ORT_THROW("External initializers are not supported in this build.");
+        ORT_UNUSED_PARAMETER(options);
+        ORT_UNUSED_PARAMETER(names);
+        ORT_UNUSED_PARAMETER(ort_values);
+        ORT_THROW("External initializers are not supported in this build.");
 #endif
       });
 
@@ -1873,7 +1873,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
             return *(na.Type());
           },
           "node type")
-      .def("__str__", [](const onnxruntime::NodeArg& na) -> std::string {
+      .def(
+          "__str__", [](const onnxruntime::NodeArg& na) -> std::string {
             std::ostringstream res;
             res << "NodeArg(name='" << na.Name() << "', type='" << *(na.Type()) << "', shape=";
             auto shape = na.Shape();
@@ -1900,7 +1901,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
             res << ")";
             return std::string(res.str());
           }, "converts the node into a readable string")
-      .def_property_readonly("shape", [](const onnxruntime::NodeArg& na) -> std::vector {
+      .def_property_readonly(
+          "shape", [](const onnxruntime::NodeArg& na) -> std::vector {
             auto shape = na.Shape();
             std::vector arr;
             if (shape == nullptr || shape->dim_size() == 0) {
@@ -2108,25 +2110,32 @@ including arg name, arg type (contains both type and shape).)pbdoc")
       .def_property_readonly("get_profiling_start_time_ns", [](const PyInferenceSession* sess) -> uint64_t {
         return sess->GetSessionHandle()->GetProfiling().GetStartTimeNs();
       })
-      .def("get_providers", [](const PyInferenceSession* sess) -> const std::vector& { return sess->GetSessionHandle()->GetRegisteredProviderTypes(); }, py::return_value_policy::reference_internal)
-      .def("get_provider_options", [](const PyInferenceSession* sess) -> const ProviderOptionsMap& { return sess->GetSessionHandle()->GetAllProviderOptions(); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("session_options", [](const PyInferenceSession* sess) -> PySessionOptions* {
+      .def(
+          "get_providers", [](const PyInferenceSession* sess) -> const std::vector& { return sess->GetSessionHandle()->GetRegisteredProviderTypes(); }, py::return_value_policy::reference_internal)
+      .def(
+          "get_provider_options", [](const PyInferenceSession* sess) -> const ProviderOptionsMap& { return sess->GetSessionHandle()->GetAllProviderOptions(); }, py::return_value_policy::reference_internal)
+      .def_property_readonly(
+          "session_options", [](const PyInferenceSession* sess) -> PySessionOptions* {
             auto session_options = std::make_unique();
             session_options->value = sess->GetSessionHandle()->GetSessionOptions();
             return session_options.release(); }, py::return_value_policy::take_ownership)
-      .def_property_readonly("inputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
+      .def_property_readonly(
+          "inputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
            auto res = sess->GetSessionHandle()->GetModelInputs();
            OrtPybindThrowIfError(res.first);
            return *(res.second); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("outputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
+      .def_property_readonly(
+          "outputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
            auto res = sess->GetSessionHandle()->GetModelOutputs();
            OrtPybindThrowIfError(res.first);
            return *(res.second); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("overridable_initializers", [](const PyInferenceSession* sess) -> const std::vector& {
+      .def_property_readonly(
+          "overridable_initializers", [](const PyInferenceSession* sess) -> const std::vector& {
            auto res = sess->GetSessionHandle()->GetOverridableInitializers();
            OrtPybindThrowIfError(res.first);
            return *(res.second); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("model_meta", [](const PyInferenceSession* sess) -> const onnxruntime::ModelMetadata& {
+      .def_property_readonly(
+          "model_meta", [](const PyInferenceSession* sess) -> const onnxruntime::ModelMetadata& {
           auto res = sess->GetSessionHandle()->GetModelMetadata();
           OrtPybindThrowIfError(res.first);
           return *(res.second); }, py::return_value_policy::reference_internal)
@@ -2154,8 +2163,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
 
         return ret;
 #else
-    ORT_UNUSED_PARAMETER(sess);
-    ORT_THROW("TunableOp and get_tuning_results are not supported in this build.");
+        ORT_UNUSED_PARAMETER(sess);
+        ORT_THROW("TunableOp and get_tuning_results are not supported in this build.");
 #endif
       })
       .def("set_tuning_results", [](PyInferenceSession* sess, py::list results, bool error_on_invalid) -> void {
@@ -2186,10 +2195,10 @@ including arg name, arg type (contains both type and shape).)pbdoc")
           throw std::runtime_error("Error in execution: " + status.ErrorMessage());
         }
 #else
-    ORT_UNUSED_PARAMETER(sess);
-    ORT_UNUSED_PARAMETER(results);
-    ORT_UNUSED_PARAMETER(error_on_invalid);
-    ORT_THROW("TunableOp and set_tuning_results are not supported in this build.");
+        ORT_UNUSED_PARAMETER(sess);
+        ORT_UNUSED_PARAMETER(results);
+        ORT_UNUSED_PARAMETER(error_on_invalid);
+        ORT_THROW("TunableOp and set_tuning_results are not supported in this build.");
 #endif
       });

From 6f119cb24c0fdff76bde28c43a3a84cc09f5d4f6 Mon Sep 17 00:00:00 2001
From: Uros Petkovic
Date: Thu, 9 Jan 2025 15:39:27 -0600
Subject: [PATCH 10/10] Reordering ROCm and MIGraphX elifs

---
 onnxruntime/python/onnxruntime_pybind_ortvalue.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc
index 4d661cd8fb470..66377b48a9975 100644
--- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc
+++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc
@@ -177,13 +177,13 @@ void addOrtValueMethods(pybind11::module& m) {
           throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine.");
         }
         allocator = GetCudaAllocator(device.Id());
-#elif USE_MIGRAPHX
-        allocator = GetMIGraphXAllocator(device.Id());
 #elif USE_ROCM
         if (!IsRocmDeviceIdValid(logging::LoggingManager::DefaultLogger(), device.Id())) {
           throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine.");
         }
         allocator = GetRocmAllocator(device.Id());
+#elif USE_MIGRAPHX
+        allocator = GetMIGraphXAllocator(device.Id());
 #else
         throw std::runtime_error(
             "Can't allocate memory on the CUDA device using this package of OnnxRuntime. "