From 22858066cad7503d538e063a15bc20eacc1ca547 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 15:19:55 +0100 Subject: [PATCH 01/10] Adding ourtvalue support for MGX EP --- .../core/session/onnxruntime_c_api.h | 15 +++++ .../migraphx/migraphx_execution_provider.cc | 41 ++++++++++++- .../migraphx/migraphx_execution_provider.h | 5 +- .../migraphx_execution_provider_info.cc | 52 ++++++++++++++++ .../migraphx_execution_provider_info.h | 61 +++++++++++++++++++ .../migraphx/migraphx_provider_factory.cc | 22 +++++++ .../migraphx/migraphx_provider_factory.h | 3 + .../python/onnxruntime_pybind_mlvalue.cc | 45 ++++++++++++++ .../python/onnxruntime_pybind_mlvalue.h | 12 ++++ .../python/onnxruntime_pybind_ortvalue.cc | 20 ++++++ .../python/onnxruntime_pybind_state.cc | 4 +- .../python/onnxruntime_pybind_state_common.cc | 6 ++ .../python/onnxruntime_pybind_state_common.h | 12 ++++ onnxruntime/test/util/default_providers.cc | 4 +- 14 files changed, 298 insertions(+), 4 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 4ca2791e26ab9..e50e257c87eb1 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -622,6 +622,21 @@ typedef struct OrtMIGraphXProviderOptions { int migraphx_load_compiled_model; // migraphx int8 cal table. Default 0 = false, noznero = true const char* migraphx_load_model_path; // migraphx model path name bool migraphx_exhaustive_tune; // migraphx tuned compile Default = false + + /** \brief MIGraphX memory limit (To use all possible memory pass in maximum size_t) + * Defaults to SIZE_MAX. + * \note If a ::OrtArenaCfg has been applied, it will override this field + */ + size_t gpu_mem_limit; + + /** \brief Strategy used to grow the memory arena + * 0 = kNextPowerOfTwo
+ * 1 = kSameAsRequested<br>
+ * Defaults to 0. + * \note If a ::OrtArenaCfg has been applied, it will override this field + */ + int arena_extend_strategy; + } OrtMIGraphXProviderOptions; /** \brief OpenVINO Provider Options diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index fd36b8ae5f678..7f934d69c48ab 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -13,10 +13,11 @@ #include "core/common/safeint.h" #include "core/common/logging/severity.h" #include "migraphx_execution_provider.h" +#include "migraphx_execution_provider_info.h" #include "migraphx_execution_provider_utils.h" #include "migraphx_allocator.h" #include "gpu_data_transfer.h" -#include "migraphx_inc.h" +#include "migraphx_call.h" #include "migraphx_stream_handle.h" @@ -208,6 +209,44 @@ MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProv MIGraphXExecutionProvider::~MIGraphXExecutionProvider() { } +AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, + size_t gpu_mem_limit, + ArenaExtendStrategy arena_extend_strategy, + MIGraphXExecutionProviderExternalAllocatorInfo + external_allocator_info, + const OrtArenaCfg* default_memory_arena_cfg) { + if (external_allocator_info.UseExternalAllocator()) { + AllocatorCreationInfo default_memory_info( + [external_allocator_info](OrtDevice::DeviceId id) { + return std::make_unique(id, HIP, + external_allocator_info.alloc, + external_allocator_info.free, + external_allocator_info.empty_cache); + }, + device_id, + false); + + return CreateAllocator(default_memory_info); + } else { + AllocatorCreationInfo default_memory_info( + [](OrtDevice::DeviceId id) { + return std::make_unique(id, HIP); + }, + device_id, + true, + {default_memory_arena_cfg ? *default_memory_arena_cfg + : OrtArenaCfg(gpu_mem_limit, static_cast(arena_extend_strategy), + -1, -1, -1, -1L)}, + // make it stream aware + true, + // enable cross stream sharing? 
+ false); + + // ROCM malloc/free is expensive so always use an arena + return CreateAllocator(default_memory_info); + } +} + std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() { AllocatorCreationInfo default_memory_info( [](OrtDevice::DeviceId device_id) { return CreateMIGraphXAllocator(device_id, onnxruntime::CUDA); }, info_.device_id); diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h index 2be6c09551a71..b20d534be66a2 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h @@ -7,7 +7,7 @@ #include "core/framework/execution_provider.h" #include "core/platform/ort_mutex.h" #include "core/providers/migraphx/migraphx_execution_provider_info.h" -#include "core/providers/migraphx/migraphx_inc.h" +#include "core/providers/migraphx/migraphx_call.h" #include #include @@ -76,6 +76,9 @@ class MIGraphXExecutionProvider : public IExecutionProvider { virtual std::shared_ptr GetKernelRegistry() const override; std::unique_ptr GetDataTransfer() const override; + static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t rocm_mem_limit, ArenaExtendStrategy arena_extend_strategy, + MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg); + std::unique_ptr GetSubGraph(const std::vector& graph_nodes_index, const GraphViewer& graph) const; void RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry, AllocatorMap& allocators) const override; OrtDevice GetOrtDeviceByMemType(OrtMemType mem_type) const override; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc index 1f9a47d3ad87d..1feec9ae2e3e1 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc @@ -1,6 +1,7 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+#include "core/providers/shared_library/provider_api.h" #include "core/providers/migraphx/migraphx_execution_provider_info.h" #include "core/common/make_string.h" @@ -10,6 +11,12 @@ #include "migraphx_call.h" namespace onnxruntime { + +const EnumNameMapping arena_extend_strategy_mapping{ + {ArenaExtendStrategy::kNextPowerOfTwo, "kNextPowerOfTwo"}, + {ArenaExtendStrategy::kSameAsRequested, "kSameAsRequested"}, +}; + namespace migraphx { namespace provider_option_names { constexpr const char* kDeviceId = "device_id"; @@ -22,12 +29,20 @@ constexpr const char* kSaveModelPath = "migx_save_model_name"; constexpr const char* kLoadCompiledModel = "migx_load_compiled_model"; constexpr const char* kLoadModelPath = "migx_load_model_name"; constexpr const char* kExhaustiveTune = "migx_exhaustive_tune"; +constexpr const char* kMemLimit = "gpu_mem_limit"; +constexpr const char* kArenaExtendStrategy = "arena_extend_strategy"; +constexpr const char* kGpuExternalAlloc = "gpu_external_alloc"; +constexpr const char* kGpuExternalFree = "gpu_external_free"; +constexpr const char* kGpuExternalEmptyCache = "gpu_external_empty_cache"; } // namespace provider_option_names } // namespace migraphx MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions(const ProviderOptions& options) { MIGraphXExecutionProviderInfo info{}; + void* alloc = nullptr; + void* free = nullptr; + void* empty_cache = nullptr; ORT_THROW_IF_ERROR( ProviderOptionsParser{} .AddValueParser( @@ -42,13 +57,42 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions ", must be between 0 (inclusive) and ", num_devices, " (exclusive)."); return Status::OK(); }) + .AddValueParser( + migraphx_provider_option::kGpuExternalAlloc, + [&alloc](const std::string& value_str) -> Status { + size_t address; + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); + alloc = reinterpret_cast(address); + return Status::OK(); + }) + .AddValueParser( + migraphx_provider_option::kGpuExternalFree, + [&free](const std::string& value_str) -> Status { + size_t address; + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); + free = reinterpret_cast(address); + return Status::OK(); + }) + .AddValueParser( + migraphx_provider_option::kGpuExternalEmptyCache, + [&empty_cache](const std::string& value_str) -> Status { + size_t address; + ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); + empty_cache = reinterpret_cast(address); + return Status::OK(); + }) .AddAssignmentToReference(migraphx::provider_option_names::kFp16Enable, info.fp16_enable) .AddAssignmentToReference(migraphx::provider_option_names::kInt8Enable, info.int8_enable) .AddAssignmentToReference(migraphx::provider_option_names::kSaveCompiledModel, info.save_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kLoadCompiledModel, info.load_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kExhaustiveTune, info.exhaustive_tune) + .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.gpu_mem_limit) + .AddAssignmentToEnumReference(migraphx_provider_option::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) .Parse(options)); + MIGraphXExecutionProviderExternalAllocatorInfo alloc_info{alloc, free, empty_cache}; + info.external_allocator_info = alloc_info; + return info; } @@ -59,6 +103,12 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const MIGraphXE {migraphx::provider_option_names::kInt8Enable, 
MakeStringWithClassicLocale(info.int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.load_compiled_model)}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, + {migraphx_provider_option::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, + {migraphx_provider_option::kGpuExternalFree, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, + {migraphx_provider_option::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, + {migraphx_provider_option::kArenaExtendStrategy, + EnumToName(arena_extend_strategy_mapping, info.arena_extend_strategy)}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.exhaustive_tune)}, }; return options; @@ -71,6 +121,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const OrtMIGrap {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.migraphx_save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.migraphx_load_compiled_model)}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, + {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.arena_extend_strategy))}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.migraphx_exhaustive_tune)}, }; return options; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h index b8bf86580f03d..61bc0d871be7c 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h @@ -7,10 +7,36 @@ #include #include "core/framework/ortdevice.h" +#include "core/common/hash_combine.h" +#include "core/framework/arena_extend_strategy.h" #include "core/framework/provider_options.h" #include "core/session/onnxruntime_c_api.h" namespace onnxruntime { + +// Information needed to construct MIGraphX execution providers. +struct MIGraphXExecutionProviderExternalAllocatorInfo { + void* alloc{nullptr}; + void* free{nullptr}; + void* empty_cache{nullptr}; + + MIGraphXExecutionProviderExternalAllocatorInfo() { + alloc = nullptr; + free = nullptr; + empty_cache = nullptr; + } + + MIGraphXExecutionProviderExternalAllocatorInfo(void* a, void* f, void* e) { + alloc = a; + free = f; + empty_cache = e; + } + + bool UseExternalAllocator() const { + return (alloc != nullptr) && (free != nullptr); + } +}; + // Information needed to construct trt execution providers. 
struct MIGraphXExecutionProviderInfo { std::string target_device; @@ -25,8 +51,43 @@ struct MIGraphXExecutionProviderInfo { std::string load_model_file{"./compiled_model.mxr"}; bool exhaustive_tune{false}; + size_t gpu_mem_limit{std::numeric_limits::max()}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) + ArenaExtendStrategy arena_extend_strategy{ArenaExtendStrategy::kNextPowerOfTwo}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) + + OrtArenaCfg* default_memory_arena_cfg{nullptr}; + MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info{}; + static MIGraphXExecutionProviderInfo FromProviderOptions(const ProviderOptions& options); static ProviderOptions ToProviderOptions(const MIGraphXExecutionProviderInfo& info); static ProviderOptions ToProviderOptions(const OrtMIGraphXProviderOptions& info); }; } // namespace onnxruntime + +template <> +struct std::hash<::onnxruntime::MIGraphXExecutionProviderInfo> { + size_t operator()(const ::onnxruntime::MIGraphXExecutionProviderInfo& info) const { + size_t value{0xbc9f1d34}; // seed + + // Bits: device_id (16), arena_extend_strategy (reserved 2), boolean options (1 each) + size_t data = static_cast(info.device_id) ^ + (static_cast(info.arena_extend_strategy) << 16) ^ + (static_cast(info.fp16_enable) << 18) ^ + (static_cast(info.int8_enable) << 19) ^ + (static_cast(info.int8_use_native_calibration_table) << 20) ^ + (static_cast(info.model_cache_enable) << 21) ^ + (static_cast(info.save_compiled_model) << 22) ^ + (static_cast(info.load_compiled_model) << 23) ^ + (static_cast(info.exhaustive_tune) << 24); + onnxruntime::HashCombine(data, value); + + onnxruntime::HashCombine(info.gpu_mem_limit, value); + + // Memory pointers + onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.alloc), value); + onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.free), value); + onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.empty_cache), value); + + // The default memory arena cfg is not used in hashing right now. + return value; + } +}; diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 7b192b657b7cc..64278bf6439f9 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -5,6 +5,7 @@ #include "core/providers/shared_library/provider_api.h" #include "core/providers/migraphx/migraphx_provider_factory.h" #include "migraphx_execution_provider.h" +#include "migraphx_execution_provider_info.h" #include "migraphx_provider_factory_creator.h" #include "migraphx_allocator.h" #include "gpu_data_transfer.h" @@ -42,6 +43,27 @@ struct ProviderInfo_MIGraphX_Impl final : ProviderInfo_MIGraphX { return std::make_unique(device_id, name); } + void MIGraphXMemcpy_HostToDevice(void* dst, const void* src, size_t count) override { + // hipMemcpy() operates on the default stream + HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyHostToDevice)); + + // To ensure that the copy has completed, invoke a stream sync for the default stream. + // For transfers from pageable host memory to device memory, a stream sync is performed before the copy is initiated. + // The function will return once the pageable buffer has been copied to the staging memory for DMA transfer + // to device memory, but the DMA to final destination may not have completed. 
+ + HIP_CALL_THROW(hipStreamSynchronize(0)); + } + + // Used by onnxruntime_pybind_state.cc + void MIGraphXMemcpy_DeviceToHost(void* dst, const void* src, size_t count) override { + // For transfers from device to either pageable or pinned host memory, the function returns only once the copy has completed. + HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost)); + } + + std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override { + return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg); + } } g_info; struct MIGraphX_Provider : Provider { diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h index b257a4318dc0e..7c26f9b9f0bb5 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h @@ -14,6 +14,9 @@ struct MIGraphXExecutionProviderExternalAllocatorInfo; struct ProviderInfo_MIGraphX { virtual std::unique_ptr CreateMIGraphXAllocator(int16_t device_id, const char* name) = 0; virtual std::unique_ptr CreateMIGraphXPinnedAllocator(int16_t device_id, const char* name) = 0; + virtual void MIGraphXMemcpy_HostToDevice(void* dst, const void* src, size_t count) = 0; + virtual void MIGraphXMemcpy_DeviceToHost(void* dst, const void* src, size_t count) = 0; + virtual std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0; protected: ~ProviderInfo_MIGraphX() = default; // Can only be destroyed through a subclass instance diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index 8fdac257297c1..da5e942d67f21 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -115,6 +115,19 @@ OrtMemoryInfo GetMemoryInfoPerDeviceType(const OrtDevice& ort_device) { } mem_info = GetCudaAllocator(ort_device.Id())->Info(); } +#endif +#if USE_ROCM + else if (ort_device.Type() == OrtDevice::GPU) { + if (!IsRocmDeviceIdValid(logging::LoggingManager::DefaultLogger(), ort_device.Id())) { + ORT_THROW("The provided device id doesn't match any available GPUs on the machine: ", ort_device.Id()); + } + mem_info = GetRocmAllocator(ort_device.Id())->Info(); + } +#endif +#if USE_MIGRAPHX + else if (ort_device.Type() == OrtDevice::GPU) { + mem_info = GetMIGraphXAllocator(ort_device.Id())->Info(); + } #endif else { ORT_THROW("Unsupported OrtDevice type: ", ort_device.Type()); @@ -193,6 +206,38 @@ std::unique_ptr GetGPUDataTransfer() { #endif +#ifdef USE_MIGRAPHX +void CpuToMIGraphXMemCpy(void* dst, const void* src, size_t num_bytes) { + GetProviderInfo_MIGraphX().MIGraphXMemcpy_HostToDevice(dst, src, num_bytes); +} + +void MIGraphXToCpuMemCpy(void* dst, const void* src, size_t num_bytes) { + GetProviderInfo_MIGraphX().MIGraphXMemcpy_DeviceToHost(dst, src, num_bytes); +} + +const std::unordered_map* GetMIGraphXToHostMemCpyFunction() { + static std::unordered_map map{ + {OrtDevice::GPU, 
MIGraphXToCpuMemCpy}}; + + return ↦ +} + +AllocatorPtr GetMIGraphXAllocator(OrtDevice::DeviceId id) { + // Current approach is not thread-safe, but there are some bigger infra pieces to put together in order to make + // multi-threaded MIGraphX allocation work we need to maintain a per-thread MIGraphX allocator + + static auto* id_to_allocator_map = new std::unordered_map(); + + if (id_to_allocator_map->find(id) == id_to_allocator_map->end()) { + // TODO: Expose knobs so that users can set fields associated with OrtArenaCfg so that we can pass it to the following method + id_to_allocator_map->insert({id, GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, nullptr)}); + } + + return (*id_to_allocator_map)[id]; +} + +#endif + #ifdef USE_DML constexpr GUID dml_readback_heap_guid = {0x00d32df8, 0xea2d, 0x40bf, {0xa4, 0x47, 0x9c, 0xb4, 0xbc, 0xf1, 0x1d, 0x5e}}; diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.h b/onnxruntime/python/onnxruntime_pybind_mlvalue.h index c76292040b61b..6d45d150ed191 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.h +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.h @@ -89,6 +89,18 @@ const std::unordered_map* GetDmlToHostMemCpyF #endif +#ifdef USE_MIGRAPHX + +void CpuToMIGraphXMemCpy(void* dst, const void* src, size_t num_bytes); + +void MIGraphXToCpuMemCpy(void* dst, const void* src, size_t num_bytes); + +const std::unordered_map* GetMIGraphXToHostMemCpyFunction(); + +AllocatorPtr GetMIGraphXAllocator(OrtDevice::DeviceId id); + +#endif + #ifdef USE_CANN void CpuToCannMemCpy(void* dst, const void* src, size_t num_bytes); diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index 94235b3043bc7..2b77fa081b98f 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -60,6 +60,11 @@ void addOrtValueMethods(pybind11::module& m) { // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA CreateGenericMLValue(nullptr, GetRocmAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToRocmMemCpy); +#elif USE_MIGRAPHX + // InputDeflist is null because OrtValue creation is not tied to a specific model + // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) + // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA + CreateGenericMLValue(nullptr, GetMIGraphXAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToMIGraphXMemCpy); #elif USE_DML // InputDeflist is null because OrtValue creation is not tied to a specific model // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) @@ -128,6 +133,12 @@ void addOrtValueMethods(pybind11::module& m) { values_type, *(ml_value->GetMutable()), CpuToRocmMemCpy); +#elif USE_MIGRAPHX + onnxruntime::python::CopyDataToTensor( + py_values, + values_type, + *(ml_value->GetMutable()), + CpuToMIGraphXMemCpy); #elif USE_DML onnxruntime::python::CopyDataToTensor( py_values, @@ -166,6 +177,13 @@ void addOrtValueMethods(pybind11::module& m) { throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine."); } allocator = 
GetCudaAllocator(device.Id()); +#elif USE_MIGRAPHX + allocator = GetMIGraphXAllocator(device.Id()); +#elif USE_ROCM + if (!IsRocmDeviceIdValid(logging::LoggingManager::DefaultLogger(), device.Id())) { + throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine."); + } + allocator = GetRocmAllocator(device.Id()); #else throw std::runtime_error( "Can't allocate memory on the CUDA device using this package of OnnxRuntime. " @@ -307,6 +325,8 @@ void addOrtValueMethods(pybind11::module& m) { py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetCannToHostMemCpyFunction()); #elif USE_DML py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetDmlToHostMemCpyFunction()); +#elif USE_MIGRAPHX + py::object obj = GetPyObjFromTensor(*ml_value, nullptr, GetMIGraphXToHostMemCpyFunction()); #else py::object obj = GetPyObjFromTensor(*ml_value, nullptr, nullptr); #endif diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index f31734bdfb805..527bab9f70eba 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -836,7 +836,9 @@ std::unique_ptr CreateExecutionProviderInstance( "./compiled_model.mxr", 1, "./compiled_model.mxr", - 1}; + 1, + SIZE_MAX, + 0}; for (auto option : it->second) { if (option.first == "device_id") { if (!option.second.empty()) { diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.cc b/onnxruntime/python/onnxruntime_pybind_state_common.cc index cec4dfc14160c..824aa9979b02b 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.cc +++ b/onnxruntime/python/onnxruntime_pybind_state_common.cc @@ -43,6 +43,12 @@ onnxruntime::ROCMExecutionProviderExternalAllocatorInfo external_allocator_info{ onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; #endif +#ifdef USE_MIGRAPHX +onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info{}; +// TODO remove deprecated global config +onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; +#endif + #ifdef ENABLE_TRAINING void DlpackCapsuleDestructor(PyObject* data) { diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 4d6e411defae3..9e8e28096d639 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -123,6 +123,7 @@ struct OrtStatus { #endif #ifdef USE_MIGRAPHX #include "core/providers/migraphx/migraphx_provider_factory.h" +#include "core/providers/migraphx/migraphx_execution_provider_info.h" #endif #ifdef USE_OPENVINO #include "core/providers/openvino/openvino_provider_factory.h" @@ -182,6 +183,17 @@ ProviderInfo_CANN& GetProviderInfo_CANN(); } // namespace onnxruntime #endif +#ifdef USE_MIGRAPHX +namespace onnxruntime { +ProviderInfo_MIGraphX* TryGetProviderInfo_MIGraphX(); +ProviderInfo_MIGraphX& GetProviderInfo_MIGraphX(); +namespace python { +extern onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info; +extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; +} // namespace python +} // namespace onnxruntime +#endif + #ifdef USE_ROCM namespace onnxruntime { ProviderInfo_ROCM* TryGetProviderInfo_ROCM(); diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index 1feba20e32bbb..cd258294fb976 100644 --- 
a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -81,7 +81,9 @@ std::unique_ptr DefaultMIGraphXExecutionProvider() { "./compiled_model.mxr", 1, "./compiled_model.mxr", - 1}; + 1, + SIZE_MAX, + 0}; return MIGraphXProviderFactoryCreator::Create(¶ms)->CreateProvider(); #else return nullptr; From dbcb1e3745a7053c7ab1be688f7bbdd2ebc8dc50 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 15:58:17 +0100 Subject: [PATCH 02/10] Change variable names to migx_* and migraphx_* --- .../core/session/onnxruntime_c_api.h | 4 ++-- .../migraphx_execution_provider_info.cc | 18 +++++++++--------- .../migraphx_execution_provider_info.h | 4 ++-- .../migraphx/migraphx_provider_factory.cc | 4 ++++ 4 files changed, 17 insertions(+), 13 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e50e257c87eb1..d6b7e0f3df0ba 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -627,7 +627,7 @@ typedef struct OrtMIGraphXProviderOptions { * Defaults to SIZE_MAX. * \note If a ::OrtArenaCfg has been applied, it will override this field */ - size_t gpu_mem_limit; + size_t migraphx_mem_limit; /** \brief Strategy used to grow the memory arena * 0 = kNextPowerOfTwo
@@ -635,7 +635,7 @@ typedef struct OrtMIGraphXProviderOptions { * Defaults to 0. * \note If a ::OrtArenaCfg has been applied, it will override this field */ - int arena_extend_strategy; + int migraphx_arena_extend_strategy; } OrtMIGraphXProviderOptions; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc index 1feec9ae2e3e1..f5638c19e91e6 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc @@ -29,11 +29,11 @@ constexpr const char* kSaveModelPath = "migx_save_model_name"; constexpr const char* kLoadCompiledModel = "migx_load_compiled_model"; constexpr const char* kLoadModelPath = "migx_load_model_name"; constexpr const char* kExhaustiveTune = "migx_exhaustive_tune"; -constexpr const char* kMemLimit = "gpu_mem_limit"; -constexpr const char* kArenaExtendStrategy = "arena_extend_strategy"; -constexpr const char* kGpuExternalAlloc = "gpu_external_alloc"; -constexpr const char* kGpuExternalFree = "gpu_external_free"; -constexpr const char* kGpuExternalEmptyCache = "gpu_external_empty_cache"; +constexpr const char* kMemLimit = "migx_mem_limit"; +constexpr const char* kArenaExtendStrategy = "migx_arena_extend_strategy"; +constexpr const char* kGpuExternalAlloc = "migx_external_alloc"; +constexpr const char* kGpuExternalFree = "migx_external_free"; +constexpr const char* kGpuExternalEmptyCache = "migx_external_empty_cache"; } // namespace provider_option_names } // namespace migraphx @@ -86,7 +86,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(migraphx::provider_option_names::kSaveCompiledModel, info.save_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kLoadCompiledModel, info.load_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kExhaustiveTune, info.exhaustive_tune) - .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.gpu_mem_limit) + .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.mem_limit) .AddAssignmentToEnumReference(migraphx_provider_option::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) .Parse(options)); @@ -103,7 +103,7 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const MIGraphXE {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.mem_limit)}, {migraphx_provider_option::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, {migraphx_provider_option::kGpuExternalFree, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, {migraphx_provider_option::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, @@ -121,8 +121,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const OrtMIGrap {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)}, 
{migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.migraphx_save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.migraphx_load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.gpu_mem_limit)}, - {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.arena_extend_strategy))}, + {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.migraphx_mem_limit)}, + {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.migraphx_arena_extend_strategy))}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.migraphx_exhaustive_tune)}, }; return options; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h index 61bc0d871be7c..9100d3efd47ba 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h @@ -51,7 +51,7 @@ struct MIGraphXExecutionProviderInfo { std::string load_model_file{"./compiled_model.mxr"}; bool exhaustive_tune{false}; - size_t gpu_mem_limit{std::numeric_limits::max()}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) + size_t mem_limit{std::numeric_limits::max()}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) ArenaExtendStrategy arena_extend_strategy{ArenaExtendStrategy::kNextPowerOfTwo}; // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified) OrtArenaCfg* default_memory_arena_cfg{nullptr}; @@ -80,7 +80,7 @@ struct std::hash<::onnxruntime::MIGraphXExecutionProviderInfo> { (static_cast(info.exhaustive_tune) << 24); onnxruntime::HashCombine(data, value); - onnxruntime::HashCombine(info.gpu_mem_limit, value); + onnxruntime::HashCombine(info.mem_limit, value); // Memory pointers onnxruntime::HashCombine(reinterpret_cast(info.external_allocator_info.alloc), value); diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 64278bf6439f9..6540c3cdfc591 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -99,6 +99,8 @@ struct MIGraphX_Provider : Provider { if (options.migraphx_load_model_path != nullptr) { info.load_model_file = options.migraphx_load_model_path; } + info.arena_extend_strategy = options.migraphx_arena_extend_strategy; + info.mem_limit = options.migraphx_mem_limit; return std::make_shared(info); } @@ -131,6 +133,8 @@ struct MIGraphX_Provider : Provider { migx_options.migraphx_save_model_path = internal_options.save_model_file.c_str(); migx_options.migraphx_load_compiled_model = internal_options.load_compiled_model; migx_options.migraphx_load_model_path = internal_options.load_model_file.c_str(); + migx_options.migraphx_arena_extend_strategy = internal_options.arena_extend_strategy; + migx_options.migraphx_mem_limit = internal_options.mem_limit; } ProviderOptions GetProviderOptions(const void* provider_options) override { From 0ceb1fd01dd3e40b83568ec71914e4f49930226a Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 16:34:18 +0100 Subject: [PATCH 03/10] Fixing issue with allocating extend arena --- 
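Note (illustration only, outside the diff): with the PATCH 02 renames and the casts fixed here, the two new options are plain fields on the public OrtMIGraphXProviderOptions struct, so a caller fills them in directly. A minimal caller-side sketch, assuming the existing SessionOptionsAppendExecutionProvider_MIGraphX entry point and an arbitrary example limit:

#include "onnxruntime_c_api.h"                    // public C API header (install-tree name)

OrtMIGraphXProviderOptions migx_options{};        // zero-initialize; unset path fields stay nullptr and are skipped by the EP
migx_options.device_id = 0;
migx_options.migraphx_fp16_enable = 1;
migx_options.migraphx_mem_limit = static_cast<size_t>(2) * 1024 * 1024 * 1024;  // example: cap the arena at 2 GiB instead of SIZE_MAX
migx_options.migraphx_arena_extend_strategy = 1;                                 // 0 = kNextPowerOfTwo (default), 1 = kSameAsRequested
// The filled struct is then handed to the session options through the usual
// SessionOptionsAppendExecutionProvider_MIGraphX call; note that an OrtArenaCfg applied to
// the session still overrides both fields, as the doc comments in onnxruntime_c_api.h state.
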
.../core/providers/migraphx/migraphx_provider_factory.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 6540c3cdfc591..6dd14e00e0bcf 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -99,7 +99,7 @@ struct MIGraphX_Provider : Provider { if (options.migraphx_load_model_path != nullptr) { info.load_model_file = options.migraphx_load_model_path; } - info.arena_extend_strategy = options.migraphx_arena_extend_strategy; + info.arena_extend_strategy = static_cast(options.migraphx_arena_extend_strategy); info.mem_limit = options.migraphx_mem_limit; return std::make_shared(info); } @@ -133,7 +133,7 @@ struct MIGraphX_Provider : Provider { migx_options.migraphx_save_model_path = internal_options.save_model_file.c_str(); migx_options.migraphx_load_compiled_model = internal_options.load_compiled_model; migx_options.migraphx_load_model_path = internal_options.load_model_file.c_str(); - migx_options.migraphx_arena_extend_strategy = internal_options.arena_extend_strategy; + migx_options.migraphx_arena_extend_strategy = static_cast(internal_options.arena_extend_strategy); migx_options.migraphx_mem_limit = internal_options.mem_limit; } From dc06f871012dc11ada2f9092933e609ca453f214 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 20 Dec 2024 18:58:41 +0100 Subject: [PATCH 04/10] Fixing variables names --- .../core/providers/migraphx/migraphx_execution_provider.cc | 4 ++-- .../core/providers/migraphx/migraphx_execution_provider.h | 2 +- .../core/providers/migraphx/migraphx_provider_factory.cc | 4 ++-- .../core/providers/migraphx/migraphx_provider_factory.h | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 7f934d69c48ab..0a9c4b977b6cc 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -210,7 +210,7 @@ MIGraphXExecutionProvider::~MIGraphXExecutionProvider() { } AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, - size_t gpu_mem_limit, + size_t migx_mem_limit, ArenaExtendStrategy arena_extend_strategy, MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info, @@ -235,7 +235,7 @@ AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::Devic device_id, true, {default_memory_arena_cfg ? 
*default_memory_arena_cfg - : OrtArenaCfg(gpu_mem_limit, static_cast(arena_extend_strategy), + : OrtArenaCfg(migx_mem_limit, static_cast(arena_extend_strategy), -1, -1, -1, -1L)}, // make it stream aware true, diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h index b20d534be66a2..13c45fe35db75 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h @@ -76,7 +76,7 @@ class MIGraphXExecutionProvider : public IExecutionProvider { virtual std::shared_ptr GetKernelRegistry() const override; std::unique_ptr GetDataTransfer() const override; - static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t rocm_mem_limit, ArenaExtendStrategy arena_extend_strategy, + static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t migx_mem_limit, ArenaExtendStrategy arena_extend_strategy, MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg); std::unique_ptr GetSubGraph(const std::vector& graph_nodes_index, const GraphViewer& graph) const; diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc index 6dd14e00e0bcf..f8c962fa02d71 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc @@ -61,8 +61,8 @@ struct ProviderInfo_MIGraphX_Impl final : ProviderInfo_MIGraphX { HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost)); } - std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override { - return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg); + std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override { + return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, migx_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg); } } g_info; diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h index 7c26f9b9f0bb5..d1c9457bafa0f 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h +++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.h @@ -16,7 +16,7 @@ struct ProviderInfo_MIGraphX { virtual std::unique_ptr CreateMIGraphXPinnedAllocator(int16_t device_id, const char* name) = 0; virtual void MIGraphXMemcpy_HostToDevice(void* dst, const void* src, size_t count) = 0; virtual void MIGraphXMemcpy_DeviceToHost(void* dst, const void* src, size_t count) = 0; - virtual std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t gpu_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0; + virtual std::shared_ptr 
CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) = 0; protected: ~ProviderInfo_MIGraphX() = default; // Can only be destroyed through a subclass instance From 754603661f938ecb37323f63232e6f37b756bba1 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Mon, 23 Dec 2024 11:41:26 +0100 Subject: [PATCH 05/10] Updating comments --- onnxruntime/python/onnxruntime_pybind_ortvalue.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc index 2b77fa081b98f..4d661cd8fb470 100644 --- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc @@ -58,12 +58,12 @@ void addOrtValueMethods(pybind11::module& m) { // InputDeflist is null because OrtValue creation is not tied to a specific model // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) - // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA + // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in ROCm CreateGenericMLValue(nullptr, GetRocmAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToRocmMemCpy); #elif USE_MIGRAPHX // InputDeflist is null because OrtValue creation is not tied to a specific model // Likewise, there is no need to specify the name (as the name was previously used to lookup the def list) - // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in CUDA + // TODO: Add check to ensure that string arrays are not passed - we currently don't support string tensors in MIGraphX CreateGenericMLValue(nullptr, GetMIGraphXAllocator(device.Id()), "", array_on_cpu, ml_value.get(), true, false, CpuToMIGraphXMemCpy); #elif USE_DML // InputDeflist is null because OrtValue creation is not tied to a specific model From 78043ac0b792cd2a38ed39067faeccd723a2f083 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Tue, 31 Dec 2024 15:30:02 +0100 Subject: [PATCH 06/10] Fixing build errors on Linux when using rocm and migraphx both --- .../migraphx/migraphx_execution_provider.cc | 4 +-- .../migraphx_execution_provider_info.cc | 24 +++++++-------- .../migraphx_execution_provider_info.h | 7 ++--- .../python/onnxruntime_pybind_mlvalue.cc | 2 +- .../python/onnxruntime_pybind_state_common.cc | 7 +++-- .../python/onnxruntime_pybind_state_common.h | 29 ++++++++++++------- 6 files changed, 40 insertions(+), 33 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 0a9c4b977b6cc..6367475b5077d 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -249,10 +249,10 @@ AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::Devic std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() { AllocatorCreationInfo default_memory_info( - [](OrtDevice::DeviceId device_id) { return CreateMIGraphXAllocator(device_id, onnxruntime::CUDA); }, info_.device_id); + [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, 
CUDA); }, info_.device_id); AllocatorCreationInfo pinned_allocator_info( [](OrtDevice::DeviceId device_id) { - return CreateMIGraphXPinnedAllocator(device_id, onnxruntime::CUDA_PINNED); + return std::make_unique(device_id, CUDA_PINNED); }, 0); return std::vector{CreateAllocator(default_memory_info), CreateAllocator(pinned_allocator_info)}; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc index f5638c19e91e6..11270f2e64b82 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.cc @@ -58,7 +58,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions return Status::OK(); }) .AddValueParser( - migraphx_provider_option::kGpuExternalAlloc, + migraphx::provider_option_names::kGpuExternalAlloc, [&alloc](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); @@ -66,7 +66,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions return Status::OK(); }) .AddValueParser( - migraphx_provider_option::kGpuExternalFree, + migraphx::provider_option_names::kGpuExternalFree, [&free](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); @@ -74,7 +74,7 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions return Status::OK(); }) .AddValueParser( - migraphx_provider_option::kGpuExternalEmptyCache, + migraphx::provider_option_names::kGpuExternalEmptyCache, [&empty_cache](const std::string& value_str) -> Status { size_t address; ORT_RETURN_IF_ERROR(ParseStringWithClassicLocale(value_str, address)); @@ -86,8 +86,8 @@ MIGraphXExecutionProviderInfo MIGraphXExecutionProviderInfo::FromProviderOptions .AddAssignmentToReference(migraphx::provider_option_names::kSaveCompiledModel, info.save_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kLoadCompiledModel, info.load_compiled_model) .AddAssignmentToReference(migraphx::provider_option_names::kExhaustiveTune, info.exhaustive_tune) - .AddAssignmentToReference(migraphx_provider_option::kMemLimit, info.mem_limit) - .AddAssignmentToEnumReference(migraphx_provider_option::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) + .AddAssignmentToReference(migraphx::provider_option_names::kMemLimit, info.mem_limit) + .AddAssignmentToEnumReference(migraphx::provider_option_names::kArenaExtendStrategy, arena_extend_strategy_mapping, info.arena_extend_strategy) .Parse(options)); MIGraphXExecutionProviderExternalAllocatorInfo alloc_info{alloc, free, empty_cache}; @@ -103,11 +103,11 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const MIGraphXE {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.mem_limit)}, - {migraphx_provider_option::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, - {migraphx_provider_option::kGpuExternalFree, 
MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, - {migraphx_provider_option::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, - {migraphx_provider_option::kArenaExtendStrategy, + {migraphx::provider_option_names::kMemLimit, MakeStringWithClassicLocale(info.mem_limit)}, + {migraphx::provider_option_names::kGpuExternalAlloc, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.alloc))}, + {migraphx::provider_option_names::kGpuExternalFree, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.free))}, + {migraphx::provider_option_names::kGpuExternalEmptyCache, MakeStringWithClassicLocale(reinterpret_cast(info.external_allocator_info.empty_cache))}, + {migraphx::provider_option_names::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, info.arena_extend_strategy)}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.exhaustive_tune)}, }; @@ -121,8 +121,8 @@ ProviderOptions MIGraphXExecutionProviderInfo::ToProviderOptions(const OrtMIGrap {migraphx::provider_option_names::kInt8Enable, MakeStringWithClassicLocale(info.migraphx_int8_enable)}, {migraphx::provider_option_names::kSaveCompiledModel, MakeStringWithClassicLocale(info.migraphx_save_compiled_model)}, {migraphx::provider_option_names::kLoadCompiledModel, MakeStringWithClassicLocale(info.migraphx_load_compiled_model)}, - {migraphx_provider_option::kMemLimit, MakeStringWithClassicLocale(info.migraphx_mem_limit)}, - {migraphx_provider_option::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.migraphx_arena_extend_strategy))}, + {migraphx::provider_option_names::kMemLimit, MakeStringWithClassicLocale(info.migraphx_mem_limit)}, + {migraphx::provider_option_names::kArenaExtendStrategy, EnumToName(arena_extend_strategy_mapping, static_cast(info.migraphx_arena_extend_strategy))}, {migraphx::provider_option_names::kExhaustiveTune, MakeStringWithClassicLocale(info.migraphx_exhaustive_tune)}, }; return options; diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h index 9100d3efd47ba..35c44fe39b561 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h @@ -74,10 +74,9 @@ struct std::hash<::onnxruntime::MIGraphXExecutionProviderInfo> { (static_cast(info.fp16_enable) << 18) ^ (static_cast(info.int8_enable) << 19) ^ (static_cast(info.int8_use_native_calibration_table) << 20) ^ - (static_cast(info.model_cache_enable) << 21) ^ - (static_cast(info.save_compiled_model) << 22) ^ - (static_cast(info.load_compiled_model) << 23) ^ - (static_cast(info.exhaustive_tune) << 24); + (static_cast(info.save_compiled_model) << 21) ^ + (static_cast(info.load_compiled_model) << 22) ^ + (static_cast(info.exhaustive_tune) << 23); onnxruntime::HashCombine(data, value); onnxruntime::HashCombine(info.mem_limit, value); diff --git a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc index da5e942d67f21..b69a1098fd8be 100644 --- a/onnxruntime/python/onnxruntime_pybind_mlvalue.cc +++ b/onnxruntime/python/onnxruntime_pybind_mlvalue.cc @@ -230,7 +230,7 @@ AllocatorPtr GetMIGraphXAllocator(OrtDevice::DeviceId id) { if (id_to_allocator_map->find(id) == id_to_allocator_map->end()) { // TODO: Expose knobs so that users 
can set fields associated with OrtArenaCfg so that we can pass it to the following method - id_to_allocator_map->insert({id, GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(id, gpu_mem_limit, arena_extend_strategy, external_allocator_info, nullptr)}); + id_to_allocator_map->insert({id, GetProviderInfo_MIGraphX().CreateMIGraphXAllocator(id, gpu_mem_limit, arena_extend_strategy, migx_external_allocator_info, nullptr)}); } return (*id_to_allocator_map)[id]; diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.cc b/onnxruntime/python/onnxruntime_pybind_state_common.cc index 824aa9979b02b..8f35f88385095 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.cc +++ b/onnxruntime/python/onnxruntime_pybind_state_common.cc @@ -39,14 +39,15 @@ bool do_copy_in_default_stream = true; // TODO remove deprecated global config onnxruntime::rocm::TunableOpInfo tunable_op{}; onnxruntime::ROCMExecutionProviderExternalAllocatorInfo external_allocator_info{}; +#endif + +#if defined(USE_ROCM) || defined(USE_MIGRAPHX) // TODO remove deprecated global config onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; #endif #ifdef USE_MIGRAPHX -onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info{}; -// TODO remove deprecated global config -onnxruntime::ArenaExtendStrategy arena_extend_strategy = onnxruntime::ArenaExtendStrategy::kNextPowerOfTwo; +onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo migx_external_allocator_info{}; #endif #ifdef ENABLE_TRAINING diff --git a/onnxruntime/python/onnxruntime_pybind_state_common.h b/onnxruntime/python/onnxruntime_pybind_state_common.h index 9e8e28096d639..e9db8aeb3f429 100644 --- a/onnxruntime/python/onnxruntime_pybind_state_common.h +++ b/onnxruntime/python/onnxruntime_pybind_state_common.h @@ -183,17 +183,6 @@ ProviderInfo_CANN& GetProviderInfo_CANN(); } // namespace onnxruntime #endif -#ifdef USE_MIGRAPHX -namespace onnxruntime { -ProviderInfo_MIGraphX* TryGetProviderInfo_MIGraphX(); -ProviderInfo_MIGraphX& GetProviderInfo_MIGraphX(); -namespace python { -extern onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo external_allocator_info; -extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; -} // namespace python -} // namespace onnxruntime -#endif - #ifdef USE_ROCM namespace onnxruntime { ProviderInfo_ROCM* TryGetProviderInfo_ROCM(); @@ -206,11 +195,29 @@ extern bool do_copy_in_default_stream; // TODO remove deprecated global config extern onnxruntime::rocm::TunableOpInfo tunable_op; extern onnxruntime::ROCMExecutionProviderExternalAllocatorInfo external_allocator_info; +} // namespace python +} // namespace onnxruntime +#endif + +#if defined(USE_ROCM) || defined(USE_MIGRAPHX) +namespace onnxruntime { +namespace python { extern onnxruntime::ArenaExtendStrategy arena_extend_strategy; } // namespace python } // namespace onnxruntime #endif +#ifdef USE_MIGRAPHX +namespace onnxruntime { +ProviderInfo_MIGraphX* TryGetProviderInfo_MIGraphX(); +ProviderInfo_MIGraphX& GetProviderInfo_MIGraphX(); +namespace python { +extern onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo migx_external_allocator_info; +} // namespace python +} // namespace onnxruntime + +#endif + #include "core/providers/dnnl/dnnl_provider_factory.h" #include "core/providers/shared_library/provider_host_api.h" From bedb363e0e9b27723201e70634c809b2aaf9c022 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Tue, 31 Dec 2024 15:35:18 +0100 Subject: [PATCH 07/10] Adding 
namespace onnxruntime --- .../core/providers/migraphx/migraphx_execution_provider.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 6367475b5077d..4e98377c80205 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -249,10 +249,10 @@ AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::Devic std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() { AllocatorCreationInfo default_memory_info( - [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, CUDA); }, info_.device_id); + [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, onnxruntime::CUDA); }, info_.device_id); AllocatorCreationInfo pinned_allocator_info( [](OrtDevice::DeviceId device_id) { - return std::make_unique(device_id, CUDA_PINNED); + return std::make_unique(device_id, onnxrunime::CUDA_PINNED); }, 0); return std::vector{CreateAllocator(default_memory_info), CreateAllocator(pinned_allocator_info)}; From b4b980cafad278f5e36279b548986b9d6c084385 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 3 Jan 2025 13:20:52 +0100 Subject: [PATCH 08/10] Fixing spelling error --- .../core/providers/migraphx/migraphx_execution_provider.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 4e98377c80205..7eef42ed5f7e2 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -252,7 +252,7 @@ std::vector MIGraphXExecutionProvider::CreatePreferredAllocators() [](OrtDevice::DeviceId device_id) { return std::make_unique(device_id, onnxruntime::CUDA); }, info_.device_id); AllocatorCreationInfo pinned_allocator_info( [](OrtDevice::DeviceId device_id) { - return std::make_unique(device_id, onnxrunime::CUDA_PINNED); + return std::make_unique(device_id, onnxruntime::CUDA_PINNED); }, 0); return std::vector{CreateAllocator(default_memory_info), CreateAllocator(pinned_allocator_info)}; From a3cdc74f5d1451e2fc782b24181063ea955ab496 Mon Sep 17 00:00:00 2001 From: Uros Petkovic Date: Fri, 3 Jan 2025 15:13:53 +0100 Subject: [PATCH 09/10] Fixing clang format issues --- .../migraphx/migraphx_execution_provider.cc | 16 +++--- .../migraphx/migraphx_execution_provider.h | 2 +- .../migraphx_execution_provider_info.h | 2 +- .../migraphx/migraphx_provider_factory.cc | 2 +- .../python/onnxruntime_pybind_state.cc | 51 +++++++++++-------- 5 files changed, 41 insertions(+), 32 deletions(-) diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc index 7eef42ed5f7e2..1a7a3a27b755b 100644 --- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc +++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc @@ -210,18 +210,18 @@ MIGraphXExecutionProvider::~MIGraphXExecutionProvider() { } AllocatorPtr MIGraphXExecutionProvider::CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, - size_t migx_mem_limit, - ArenaExtendStrategy arena_extend_strategy, - MIGraphXExecutionProviderExternalAllocatorInfo - external_allocator_info, - const OrtArenaCfg* default_memory_arena_cfg) { 
+                                                                size_t migx_mem_limit,
+                                                                ArenaExtendStrategy arena_extend_strategy,
+                                                                MIGraphXExecutionProviderExternalAllocatorInfo
+                                                                    external_allocator_info,
+                                                                const OrtArenaCfg* default_memory_arena_cfg) {
   if (external_allocator_info.UseExternalAllocator()) {
     AllocatorCreationInfo default_memory_info(
         [external_allocator_info](OrtDevice::DeviceId id) {
           return std::make_unique(id, HIP,
-                                                            external_allocator_info.alloc,
-                                                            external_allocator_info.free,
-                                                            external_allocator_info.empty_cache);
+                                  external_allocator_info.alloc,
+                                  external_allocator_info.free,
+                                  external_allocator_info.empty_cache);
         },
         device_id,
         false);
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
index 13c45fe35db75..f79b720e758e3 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider.h
@@ -77,7 +77,7 @@ class MIGraphXExecutionProvider : public IExecutionProvider {
   std::unique_ptr GetDataTransfer() const override;
 
   static AllocatorPtr CreateMIGraphXAllocator(OrtDevice::DeviceId device_id, size_t migx_mem_limit, ArenaExtendStrategy arena_extend_strategy,
-                                 MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg);
+                                              MIGraphXExecutionProviderExternalAllocatorInfo external_alloc_info, const OrtArenaCfg* arena_cfg);
 
   std::unique_ptr GetSubGraph(const std::vector& graph_nodes_index, const GraphViewer& graph) const;
   void RegisterStreamHandlers(IStreamCommandHandleRegistry& stream_handle_registry, AllocatorMap& allocators) const override;
diff --git a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
index 35c44fe39b561..3dbde11ddc4a9 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
+++ b/onnxruntime/core/providers/migraphx/migraphx_execution_provider_info.h
@@ -51,7 +51,7 @@ struct MIGraphXExecutionProviderInfo {
   std::string load_model_file{"./compiled_model.mxr"};
   bool exhaustive_tune{false};
 
-  size_t mem_limit{std::numeric_limits::max()};  // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified)
+  size_t mem_limit{std::numeric_limits::max()};                                     // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified)
   ArenaExtendStrategy arena_extend_strategy{ArenaExtendStrategy::kNextPowerOfTwo};  // Will be over-ridden by contents of `default_memory_arena_cfg` (if specified)
   OrtArenaCfg* default_memory_arena_cfg{nullptr};
 
diff --git a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
index f8c962fa02d71..545b02a345830 100644
--- a/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
+++ b/onnxruntime/core/providers/migraphx/migraphx_provider_factory.cc
@@ -61,7 +61,7 @@ struct ProviderInfo_MIGraphX_Impl final : ProviderInfo_MIGraphX {
     HIP_CALL_THROW(hipMemcpy(dst, src, count, hipMemcpyDeviceToHost));
   }
 
-  std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override {
+  std::shared_ptr CreateMIGraphXAllocator(int16_t device_id, size_t migx_mem_limit, onnxruntime::ArenaExtendStrategy arena_extend_strategy, onnxruntime::MIGraphXExecutionProviderExternalAllocatorInfo& external_allocator_info, const OrtArenaCfg* default_memory_arena_cfg) override {
     return MIGraphXExecutionProvider::CreateMIGraphXAllocator(device_id, migx_mem_limit, arena_extend_strategy, external_allocator_info, default_memory_arena_cfg);
   }
 } g_info;
diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc
index 527bab9f70eba..03dcb80c2f1d1 100644
--- a/onnxruntime/python/onnxruntime_pybind_state.cc
+++ b/onnxruntime/python/onnxruntime_pybind_state.cc
@@ -1432,7 +1432,7 @@ void addGlobalMethods(py::module& m) {
         ORT_UNUSED_PARAMETER(algo);
         ORT_THROW("set_cudnn_conv_algo_search is not supported in ROCM");
 #else
-    cudnn_conv_algo_search = algo;
+        cudnn_conv_algo_search = algo;
 #endif
       });
   // TODO remove deprecated global config
@@ -1443,7 +1443,7 @@ void addGlobalMethods(py::module& m) {
         ORT_UNUSED_PARAMETER(use_single_stream);
         ORT_THROW("set_do_copy_in_default_stream is not supported in ROCM");
 #else
-    do_copy_in_default_stream = use_single_stream;
+        do_copy_in_default_stream = use_single_stream;
 #endif
       });
   // TODO remove deprecated global config
@@ -1808,10 +1808,10 @@ Applies to session load, initialization, etc. Default is 0.)pbdoc")
         }
         ORT_THROW_IF_ERROR(options->value.AddExternalInitializers(names_ptrs, values_ptrs));
 #else
-    ORT_UNUSED_PARAMETER(options);
-    ORT_UNUSED_PARAMETER(names);
-    ORT_UNUSED_PARAMETER(ort_values);
-    ORT_THROW("External initializers are not supported in this build.");
+        ORT_UNUSED_PARAMETER(options);
+        ORT_UNUSED_PARAMETER(names);
+        ORT_UNUSED_PARAMETER(ort_values);
+        ORT_THROW("External initializers are not supported in this build.");
 #endif
       });
 
@@ -1873,7 +1873,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
             return *(na.Type());
           },
           "node type")
-      .def("__str__", [](const onnxruntime::NodeArg& na) -> std::string {
+      .def(
+          "__str__", [](const onnxruntime::NodeArg& na) -> std::string {
             std::ostringstream res;
             res << "NodeArg(name='" << na.Name() << "', type='" << *(na.Type()) << "', shape=";
             auto shape = na.Shape();
@@ -1900,7 +1901,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
             res << ")";
             return std::string(res.str());
           }, "converts the node into a readable string")
-      .def_property_readonly("shape", [](const onnxruntime::NodeArg& na) -> std::vector {
+      .def_property_readonly(
+          "shape", [](const onnxruntime::NodeArg& na) -> std::vector {
             auto shape = na.Shape();
             std::vector arr;
             if (shape == nullptr || shape->dim_size() == 0) {
@@ -2108,25 +2110,32 @@ including arg name, arg type (contains both type and shape).)pbdoc")
       .def_property_readonly("get_profiling_start_time_ns", [](const PyInferenceSession* sess) -> uint64_t {
         return sess->GetSessionHandle()->GetProfiling().GetStartTimeNs();
       })
-      .def("get_providers", [](const PyInferenceSession* sess) -> const std::vector& { return sess->GetSessionHandle()->GetRegisteredProviderTypes(); }, py::return_value_policy::reference_internal)
-      .def("get_provider_options", [](const PyInferenceSession* sess) -> const ProviderOptionsMap& { return sess->GetSessionHandle()->GetAllProviderOptions(); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("session_options", [](const PyInferenceSession* sess) -> PySessionOptions* {
+      .def(
+          "get_providers", [](const PyInferenceSession* sess) -> const std::vector& { return sess->GetSessionHandle()->GetRegisteredProviderTypes(); }, py::return_value_policy::reference_internal)
+      .def(
+          "get_provider_options", [](const PyInferenceSession* sess) -> const ProviderOptionsMap& { return sess->GetSessionHandle()->GetAllProviderOptions(); }, py::return_value_policy::reference_internal)
+      .def_property_readonly(
+          "session_options", [](const PyInferenceSession* sess) -> PySessionOptions* {
             auto session_options = std::make_unique();
             session_options->value = sess->GetSessionHandle()->GetSessionOptions();
             return session_options.release(); }, py::return_value_policy::take_ownership)
-      .def_property_readonly("inputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
+      .def_property_readonly(
+          "inputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
            auto res = sess->GetSessionHandle()->GetModelInputs();
            OrtPybindThrowIfError(res.first);
            return *(res.second); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("outputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
+      .def_property_readonly(
+          "outputs_meta", [](const PyInferenceSession* sess) -> const std::vector& {
            auto res = sess->GetSessionHandle()->GetModelOutputs();
            OrtPybindThrowIfError(res.first);
            return *(res.second); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("overridable_initializers", [](const PyInferenceSession* sess) -> const std::vector& {
+      .def_property_readonly(
+          "overridable_initializers", [](const PyInferenceSession* sess) -> const std::vector& {
            auto res = sess->GetSessionHandle()->GetOverridableInitializers();
            OrtPybindThrowIfError(res.first);
            return *(res.second); }, py::return_value_policy::reference_internal)
-      .def_property_readonly("model_meta", [](const PyInferenceSession* sess) -> const onnxruntime::ModelMetadata& {
+      .def_property_readonly(
+          "model_meta", [](const PyInferenceSession* sess) -> const onnxruntime::ModelMetadata& {
           auto res = sess->GetSessionHandle()->GetModelMetadata();
           OrtPybindThrowIfError(res.first);
           return *(res.second); }, py::return_value_policy::reference_internal)
@@ -2154,8 +2163,8 @@ including arg name, arg type (contains both type and shape).)pbdoc")
 
         return ret;
 #else
-    ORT_UNUSED_PARAMETER(sess);
-    ORT_THROW("TunableOp and get_tuning_results are not supported in this build.");
+        ORT_UNUSED_PARAMETER(sess);
+        ORT_THROW("TunableOp and get_tuning_results are not supported in this build.");
 #endif
       })
       .def("set_tuning_results", [](PyInferenceSession* sess, py::list results, bool error_on_invalid) -> void {
@@ -2186,10 +2195,10 @@ including arg name, arg type (contains both type and shape).)pbdoc")
           throw std::runtime_error("Error in execution: " + status.ErrorMessage());
         }
 #else
-    ORT_UNUSED_PARAMETER(sess);
-    ORT_UNUSED_PARAMETER(results);
-    ORT_UNUSED_PARAMETER(error_on_invalid);
-    ORT_THROW("TunableOp and set_tuning_results are not supported in this build.");
+        ORT_UNUSED_PARAMETER(sess);
+        ORT_UNUSED_PARAMETER(results);
+        ORT_UNUSED_PARAMETER(error_on_invalid);
+        ORT_THROW("TunableOp and set_tuning_results are not supported in this build.");
 #endif
       });

From 6f119cb24c0fdff76bde28c43a3a84cc09f5d4f6 Mon Sep 17 00:00:00 2001
From: Uros Petkovic
Date: Thu, 9 Jan 2025 15:39:27 -0600
Subject: [PATCH 10/10] Reordering ROCm and MIGraphX elifs

---
 onnxruntime/python/onnxruntime_pybind_ortvalue.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc
index 4d661cd8fb470..66377b48a9975 100644
--- a/onnxruntime/python/onnxruntime_pybind_ortvalue.cc
+++ b/onnxruntime/python/onnxruntime_pybind_ortvalue.cc
@@ -177,13 +177,13 @@ void addOrtValueMethods(pybind11::module& m) {
           throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine.");
         }
         allocator = GetCudaAllocator(device.Id());
-#elif USE_MIGRAPHX
-        allocator = GetMIGraphXAllocator(device.Id());
 #elif USE_ROCM
         if (!IsRocmDeviceIdValid(logging::LoggingManager::DefaultLogger(), device.Id())) {
           throw std::runtime_error("The provided device id doesn't match any available GPUs on the machine.");
         }
         allocator = GetRocmAllocator(device.Id());
+#elif USE_MIGRAPHX
+        allocator = GetMIGraphXAllocator(device.Id());
 #else
         throw std::runtime_error(
             "Can't allocate memory on the CUDA device using this package of OnnxRuntime. "