From 54b07d981c9dc4a6675c4bee9107f7b2ab3912bd Mon Sep 17 00:00:00 2001 From: Eddie Richter Date: Fri, 20 Sep 2024 14:07:02 -0600 Subject: [PATCH 1/3] Freeing the commands and the command chain created during dispatch --- .../hsa-runtime/core/runtime/amd_aie_aql_queue.cpp | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp index 6f796441a..e446e48d8 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp @@ -439,6 +439,17 @@ hsa_status_t AieAqlQueue::SubmitCmd( // Executing all commands in the command chain ExecCmdAndWait(&exec_cmd_0, hw_ctx_handle, fd); + // Freeing the commands that we created + drm_gem_close close_bo_args{0}; + for (int i = 0; i < cmd_handles.size(); i++) { + close_bo_args.handle = cmd_handles[i]; + ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo_args); + } + + // Freeing the command chain BO + close_bo_args.handle = cmd_chain_bo_handle; + ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo_args); + // Syncing BOs after we execute the command if (SyncBos(bo_args, fd)) return HSA_STATUS_ERROR; From 57c599b4b6396c90a2288bffcdc6bbd58e7a6cce Mon Sep 17 00:00:00 2001 From: Eddie Richter Date: Fri, 20 Sep 2024 17:46:30 -0600 Subject: [PATCH 2/3] Unmapping memory instead of sending a close ioctl --- runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp index e446e48d8..246bd315d 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp @@ -346,6 +346,7 @@ hsa_status_t AieAqlQueue::SubmitCmd( case HSA_AMD_AIE_ERT_START_CU: { std::vector bo_args; std::vector cmd_handles; + std::vector cmd_sizes; // Iterating over future 
packets and seeing how many contiguous HSA_AMD_AIE_ERT_START_CU // packets there are. All can be combined into a single chain. @@ -391,6 +392,7 @@ hsa_status_t AieAqlQueue::SubmitCmd( // Keeping track of the handle cmd_handles.push_back(cmd_bo_handle); + cmd_sizes.push_back(cmd_size); } // Creating a packet that contains the command chain @@ -440,15 +442,12 @@ hsa_status_t AieAqlQueue::SubmitCmd( ExecCmdAndWait(&exec_cmd_0, hw_ctx_handle, fd); // Freeing the commands that we created - drm_gem_close close_bo_args{0}; for (int i = 0; i < cmd_handles.size(); i++) { - close_bo_args.handle = cmd_handles[i]; - ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo_args); + munmap(vmem_handle_mappings[cmd_handles[i]], cmd_sizes[i]); } // Freeing the command chain BO - close_bo_args.handle = cmd_chain_bo_handle; - ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo_args); + munmap(cmd_chain, cmd_chain_size); // Syncing BOs after we execute the command if (SyncBos(bo_args, fd)) From 44a3b1874ef60c31063c706c6dfa5a8794e99c89 Mon Sep 17 00:00:00 2001 From: Eddie Richter Date: Sat, 21 Sep 2024 13:06:31 -0600 Subject: [PATCH 3/3] Unmapping and closing cmd BOs as well as freeing the queue ring buffer --- runtime/hsa-runtime/core/inc/amd_aie_agent.h | 6 ++++++ .../core/runtime/amd_aie_agent.cpp | 2 ++ .../core/runtime/amd_aie_aql_queue.cpp | 20 +++++++++++++++---- 3 files changed, 24 insertions(+), 4 deletions(-) diff --git a/runtime/hsa-runtime/core/inc/amd_aie_agent.h b/runtime/hsa-runtime/core/inc/amd_aie_agent.h index 0925a206b..d99b71ed7 100644 --- a/runtime/hsa-runtime/core/inc/amd_aie_agent.h +++ b/runtime/hsa-runtime/core/inc/amd_aie_agent.h @@ -93,6 +93,9 @@ class AieAgent : public core::Agent { return system_allocator_; } + /// @brief Getter for the AIE system deallocator. + const std::function& system_deallocator() const { return system_deallocator_; } + // AIE agent methods. /// @brief Get the number of columns on this AIE agent.
int GetNumCols() const { return num_cols_; } @@ -117,6 +120,9 @@ class AieAgent : public core::Agent { core::MemoryRegion::AllocateFlags flags)> system_allocator_; + + std::function system_deallocator_; + const hsa_profile_t profile_ = HSA_PROFILE_BASE; const uint32_t min_aql_size_ = 0x40; const uint32_t max_aql_size_ = 0x40; diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp index 6340c2bb6..4bce61323 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aie_agent.cpp @@ -345,6 +345,8 @@ void AieAgent::InitAllocators() { ? mem : nullptr; }; + + system_deallocator_ = [](void* ptr) { core::Runtime::runtime_singleton_->FreeMemory(ptr); }; break; } } diff --git a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp index 246bd315d..283b5af60 100644 --- a/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp +++ b/runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp @@ -129,7 +129,12 @@ AieAqlQueue::AieAqlQueue(AieAgent *agent, size_t req_size_pkts, .CreateQueue(*this); } -AieAqlQueue::~AieAqlQueue() { AieAqlQueue::Inactivate(); } +AieAqlQueue::~AieAqlQueue() { + AieAqlQueue::Inactivate(); + if (ring_buf_) { + agent_.system_deallocator()(ring_buf_); + } +} hsa_status_t AieAqlQueue::Inactivate() { bool active(active_.exchange(false, std::memory_order_relaxed)); @@ -347,6 +352,7 @@ hsa_status_t AieAqlQueue::SubmitCmd( std::vector bo_args; std::vector cmd_handles; std::vector cmd_sizes; + std::vector cmds; // Iterating over future packets and seeing how many contiguous HSA_AMD_AIE_ERT_START_CU // packets there are. All can be combined into a single chain. 
@@ -392,6 +398,7 @@ hsa_status_t AieAqlQueue::SubmitCmd( // Keeping track of the handle cmd_handles.push_back(cmd_bo_handle); + cmds.push_back(cmd); cmd_sizes.push_back(cmd_size); } @@ -441,13 +448,18 @@ hsa_status_t AieAqlQueue::SubmitCmd( // Executing all commands in the command chain ExecCmdAndWait(&exec_cmd_0, hw_ctx_handle, fd); - // Freeing the commands that we created + // Unmapping and closing the cmd BOs + drm_gem_close close_bo_args{0}; for (int i = 0; i < cmd_handles.size(); i++) { - munmap(vmem_handle_mappings[cmd_handles[i]], cmd_sizes[i]); + munmap(cmds[i], cmd_sizes[i]); + close_bo_args.handle = cmd_handles[i]; + ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo_args); } - // Freeing the command chain BO + // Unmapping and closing the cmd_chain BO munmap(cmd_chain, cmd_chain_size); + close_bo_args.handle = cmd_chain_bo_handle; + ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close_bo_args); // Syncing BOs after we execute the command if (SyncBos(bo_args, fd))