Skip to content
This repository has been archived by the owner on Dec 24, 2024. It is now read-only.

Adding soft queue dispatch logic to dispatch commands to AIE agents #2

Merged
merged 6 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 20 additions & 1 deletion runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,9 @@ hsa_status_t XdnaDriver::GetAgentProperties(core::Agent &agent) const {
return HSA_STATUS_ERROR;
}

aie_agent.SetNumCols(aie_metadata.cols);
// Right now can only target N-1 columns so putting this
// here as a workaround
Comment on lines +121 to +122
Copy link
Collaborator

@makslevental makslevental Sep 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: why can we target only N-1 columns?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Discussed offline - this is specific to Phoenix (shim DMA missing from 0th col) and needs to be revisited for strix

aie_agent.SetNumCols(aie_metadata.cols - 1);
aie_agent.SetNumCoreRows(aie_metadata.core.row_count);

return HSA_STATUS_SUCCESS;
Expand Down Expand Up @@ -351,6 +353,16 @@ hsa_status_t XdnaDriver::InitDeviceHeap() {
return HSA_STATUS_SUCCESS;
}

/// @brief Hands the caller a copy of the driver's BO-handle to address table.
///
/// @param vmem_handle_mappings Output map; its previous contents are replaced
/// by a copy of the driver's vmem_handle_mappings member.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t XdnaDriver::GetHandleMappings(
    std::unordered_map<uint32_t, void *> &vmem_handle_mappings) {
  const auto &driver_table = this->vmem_handle_mappings;
  vmem_handle_mappings = driver_table;
  return HSA_STATUS_SUCCESS;
}

/// @brief Reports the file descriptor stored in this driver object (fd_).
///
/// @param fd Output; receives the value of fd_.
/// @return Always HSA_STATUS_SUCCESS.
hsa_status_t XdnaDriver::GetFd(int &fd) {
  fd = this->fd_;
  return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::FreeDeviceHeap() {
if (dev_heap_parent) {
munmap(dev_heap_parent, dev_heap_align * 2 - 1);
Expand Down Expand Up @@ -388,6 +400,13 @@ hsa_status_t XdnaDriver::ConfigHwCtxCU(
config_cu_param.cu_configs[i].cu_config_bo;
xdna_config_cu_param->cu_configs[i].cu_func =
config_cu_param.cu_configs[i].cu_func;

// sync configuration buffer
amdxdna_drm_sync_bo sync_args = {};
sync_args.handle = xdna_config_cu_param->cu_configs[i].cu_bo;
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync_args) < 0) {
return HSA_STATUS_ERROR;
}
}

amdxdna_drm_config_hwctx config_hw_ctx_args{
Expand Down
62 changes: 62 additions & 0 deletions runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,35 @@
#include "core/inc/signal.h"
#include "core/util/locks.h"

/*
 * Interpretation of the beginning of data payload for ERT_CMD_CHAIN in
 * amdxdna_cmd. The rest of the payload in amdxdna_cmd is cmd BO handles.
 *
 * The submission code in this change obtains a pointer to this structure by
 * reinterpreting amdxdna_cmd::data of the chain command.
 */
struct amdxdna_cmd_chain {
// Number of entries stored in data[] (see the __counted_by annotation below).
__u32 command_count;
// Written as 0 by the submitter in this change; presumably updated by the
// driver/firmware while the chain runs -- TODO confirm.
__u32 submit_index;
// Written as 0 by the submitter in this change; presumably reports which
// chained command failed -- TODO confirm.
__u32 error_index;
// Reserved words; not written by the code visible in this change.
__u32 reserved[3];
// One 64-bit slot per chained command, filled with command BO handles.
__u64 data[] __counted_by(command_count);
};


/* Exec buffer command header format */
struct amdxdna_cmd {
// The bit-fields below add up to 32 bits (4 + 6 + 2 + 11 + 5 + 4) and share
// storage with `header`, so the header can be accessed field by field or as a
// single 32-bit word.
union {
struct {
// Command state (4 bits).
__u32 state : 4;
// Unused bits (6 bits).
__u32 unused : 6;
// Extra CU mask count (2 bits); the submitter in this change always writes 0.
__u32 extra_cu_masks : 2;
// Number of 32-bit words in data[] (11 bits); see __counted_by below.
__u32 count : 11;
// Command opcode (5 bits).
__u32 opcode : 5;
// Reserved bits (4 bits).
__u32 reserved : 4;
};
// The whole header viewed as one word.
__u32 header;
};
// Command payload; its interpretation depends on opcode (for a command chain
// it starts with an amdxdna_cmd_chain).
__u32 data[] __counted_by(count);
};

namespace rocr {
namespace AMD {

Expand Down Expand Up @@ -119,7 +148,7 @@
hsa_fence_scope_t releaseFence = HSA_FENCE_SCOPE_NONE,
hsa_signal_t *signal = NULL) override;

/// @brief Queue identifier, initialised to INVALID_QUEUEID.
// NOTE(review): INVALID_QUEUEID is wider than uint32_t; the build warning
// attached to this line reports the initialiser being truncated from
// 18446744073709551615 to 4294967295, so a later comparison of queue_id_
// against INVALID_QUEUEID may not behave as intended -- confirm.
uint32_t queue_id_ = INVALID_QUEUEID;

Check warning on line 151 in runtime/hsa-runtime/core/inc/amd_aie_aql_queue.h

View workflow job for this annotation

GitHub Actions / Build (linux)

implicit conversion from 'unsigned long long' to 'uint32_t' (aka 'unsigned int') changes value from 18446744073709551615 to 4294967295 [-Wconstant-conversion]
/// @brief ID of AIE device on which this queue has been mapped.
uint32_t node_id_ = std::numeric_limits<uint32_t>::max();
/// @brief Queue size in bytes.
Expand All @@ -134,6 +163,39 @@
/// @brief Base of the queue's ring buffer storage.
void *ring_buf_ = nullptr;

/// @brief Walks the packets with dispatch ids in
/// [read_dispatch_id, write_dispatch_id) and submits them to the device
/// through the XDNA driver, waiting for each submission to finish.
///
/// @param hw_ctx_handle Handle of the hardware context the commands run on
/// @param fd File descriptor used for the driver ioctls and mappings
/// @param queue_base Base address of the packet array, indexed by dispatch id
/// @param read_dispatch_id First dispatch id to process
/// @param write_dispatch_id One past the last dispatch id to process
/// @param vmem_handle_mappings Map from BO handle to address, used to rewrite
/// the handles found in packet payloads
/// @return HSA_STATUS_SUCCESS when every packet in the range was processed,
/// HSA_STATUS_ERROR otherwise
hsa_status_t SubmitCmd(uint32_t hw_ctx_handle, int fd, void *queue_base,
uint64_t read_dispatch_id, uint64_t write_dispatch_id,
std::unordered_map<uint32_t, void*> &vmem_handle_mappings);

/// @brief Creates a command BO and returns a pointer to the memory and
/// the corresponding handle
///
/// @param size size of memory to allocate
/// @param handle Output: receives the handle of the created BO
/// @param cmd Output: receives a pointer to the mapped command buffer
/// @param fd File descriptor used for the driver ioctls and the mapping
/// @return HSA_STATUS_SUCCESS on success, HSA_STATUS_ERROR if a driver call
/// fails
hsa_status_t CreateCmd(uint32_t size, uint32_t *handle, amdxdna_cmd **cmd, int fd);

/// @brief Adds all BOs in a command packet payload to a vector
/// and replaces the handles with a virtual address
///
/// @param count Number of entries in the command
/// @param bo_args Vector that the BO handles found in the payload are
/// appended to
/// @param cmd_pkt_payload A pointer to the payload of the command; it is
/// modified in place
/// @param vmem_handle_mappings Map from BO handle to address, used to look up
/// the address written in place of each handle
void RegisterCmdBOs(uint32_t count, std::vector<uint32_t> &bo_args,
hsa_amd_aie_ert_start_kernel_data_t *cmd_pkt_payload,
std::unordered_map<uint32_t, void*> &vmem_handle_mappings);

/// @brief Syncs all BOs referenced in bo_args
///
/// @param bo_args vector containing handles of BOs to sync
/// @param fd File descriptor the sync ioctl is issued on
/// @return HSA_STATUS_SUCCESS if every sync ioctl succeeded,
/// HSA_STATUS_ERROR as soon as one fails
hsa_status_t SyncBos(std::vector<uint32_t> &bo_args, int fd);

/// @brief Executes a command and waits for its completion
///
/// @param exec_cmd Structure containing the details of the command to execute
/// @param hw_ctx_handle the handle of the hardware context to run this command
/// @param fd File descriptor the exec and wait ioctls are issued on
/// @return HSA_STATUS_SUCCESS if both the exec and the wait ioctl succeeded,
/// HSA_STATUS_ERROR otherwise
hsa_status_t ExecCmdAndWait(amdxdna_drm_exec_cmd *exec_cmd, uint32_t hw_ctx_handle, int fd);

/// @brief Handle for an application context on the AIE device.
///
/// Each user queue will have an associated context. This handle is assigned
Expand Down
4 changes: 4 additions & 0 deletions runtime/hsa-runtime/core/inc/amd_xdna_driver.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@

#include "core/inc/driver.h"
#include "core/inc/memory_region.h"
#include "core/driver/xdna/uapi/amdxdna_accel.h"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: shouldn't we be getting this from the kernel somewhere?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a good question. I know @atgutier added this to the runtime but not sure how this is usually done. @atgutier what is the preferred way of doing this?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is needed for now to ensure core ROCr can at least build on systems that do not have XRT installed (e.g., the Gerrit test infra currently). Typically, the installer would place this UAPI (user API) header in a known include directory. When using XRT, the header is installed here: /usr/src/xrt-amdxdna-2.18.0/include/uapi/drm_local/amdxdna_accel.h.

The solution for now is to keep a copy of this header here, to avoid issues where we cannot find it installed globally on the system.

The GPU driver interface also directly includes the kfd_ioctl.h header in the runtime for convenience.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Isn't this in the kernel now though? https://patchwork.kernel.org/project/dri-devel/cover/[email protected]/

Or meant to be? Admittedly in my 6.10.7 I only have that header in places that XRT would've installed it.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's in the kernel but only for inclusion by the kernel driver. We need it to be installed somewhere accessible by user-mode. So far the only thing that does that is the XRT installer. I confirmed with Max that this is indeed the only way to get the header. I'd prefer not to use that so this is a solution for now.

Eventually, we should get to a point where the driver module installer installs this header.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can make a CMake find_package integration to search for the usual locations of the XDNA driver.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We'd need a package to install first. As I said, currently that is only through XRT, which nobody wants to require as a dependency here.


namespace rocr {
namespace core {
Expand All @@ -69,6 +70,9 @@ class XdnaDriver : public core::Driver {
hsa_status_t Init() override;
hsa_status_t QueryKernelModeDriver(core::DriverQuery query) override;

/// @brief Copies the driver's table of BO handles to addresses into the
/// caller's map.
///
/// @param vmem_handle_mappings Output map; overwritten with the copy
/// @return HSA_STATUS_SUCCESS
hsa_status_t GetHandleMappings(std::unordered_map<uint32_t, void*> &vmem_handle_mappings);
/// @brief Returns the file descriptor held by the driver.
///
/// @param fd Output; receives the driver's file descriptor
/// @return HSA_STATUS_SUCCESS
hsa_status_t GetFd(int &fd);

hsa_status_t GetAgentProperties(core::Agent &agent) const override;
hsa_status_t
GetMemoryProperties(uint32_t node_id,
Expand Down
229 changes: 227 additions & 2 deletions runtime/hsa-runtime/core/runtime/amd_aie_aql_queue.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,16 @@
////////////////////////////////////////////////////////////////////////////////

#include "core/inc/amd_aie_aql_queue.h"
#include "core/inc/amd_xdna_driver.h"

#ifdef __linux__
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#endif

#ifdef _WIN32
Expand Down Expand Up @@ -195,8 +198,230 @@ uint64_t AieAqlQueue::AddWriteIndexAcqRel(uint64_t value) {
}

// Doorbell entry point of the AIE soft queue: fetches the driver's handle
// table and file descriptor, then submits the packets between the queue's
// read and write dispatch ids via SubmitCmd. Returns silently if either
// driver query fails; the status returned by SubmitCmd is discarded.
void AieAqlQueue::StoreRelaxed(hsa_signal_value_t value) {
// NOTE(review): this text comes from a diff view whose header reports two
// deleted lines for this file; the two-line atomic::Store call below is
// presumably the removed side of the hunk and not part of the new body --
// confirm against the merged file.
atomic::Store(signal_.hardware_doorbell_ptr, uint64_t(value),
std::memory_order_release);
// GetHandleMappings assigns the driver's table to this local map, so the
// queue works on a copy taken at doorbell time.
std::unordered_map<uint32_t, void*> vmem_handle_mappings;
if(static_cast<XdnaDriver&>(core::Runtime::runtime_singleton_->AgentDriver(agent_.driver_type)).GetHandleMappings(vmem_handle_mappings) != HSA_STATUS_SUCCESS)
return;

// File descriptor used for every driver call made during submission.
int fd = 0;
if(static_cast<XdnaDriver&>(core::Runtime::runtime_singleton_->AgentDriver(agent_.driver_type)).GetFd(fd) != HSA_STATUS_SUCCESS)
return;

// Processes dispatch ids in [read_dispatch_id, write_dispatch_id).
SubmitCmd(hw_ctx_handle_, fd, amd_queue_.hsa_queue.base_address, amd_queue_.read_dispatch_id, amd_queue_.write_dispatch_id, vmem_handle_mappings);
}

/// @brief Issues DRM_IOCTL_AMDXDNA_SYNC_BO for every BO handle in bo_args.
///
/// @param bo_args Handles of the BOs to sync; processed in order.
/// @param fd File descriptor the ioctl is issued on.
/// @return HSA_STATUS_SUCCESS if every ioctl returned 0 (also when bo_args is
/// empty); HSA_STATUS_ERROR as soon as one returns non-zero, in which case the
/// remaining handles are not synced.
hsa_status_t AieAqlQueue::SyncBos(std::vector<uint32_t> &bo_args, int fd) {
  // Range-for replaces the int index that was compared against the unsigned
  // result of size().
  for (const uint32_t bo_handle : bo_args) {
    amdxdna_drm_sync_bo sync_params = {};
    sync_params.handle = bo_handle;
    if (ioctl(fd, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync_params))
      return HSA_STATUS_ERROR;
  }

  return HSA_STATUS_SUCCESS;
}

/// @brief Submits exec_cmd with DRM_IOCTL_AMDXDNA_EXEC_CMD and then blocks in
/// DRM_IOCTL_AMDXDNA_WAIT_CMD on the same sequence number.
///
/// @param exec_cmd Fully populated exec request; its seq field is read after
/// the exec ioctl (presumably filled in by the driver -- TODO confirm).
/// @param hw_ctx_handle Hardware context the wait is issued against.
/// @param fd File descriptor both ioctls are issued on.
/// @return HSA_STATUS_SUCCESS if both ioctls returned 0, HSA_STATUS_ERROR if
/// either returned non-zero.
hsa_status_t AieAqlQueue::ExecCmdAndWait(amdxdna_drm_exec_cmd *exec_cmd, uint32_t hw_ctx_handle, int fd) {
// Submit the cmd
if (ioctl(fd, DRM_IOCTL_AMDXDNA_EXEC_CMD, exec_cmd))
return HSA_STATUS_ERROR;

// Waiting for command to finish
amdxdna_drm_wait_cmd wait_cmd = {};
wait_cmd.hwctx = hw_ctx_handle;
wait_cmd.timeout = 50; // 50ms timeout
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can we make this an env variable or something?

// Wait on the sequence number associated with the submission above.
wait_cmd.seq = exec_cmd->seq;

// A non-zero result from the wait ioctl is reported as a plain error; the
// cause is not distinguished here.
if (ioctl(fd, DRM_IOCTL_AMDXDNA_WAIT_CMD, &wait_cmd))
return HSA_STATUS_ERROR;

return HSA_STATUS_SUCCESS;
}

/// @brief Collects the BO handles referenced by a start-kernel payload and
/// rewrites the payload so that it carries addresses instead of handles.
///
/// The handle in data[2] and the handle of every operand are appended to
/// bo_args. Each operand occupies two 32-bit words starting at word 5: the
/// first word holds the handle on entry and the low 32 address bits on exit,
/// the second word receives the high 32 address bits. data[2] is replaced by
/// an encoded form of the address mapped to its handle.
///
/// @param count Number of entries in the command. Values that leave no room
/// for operands (count <= 6) are treated as "no operands".
/// @param bo_args Vector the handles are appended to.
/// @param cmd_pkt_payload Payload to inspect and rewrite in place.
/// @param vmem_handle_mappings Map from BO handle to address. A handle that is
/// missing from the map is inserted with a null address by operator[].
void AieAqlQueue::RegisterCmdBOs(uint32_t count, std::vector<uint32_t> &bo_args, hsa_amd_aie_ert_start_kernel_data_t *cmd_pkt_payload, std::unordered_map<uint32_t, void*> &vmem_handle_mappings) {

  // This is the index where the operand addresses start in a command
  const uint32_t operand_starting_index = 5;

  // We have 6 arguments of the packet before we start passing operands
  // and operands are 64-bits so we need to divide by two.
  // The subtraction is unsigned, so a count below non_operand_count must be
  // guarded or it wraps around and the loop below runs far out of bounds.
  constexpr uint32_t non_operand_count = 6;
  const uint32_t num_operands = (count > non_operand_count) ? (count - non_operand_count) / 2 : 0;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you promote these to ALL_CAPS_CONSTANTS at the top of the file


  // Keep track of the handles before we submit the packet
  bo_args.push_back(cmd_pkt_payload->data[2]); // we know element 2 is the instruction sequence
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: anyway to check this rather than relying on it? i guess not hmm. seems silly but can you promote 2 to an ALL_CAPS_CONSTANT called something like CMD_PKT_PAYLOAD_INSTRUCTION_SEQUENCE_IDX



  // Going through all of the operands in the command, keeping track of the
  // handles and turning the handles into addresses. The starting index of
  // the operands in a command is `operand_starting_index` and the fields
  // are 32-bits we need to iterate over every two
  for (uint32_t operand_iter = 0; operand_iter < num_operands; operand_iter++) {
    const uint32_t lo_word_index = operand_starting_index + 2 * operand_iter;

    // Read the handle once, before either word of the operand is overwritten.
    const uint32_t bo_handle = cmd_pkt_payload->data[lo_word_index];
    const uint64_t bo_address = reinterpret_cast<uint64_t>(vmem_handle_mappings[bo_handle]);

    bo_args.push_back(bo_handle);
    cmd_pkt_payload->data[lo_word_index + 1] = (bo_address >> 32) & 0xFFFFFFFF;
    cmd_pkt_payload->data[lo_word_index] = bo_address & 0xFFFFFFFF;
  }

  // We know data[2] is the DPU
  // NOTE(review): the mask 0x02FFFFFF clears bit 24 of the address -- confirm
  // that this is intended.
  cmd_pkt_payload->data[2] = 0x04000000 | (reinterpret_cast<uint64_t>(vmem_handle_mappings[cmd_pkt_payload->data[2]]) & 0x02FFFFFF);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you promote 0x04000000 and 0x02FFFFFF to named ALL_CAPS_CONSTANTS at the top of the file.


}

/// @brief Creates a command BO of the requested size, maps it into the
/// process and returns both the mapping and the BO handle.
///
/// @param size Size in bytes of the BO to create and of the mapping.
/// @param handle Output: receives the handle of the created BO. Written only
/// on success.
/// @param cmd Output: receives the address the BO was mapped at. Written only
/// on success.
/// @param fd File descriptor used for the ioctls and for mmap.
/// @return HSA_STATUS_SUCCESS on success; HSA_STATUS_ERROR if creating the BO,
/// querying its info or mapping it fails. The BO is not released on the error
/// paths that follow its creation.
hsa_status_t AieAqlQueue::CreateCmd(uint32_t size, uint32_t *handle, amdxdna_cmd **cmd, int fd) {

  // Creating the command
  amdxdna_drm_create_bo create_cmd_bo = {};
  create_cmd_bo.type = AMDXDNA_BO_CMD;
  // Honour the caller's size; it was previously ignored in favour of a fixed
  // 64 bytes, which under-allocated the 4096-byte command chain buffer.
  create_cmd_bo.size = size;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ALL_CAPS_CONSTANT at the top of the file

  if (ioctl(fd, DRM_IOCTL_AMDXDNA_CREATE_BO, &create_cmd_bo))
    return HSA_STATUS_ERROR;

  // The map offset needed by mmap is obtained from the BO info.
  amdxdna_drm_get_bo_info cmd_bo_get_bo_info = {};
  cmd_bo_get_bo_info.handle = create_cmd_bo.handle;
  if (ioctl(fd, DRM_IOCTL_AMDXDNA_GET_BO_INFO, &cmd_bo_get_bo_info))
    return HSA_STATUS_ERROR;

  void *mapped_cmd = mmap(nullptr, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, cmd_bo_get_bo_info.map_offset);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the 64 here not create_cmd_bo.size? if so, can you use it, if not can you promote

  // mmap reports failure with MAP_FAILED, not a null pointer; callers write
  // through *cmd immediately, so the failure must be caught here.
  if (mapped_cmd == MAP_FAILED)
    return HSA_STATUS_ERROR;

  *cmd = static_cast<amdxdna_cmd *>(mapped_cmd);
  *handle = create_cmd_bo.handle;

  return HSA_STATUS_SUCCESS;
}

/// @brief Submits the packets with dispatch ids in
/// [read_dispatch_id, write_dispatch_id) to the device and waits for them.
///
/// Consecutive HSA_AMD_AIE_ERT_START_CU packets are gathered into a single
/// command chain: one command BO is created per packet, one more BO holds the
/// chain, the referenced BOs are synced, the chain is executed and the BOs are
/// synced again.
///
/// @param hw_ctx_handle Hardware context the chain is executed on.
/// @param fd File descriptor used for all driver calls.
/// @param queue_base Base address of the packet array, indexed by dispatch id.
/// @param read_dispatch_id First dispatch id to process.
/// @param write_dispatch_id One past the last dispatch id to process.
/// @param vmem_handle_mappings Map from BO handle to address, used to rewrite
/// the packet payloads in place.
/// @return HSA_STATUS_SUCCESS if every packet was submitted and completed;
/// HSA_STATUS_ERROR on a packet with an unexpected header or opcode, or when
/// a driver call fails.
///
/// NOTE(review): the command BOs created here are neither unmapped nor
/// released in this function -- confirm where their lifetime ends.
hsa_status_t AieAqlQueue::SubmitCmd(uint32_t hw_ctx_handle, int fd, void *queue_base, uint64_t read_dispatch_id, uint64_t write_dispatch_id, std::unordered_map<uint32_t, void*> &vmem_handle_mappings) {

  // This is the index where the operand addresses start in a command
  const int operand_starting_index = 5;
  // Not referenced below; the cast only silences the unused-variable warning.
  static_cast<void>(operand_starting_index);
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

reuse previous promotion


  // NOTE(review): packets are addressed as queue_base + dispatch id, with no
  // wrap-around by the queue size -- confirm the ids stay within the ring.
  hsa_amd_aie_ert_packet_t *const packets = static_cast<hsa_amd_aie_ert_packet_t *>(queue_base);

  uint64_t cur_id = read_dispatch_id;
  while (cur_id < write_dispatch_id) {

    hsa_amd_aie_ert_packet_t *pkt = packets + cur_id;

    // Get the packet header information
    if (pkt->header.header != HSA_PACKET_TYPE_VENDOR_SPECIFIC || pkt->header.AmdFormat != HSA_AMD_PACKET_TYPE_AIE_ERT)
      return HSA_STATUS_ERROR;

    // Get the payload information
    switch (pkt->opcode) {
      case HSA_AMD_AIE_ERT_START_CU: {

        std::vector<uint32_t> bo_args;
        std::vector<uint32_t> cmd_handles;

        // Iterating over future packets and seeing how many contigous HSA_AMD_AIE_ERT_START_CU
        // packets there are. All can be combined into a single chain.
        // The ids are 64-bit, so the counters are too.
        uint64_t num_cont_start_cu_pkts = 1;
        for (uint64_t peak_pkt_id = cur_id + 1; peak_pkt_id < write_dispatch_id; peak_pkt_id++) {
          hsa_amd_aie_ert_packet_t *peak_pkt = packets + peak_pkt_id;
          // Inspect the packet being peeked at, not the current one; testing
          // `pkt` here would chain every remaining packet regardless of its
          // opcode.
          if (peak_pkt->opcode != HSA_AMD_AIE_ERT_START_CU) {
            break;
          }
          num_cont_start_cu_pkts++;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
if (pkt->opcode == HSA_AMD_AIE_ERT_START_CU) {
num_cont_start_cu_pkts++;
}
else {
break;
}
if (pkt->opcode != HSA_AMD_AIE_ERT_START_CU) {
break
}
num_cont_start_cu_pkts++;

        }

        // Iterating over all of the contigous HSA_AMD_AIE_ERT_CMD_CHAIN packets
        for (uint64_t pkt_iter = cur_id; pkt_iter < cur_id + num_cont_start_cu_pkts; pkt_iter++) {

          // Getting the current command packet
          hsa_amd_aie_ert_packet_t *chained_pkt = packets + pkt_iter;
          hsa_amd_aie_ert_start_kernel_data_t *cmd_pkt_payload = reinterpret_cast<hsa_amd_aie_ert_start_kernel_data_t *>(chained_pkt->payload_data);

          // Add the handles for all of the BOs to bo_args as well as rewrite the command
          // payload handles to contain the actual virtual addresses
          RegisterCmdBOs(chained_pkt->count, bo_args, cmd_pkt_payload, vmem_handle_mappings);

          // Creating a packet that contains the command to execute the kernel
          uint32_t cmd_bo_handle = 0;
          amdxdna_cmd *cmd = nullptr;
          if (CreateCmd(64, &cmd_bo_handle, &cmd, fd))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is this the same 64 as create_cmd_bo.size up above? if so, can you use the previously promoted/named constant, if not can you promote

            return HSA_STATUS_ERROR;

          // Filling in the fields of the command
          cmd->state = chained_pkt->state;
          cmd->extra_cu_masks = 0;

          // For some reason the first count needs to be a little larger than
          // it actually is, assuming there is some other data structure at the
          // beginning
          // TODO: Look more into this
          if (pkt_iter == cur_id) {
            cmd->count = chained_pkt->count + 5;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

i guess the 5 here is operand_starting_index? if so please reuse previous promotion, if not please promote 5 to ALL_CAPS_CONSTANT

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Different so moving to a separate constant This is something that I don't fully understand and need to dig a bit deeper into the driver to see why the counts are different. Hence the comment.

          }
          else {
            cmd->count = chained_pkt->count;
          }
          cmd->opcode = chained_pkt->opcode;
          cmd->data[0] = cmd_pkt_payload->cu_mask;
          // NOTE(review): the copy length comes from the packet and is not
          // checked against the 64 bytes requested for the BO -- confirm the
          // bound on count.
          memcpy((cmd->data + 1), cmd_pkt_payload->data, 4 * chained_pkt->count);

          // Keeping track of the handle
          cmd_handles.push_back(cmd_bo_handle);
        }

        // Creating a packet that contains the command chain
        uint32_t cmd_chain_bo_handle = 0;
        amdxdna_cmd *cmd_chain = nullptr;
        if (CreateCmd(4096, &cmd_chain_bo_handle, &cmd_chain, fd))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please promote 4096 to an ALL_CAPS_CONSTANT at the top of the file

          return HSA_STATUS_ERROR;

        // Writing information to the command buffer
        amdxdna_cmd_chain *cmd_chain_payload = reinterpret_cast<amdxdna_cmd_chain *>(cmd_chain->data);

        // Creating a command chain
        cmd_chain->state = HSA_AMD_AIE_ERT_STATE_NEW;
        cmd_chain->extra_cu_masks = 0;
        // TODO: Figure out why this is the value
        cmd_chain->count = 0xA;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just fyi these //... that follow a semicolon ; play havoc with clang-format. please shift to just above this line. also any idea where to find this out? somewhere in the FW? or is it the driver?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am guessing the FW would be the quickest to understand why the count is larger than I would have expected.

        cmd_chain->opcode = HSA_AMD_AIE_ERT_CMD_CHAIN;
        cmd_chain_payload->command_count = cmd_handles.size();
        cmd_chain_payload->submit_index = 0;
        cmd_chain_payload->error_index = 0;
        for (size_t i = 0; i < cmd_handles.size(); i++) {
          cmd_chain_payload->data[i] = cmd_handles[i];
        }

        // Syncing BOs before we execute the command
        if (SyncBos(bo_args, fd))
          return HSA_STATUS_ERROR;

        // Removing duplicates in the bo container. The driver will report
        // an error if we provide the same BO handle multiple times.
        // This can happen if any of the BOs are the same across jobs
        std::sort(bo_args.begin(), bo_args.end());
        bo_args.erase(std::unique(bo_args.begin(), bo_args.end()), bo_args.end());

        // Filling in the fields to execute the command chain
        amdxdna_drm_exec_cmd exec_cmd_0 = {};
        exec_cmd_0.ext = 0;
        exec_cmd_0.ext_flags = 0;
        exec_cmd_0.hwctx = hw_ctx_handle;
        exec_cmd_0.type = AMDXDNA_CMD_SUBMIT_EXEC_BUF;
        exec_cmd_0.cmd_handles = cmd_chain_bo_handle;
        exec_cmd_0.args = reinterpret_cast<__u64>(bo_args.data());
        exec_cmd_0.cmd_count = 1;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

question: what's the difference between cmd_count and cmd_chain->count?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

My understanding is that cmd_count is the number of chains that you are submitting and cmd_chain->count is the number of commands in a particular chain. Currently the driver and/or FW only supports one command chain at a time as per this comment

        exec_cmd_0.arg_count = bo_args.size();

        // Executing all commands in the command chain. The status is kept
        // rather than dropped so that a failed or timed-out submission is
        // reported to the caller.
        const hsa_status_t exec_status = ExecCmdAndWait(&exec_cmd_0, hw_ctx_handle, fd);

        // Syncing BOs after we execute the command. This still runs when the
        // execution failed, as it did before the status was checked.
        if (SyncBos(bo_args, fd))
          return HSA_STATUS_ERROR;

        if (exec_status != HSA_STATUS_SUCCESS)
          return exec_status;

        cur_id += num_cont_start_cu_pkts;
        break;
      }
      default: {
        // Any other opcode is rejected.
        return HSA_STATUS_ERROR;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
break;

      }

    }

  }

  return HSA_STATUS_SUCCESS;
}

void AieAqlQueue::StoreRelease(hsa_signal_value_t value) {
Expand Down
Loading