Skip to content
This repository has been archived by the owner on Dec 24, 2024. It is now read-only.

Fixes to work with latest driver and userspace BO sync #36

Draft
wants to merge 7 commits into
base: iree-aie
Choose a base branch
from
Draft
48 changes: 20 additions & 28 deletions rocrtst/suites/aie/aie_hsa_dispatch_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@
#include "hsa/hsa.h"
#include "hsa/hsa_ext_amd.h"

// Helpers that split a pointer's address value into two 32-bit halves.
//
// These are inline function templates rather than function-like macros so
// that the argument is type-checked (only pointers are accepted) and the
// names obey normal C++ scoping. The upper-case names are kept so existing
// call sites of the form LOW_ADDR(ptr) / HIGH_ADDR(ptr) compile unchanged.

// Returns the low 32 bits of the address held in `addr`, zero-extended to
// 64 bits.
template <typename T>
inline uint64_t LOW_ADDR(T *addr) {
  return reinterpret_cast<uint64_t>(addr) & 0xFFFFFFFF;
}

// Returns the address held in `addr` shifted right by 32 bits, i.e. its
// upper half.
template <typename T>
inline uint64_t HIGH_ADDR(T *addr) {
  return reinterpret_cast<uint64_t>(addr) >> 32;
}

namespace {

hsa_status_t get_agent(hsa_agent_t agent, std::vector<hsa_agent_t> *agents,
Expand Down Expand Up @@ -97,7 +100,7 @@ hsa_status_t get_coarse_global_kernarg_mem_pool(hsa_amd_memory_pool_t pool,
}

void load_pdi_file(hsa_amd_memory_pool_t mem_pool, const std::string &file_name,
void **buf) {
void **buf, uint32_t &pdi_size) {
std::ifstream bin_file(file_name,
std::ios::binary | std::ios::ate | std::ios::in);

Expand All @@ -109,6 +112,7 @@ void load_pdi_file(hsa_amd_memory_pool_t mem_pool, const std::string &file_name,
auto r = hsa_amd_memory_pool_allocate(mem_pool, size, 0, buf);
assert(r == HSA_STATUS_SUCCESS);
bin_file.read(reinterpret_cast<char *>(*buf), size);
pdi_size = size;
}

void load_instr_file(hsa_amd_memory_pool_t mem_pool, const std::string &file_name,
Expand Down Expand Up @@ -199,24 +203,18 @@ int main(int argc, char **argv) {
// Load the DPU and PDI files into a global pool that doesn't support kernel
// args (DEV BO).
uint32_t num_instr;
uint32_t pdi_size;
load_instr_file(global_dev_mem_pool, instr_inst_file_name,
reinterpret_cast<void **>(&instr_inst_buf), num_instr);
uint32_t instr_handle = 0;
r = hsa_amd_get_handle_from_vaddr(instr_inst_buf, &instr_handle);
assert(r == HSA_STATUS_SUCCESS);
assert(instr_handle != 0);

load_pdi_file(global_dev_mem_pool, pdi_file_name,
reinterpret_cast<void **>(&pdi_buf));
uint32_t pdi_handle = 0;
r = hsa_amd_get_handle_from_vaddr(pdi_buf, &pdi_handle);
assert(r == HSA_STATUS_SUCCESS);
assert(pdi_handle != 0);
reinterpret_cast<void **>(&pdi_buf), pdi_size);

hsa_amd_aie_ert_hw_ctx_cu_config_t cu_config{.cu_config_bo = pdi_handle,
.cu_func = 0};
hsa_amd_aie_ert_hw_ctx_cu_config_addr_t cu_config {
.cu_config_addr = reinterpret_cast<uint64_t>(pdi_buf),
.cu_func = 0,
.cu_size = pdi_size};

hsa_amd_aie_ert_hw_ctx_config_cu_param_t config_cu_args{
hsa_amd_aie_ert_hw_ctx_config_cu_param_addr_t config_cu_args {
.num_cus = 1, .cu_configs = &cu_config};

// Configure the queue's hardware context.
Expand All @@ -232,8 +230,6 @@ int main(int argc, char **argv) {
std::vector<uint32_t *> input(num_pkts);
std::vector<uint32_t *> output(num_pkts);
std::vector<hsa_amd_aie_ert_start_kernel_data_t *> cmd_payloads(num_pkts);
std::vector<uint32_t> input_handle(num_pkts);
std::vector<uint32_t> output_handle(num_pkts);

uint64_t wr_idx = 0;
uint64_t packet_id = 0;
Expand All @@ -242,16 +238,10 @@ int main(int argc, char **argv) {
r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0,
reinterpret_cast<void **>(&input[pkt_iter]));
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_get_handle_from_vaddr(input[pkt_iter], &input_handle[pkt_iter]);
assert(r == HSA_STATUS_SUCCESS);
assert(input_handle[pkt_iter] != 0);

r = hsa_amd_memory_pool_allocate(global_kernarg_mem_pool, data_buffer_size, 0,
reinterpret_cast<void **>(&output[pkt_iter]));
assert(r == HSA_STATUS_SUCCESS);
r = hsa_amd_get_handle_from_vaddr(output[pkt_iter], &output_handle[pkt_iter]);
assert(r == HSA_STATUS_SUCCESS);
assert(output_handle[pkt_iter] != 0);

for (std::size_t i = 0; i < num_data_elements; i++) {
*(input[pkt_iter] + i) = i * (pkt_iter + 1);
Expand Down Expand Up @@ -284,13 +274,15 @@ int main(int argc, char **argv) {
// Transaction opcode
cmd_payload->data[0] = 0x3;
cmd_payload->data[1] = 0x0;
cmd_payload->data[2] = instr_handle;
cmd_payload->data[3] = 0x0;
cmd_payload->data[2] = LOW_ADDR(instr_inst_buf);
cmd_payload->data[3] = HIGH_ADDR(instr_inst_buf);
cmd_payload->data[4] = num_instr;
cmd_payload->data[5] = input_handle[pkt_iter];
cmd_payload->data[6] = 0;
cmd_payload->data[7] = output_handle[pkt_iter];
cmd_payload->data[8] = 0;
cmd_payload->data[5] = LOW_ADDR(input[pkt_iter]);
cmd_payload->data[6] = HIGH_ADDR(input[pkt_iter]);
cmd_payload->data[7] = LOW_ADDR(output[pkt_iter]);
cmd_payload->data[8] = HIGH_ADDR(output[pkt_iter]);
cmd_payload->data[9] = num_data_elements * sizeof(uint32_t);
cmd_payload->data[10] = num_data_elements * sizeof(uint32_t);
cmd_pkt->payload_data = reinterpret_cast<uint64_t>(cmd_payload);

// Keeping track of payloads so we can free them at the end
Expand Down
5 changes: 0 additions & 5 deletions runtime/hsa-runtime/core/common/hsa_table_interface.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -930,11 +930,6 @@ hsa_amd_queue_hw_ctx_config(const hsa_queue_t *queue,
return amdExtTable->hsa_amd_queue_hw_ctx_config_fn(queue, config_type, args);
}

// Mirrors AMD Extension APIs.
// Thin forwarding wrapper: passes `ptr` and `handle` unchanged to the
// hsa_amd_get_handle_from_vaddr_fn entry of amdExtTable and returns that
// call's status as-is. No argument validation is performed here.
// NOTE(review): presumably `ptr` is a runtime-allocated virtual address and
// `handle` receives the matching buffer-object handle -- confirm against the
// driver implementation behind the table entry.
hsa_status_t hsa_amd_get_handle_from_vaddr(void* ptr, uint32_t* handle) {
return amdExtTable->hsa_amd_get_handle_from_vaddr_fn(ptr, handle);
}

// Mirrors AMD Extension APIs.
hsa_status_t HSA_API hsa_amd_queue_cu_set_mask(const hsa_queue_t* queue,
uint32_t num_cu_mask_count,
Expand Down
5 changes: 0 additions & 5 deletions runtime/hsa-runtime/core/driver/kfd/amd_kfd_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -257,11 +257,6 @@ KfdDriver::ConfigHwCtx(core::Queue &queue,
return HSA_STATUS_ERROR_INVALID_AGENT;
}

// KFD implementation of GetHandleFromVaddr: an unconditional stub.
// Always returns HSA_STATUS_ERROR_INVALID_AGENT; `ptr` is never read and
// `handle` is never written.
hsa_status_t KfdDriver::GetHandleFromVaddr(void* ptr, uint32_t* handle) {
// Only AIE queues support this for now.
return HSA_STATUS_ERROR_INVALID_AGENT;
}

void *KfdDriver::AllocateKfdMemory(const HsaMemFlags &flags, uint32_t node_id,
size_t size) {
void *mem = nullptr;
Expand Down
40 changes: 19 additions & 21 deletions runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -189,14 +189,14 @@
}

vmem_handle_mappings.emplace(create_bo_args.handle, mapped_mem);
vmem_handle_mappings_reverse.emplace(mapped_mem, create_bo_args.handle);
vmem_addr_mappings.emplace(mapped_mem, create_bo_args.handle);

return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::FreeMemory(void* ptr, size_t size) {
auto it = vmem_handle_mappings_reverse.find(ptr);
if (it == vmem_handle_mappings_reverse.end())
auto it = vmem_addr_mappings.find(ptr);
if (it == vmem_addr_mappings.end())
return HSA_STATUS_ERROR_INVALID_ALLOCATION;

// TODO:ypapadop-amd: need to unmap memory, but we don't know if it's mapped or not as we don't have
Expand All @@ -211,7 +211,7 @@
}

vmem_handle_mappings.erase(handle);
vmem_handle_mappings_reverse.erase(it);
vmem_addr_mappings.erase(it);

return HSA_STATUS_SUCCESS;
}
Expand Down Expand Up @@ -272,20 +272,12 @@
case HSA_AMD_QUEUE_AIE_ERT_HW_CXT_CONFIG_CU:
return ConfigHwCtxCU(
queue,
*reinterpret_cast<hsa_amd_aie_ert_hw_ctx_config_cu_param_t *>(args));
*reinterpret_cast<hsa_amd_aie_ert_hw_ctx_config_cu_param_addr_t *>(args));
default:
return HSA_STATUS_ERROR_INVALID_ARGUMENT;
}
}

// Looks up the handle recorded for the address `ptr`.
//
// `ptr` must match a key of vmem_handle_mappings_reverse exactly. On a hit
// the stored handle is written to `*handle` and HSA_STATUS_SUCCESS is
// returned. On a miss `*handle` is left untouched and
// HSA_STATUS_ERROR_INVALID_ALLOCATION is returned.
hsa_status_t XdnaDriver::GetHandleFromVaddr(void* ptr, uint32_t* handle) {
  const auto entry = vmem_handle_mappings_reverse.find(ptr);
  const bool is_known_address = entry != vmem_handle_mappings_reverse.end();

  if (is_known_address) {
    *handle = entry->second;
    return HSA_STATUS_SUCCESS;
  }

  // No allocation is registered at this address.
  return HSA_STATUS_ERROR_INVALID_ALLOCATION;
}

hsa_status_t XdnaDriver::QueryDriverVersion() {
amdxdna_drm_query_aie_version aie_version{0, 0};
amdxdna_drm_get_info args{DRM_AMDXDNA_QUERY_AIE_VERSION, sizeof(aie_version),
Expand All @@ -304,7 +296,7 @@
hsa_status_t XdnaDriver::InitDeviceHeap() {
amdxdna_drm_create_bo create_bo_args{.type = AMDXDNA_BO_DEV_HEAP,
.vaddr =
reinterpret_cast<uintptr_t>(nullptr),

Check warning on line 299 in runtime/hsa-runtime/core/driver/xdna/amd_xdna_driver.cpp

View workflow job for this annotation

GitHub Actions / Build (linux)

ISO C++ requires field designators to be specified in declaration order; field 'type' will be initialized after field 'vaddr' [-Wreorder-init-list]
.size = dev_heap_size};
amdxdna_drm_get_bo_info get_bo_info_args{0};
drm_gem_close close_bo_args{0};
Expand Down Expand Up @@ -358,6 +350,11 @@
return HSA_STATUS_SUCCESS;
}

// Copies the driver's address-to-handle table into the caller's map.
//
// The destination is copy-assigned from the member vmem_addr_mappings, so
// anything it held before is replaced and later changes to the driver's
// table are not reflected in it. Always returns HSA_STATUS_SUCCESS.
//
// NOTE(review): the parameter shares its name with `vmem_handle_mappings`
// used elsewhere in this driver (apparently the handle-to-address table);
// inside this function the name refers to the caller's map only. Consider
// renaming it in the declaration and definition together.
hsa_status_t XdnaDriver::GetAddrMappings(std::unordered_map<void*, uint32_t> &vmem_handle_mappings) {
  // Whole-map copy: the caller receives a snapshot.
  vmem_handle_mappings = vmem_addr_mappings;

  return HSA_STATUS_SUCCESS;
}

hsa_status_t XdnaDriver::GetFd(int &fd) {
fd = fd_;
return HSA_STATUS_SUCCESS;
Expand All @@ -379,7 +376,7 @@

hsa_status_t XdnaDriver::ConfigHwCtxCU(
core::Queue &queue,
hsa_amd_aie_ert_hw_ctx_config_cu_param_t &config_cu_param) {
hsa_amd_aie_ert_hw_ctx_config_cu_param_addr_t &config_cu_param) {
if (!AieAqlQueue::IsType(&queue)) {
return HSA_STATUS_ERROR_INVALID_QUEUE;
}
Expand All @@ -401,17 +398,18 @@
xdna_config_cu_param->num_cus = config_cu_param.num_cus;

for (int i = 0; i < xdna_config_cu_param->num_cus; ++i) {
xdna_config_cu_param->cu_configs[i].cu_bo =
config_cu_param.cu_configs[i].cu_config_bo;

// Get the handle from the address
auto cu_bo = vmem_addr_mappings.find(reinterpret_cast<void *>(config_cu_param.cu_configs[i].cu_config_addr));
if (cu_bo == vmem_addr_mappings.end())
return HSA_STATUS_ERROR_INVALID_ALLOCATION;

xdna_config_cu_param->cu_configs[i].cu_bo = cu_bo->second;
xdna_config_cu_param->cu_configs[i].cu_func =
config_cu_param.cu_configs[i].cu_func;

// sync configuration buffer
amdxdna_drm_sync_bo sync_args = {};
sync_args.handle = xdna_config_cu_param->cu_configs[i].cu_bo;
if (ioctl(fd_, DRM_IOCTL_AMDXDNA_SYNC_BO, &sync_args) < 0) {
return HSA_STATUS_ERROR;
}
clflush_data(reinterpret_cast<void *>(config_cu_param.cu_configs[i].cu_config_addr), 0, config_cu_param.cu_configs[i].cu_size);
}

amdxdna_drm_config_hwctx config_hw_ctx_args{
Expand Down
Loading
Loading