From 574c4027c3f4f153af803240c0a5fe00280b4cf1 Mon Sep 17 00:00:00 2001 From: Ch Vamshi Krishna <40261882+chvamshi-xilinx@users.noreply.github.com> Date: Mon, 9 Dec 2024 02:17:34 +0530 Subject: [PATCH] Enabled control packet flow for next gen AIE devices (#8634) * merge conflicts * merge conflicts * rebased * fixing warnings --------- Co-authored-by: ch vamshi krishna --- .../core/common/api/xrt_module.cpp | 163 +++++++++++++----- src/runtime_src/core/edge/common/aie_parser.h | 2 +- src/runtime_src/core/edge/user/aie/aie.cpp | 6 +- 3 files changed, 125 insertions(+), 46 deletions(-) diff --git a/src/runtime_src/core/common/api/xrt_module.cpp b/src/runtime_src/core/common/api/xrt_module.cpp index f85e4274c1..2a7a718335 100644 --- a/src/runtime_src/core/common/api/xrt_module.cpp +++ b/src/runtime_src/core/common/api/xrt_module.cpp @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include @@ -48,6 +49,7 @@ static constexpr uint8_t Elf_Amd_Aie2ps = 64; static const char* Scratch_Pad_Mem_Symbol = "scratch-pad-mem"; static const char* Control_Packet_Symbol = "control-packet"; +static const char* Control_Code_Symbol = "control-code"; struct buf { @@ -116,6 +118,7 @@ struct patcher control_packet_48 = 4, // patching scheme needed by firmware to patch control packet shim_dma_48 = 5, // patching scheme needed by firmware to patch instruction buffer shim_dma_aie4_base_addr_symbol_kind = 6, // patching scheme needed by AIE4 firmware + control_packet_57 = 7, // patching scheme needed by firmware to patch control packet for aie2ps unknown_symbol_kind = 8 }; @@ -126,7 +129,8 @@ struct patcher preempt_restore = 3, // preempt_restore pdi = 4, // pdi ctrlpkt_pm = 5, // preemption ctrl pkt - buf_type_count = 6 // total number of buf types + pad = 6, // scratchpad/control packet section name for next gen aie devices + buf_type_count = 7 // total number of buf types }; buf_type m_buf_type = buf_type::ctrltext; @@ -149,7 +153,8 @@ struct patcher ".preempt_save", ".preempt_restore", ".pdi", - ".ctrlpkt.pm"}; + ".ctrlpkt.pm", + ".pad"}; return Section_Name_Array[static_cast(bt)]; } @@ -202,6 +207,19 @@ struct patcher bd_data_ptr[0] = (bd_data_ptr[0] & 0xFE000000) | ((base_address >> 32) & 0x1FFFFFF);// NOLINT } + void + patch_ctrl57(uint32_t* bd_data_ptr, uint64_t patch) const + { + //TODO need to change below logic to patch 57 bits + uint64_t base_address = + ((static_cast(bd_data_ptr[3]) & 0xFFF) << 32) | // NOLINT + ((static_cast(bd_data_ptr[2]))); + + base_address = base_address + patch; + bd_data_ptr[2] = (uint32_t)(base_address & 0xFFFFFFFC); // NOLINT + bd_data_ptr[3] = (bd_data_ptr[3] & 0xFFFF0000) | (base_address >> 32); // NOLINT + } + void patch_ctrl48(uint32_t* bd_data_ptr, uint64_t patch) const { @@ -250,6 +268,10 @@ struct patcher // new_value is a bo address patch57_aie4(bd_data_ptr, new_value + item.offset_to_base_bo_addr); break; + case symbol_type::control_packet_57: + // new_value is a bo address + patch_ctrl57(bd_data_ptr, new_value + item.offset_to_base_bo_addr); + break; case symbol_type::control_packet_48: // new_value is a bo address patch_ctrl48(bd_data_ptr, new_value + item.offset_to_base_bo_addr); @@ -606,7 +628,7 @@ class module_elf : public module_impl // structures that manages the control code data for each column and // page, then create ctrlcode objects from the data. std::vector - initialize_column_ctrlcode(const ELFIO::elfio& elf) + initialize_column_ctrlcode(const ELFIO::elfio& elf, std::vector& pad_offsets) { // Elf sections for a single page struct column_page @@ -651,6 +673,7 @@ class module_elf : public module_impl // at a time. std::vector ctrlcodes; ctrlcodes.resize(col_secs.size()); + pad_offsets.resize(col_secs.size()); for (auto& [col, col_sec] : col_secs) { for (auto& [page, page_sec] : col_sec.pages) { @@ -662,11 +685,36 @@ class module_elf : public module_impl ctrlcodes[col].pad_to_page(page); } + pad_offsets[col] = ctrlcodes[col].size(); + } + + // Append pad section to the control code. + // This section may contain scratchpad/control-packet etc + for (const auto& sec : elf.sections) { + auto name = sec->get_name(); + if (name.find(patcher::section_name_to_string(patcher::buf_type::pad)) == std::string::npos) + continue; + + auto col = get_col_idx(name); + ctrlcodes[col].append_section_data(sec.get()); } return ctrlcodes; } + // This function returns the column number for which this arg belongs to + static int + get_col_idx(const std::string& name) + { + // arg name will be of format - .control_code-.* + std::regex expr("\\d+"); // Regular expression to match one or more digits + std::smatch match; + if (!(std::regex_search(name, match, expr))) + throw std::runtime_error("incorrect section name found when parsing ctrlpkt"); + + return std::stoi(match.str()); + } + std::pair determine_section_type(const std::string& section_name) { @@ -762,7 +810,7 @@ class module_elf : public module_impl } std::map - initialize_arg_patchers(const ELFIO::elfio& elf, const std::vector& ctrlcodes) + initialize_arg_patchers(const ELFIO::elfio& elf, const std::vector& ctrlcodes, const std::vector& pad_offsets) { auto dynsym = elf.sections[".dynsym"]; auto dynstr = elf.sections[".dynstr"]; @@ -791,31 +839,46 @@ class module_elf : public module_impl throw std::runtime_error("Invalid symbol name offset " + std::to_string(dynstr_offset)); auto symname = dynstr->get_data() + dynstr_offset; - // Get control code section referenced by the symbol, col, and page - auto ctrl_sec = elf.sections[sym->st_shndx]; - if (!ctrl_sec) + // patching can be done to ctrlcode or ctrlpkt section + auto patch_sec = elf.sections[sym->st_shndx]; + if (!patch_sec) throw std::runtime_error("Invalid section index " + std::to_string(sym->st_shndx)); - auto [col, page] = get_column_and_page(ctrl_sec->get_name()); - - auto column_ctrlcode_size = ctrlcodes.at(col).size(); - auto column_ctrlcode_offset = page * column_page_size + rela->r_offset + 16; // NOLINT magic number 16?? - if (column_ctrlcode_offset >= column_ctrlcode_size) - throw std::runtime_error("Invalid ctrlcode offset " + std::to_string(column_ctrlcode_offset)); - - // The control code for all columns will be represented as one - // contiguous buffer object. The patcher will need to know - // the offset into the buffer object for the particular column - // and page being patched. Past first [0, col) columns plus - // page offset within column and the relocation offset within - // the page. - uint64_t ctrlcode_offset = 0; - for (uint32_t i = 0; i < col; ++i) - ctrlcode_offset += ctrlcodes.at(i).size(); - ctrlcode_offset += column_ctrlcode_offset; + + auto patch_sec_name = patch_sec->get_name(); + size_t abs_offset = 0; + patcher::buf_type buf_type; + + if (patch_sec_name.find(patcher::section_name_to_string(patcher::buf_type::pad)) != std::string::npos) { + auto col = get_col_idx(patch_sec_name); + for (int i = 0; i < col; ++i) + abs_offset += ctrlcodes.at(i).size(); + abs_offset += pad_offsets.at(col); + abs_offset += rela->r_offset; + buf_type = patcher::buf_type::pad; + } + else { + // section to patch is ctrlcode + // Get control code section referenced by the symbol, col, and page + auto [col, page] = get_column_and_page(patch_sec_name); + auto column_ctrlcode_size = ctrlcodes.at(col).size(); + auto sec_offset = page * column_page_size + rela->r_offset + 16; // NOLINT magic number 16?? + if (sec_offset >= column_ctrlcode_size) + throw std::runtime_error("Invalid ctrlcode offset " + std::to_string(sec_offset)); + + // The control code for all columns will be represented as one + // contiguous buffer object. The patcher will need to know + // the offset into the buffer object for the particular column + // and page being patched. Past first [0, col) columns plus + // page offset within column and the relocation offset within + // the page. + for (uint32_t i = 0; i < col; ++i) + abs_offset += ctrlcodes.at(i).size(); + abs_offset += sec_offset; + buf_type = patcher::buf_type::ctrltext; + } // Construct the patcher for the argument with the symbol name std::string argnm{ symname, symname + std::min(strlen(symname), dynstr->get_size()) }; - patcher::buf_type buf_type = patcher::buf_type::ctrltext; auto symbol_type = static_cast(rela->r_addend); @@ -830,9 +893,9 @@ class module_elf : public module_impl // On all further occurences of arg, add patch_info structure to existing vector if (auto search = arg2patcher.find(key_string); search != arg2patcher.end()) - search->second.m_ctrlcode_patchinfo.emplace_back(patcher::patch_info{ctrlcode_offset, 0, 0}); + search->second.m_ctrlcode_patchinfo.emplace_back(patcher::patch_info{abs_offset, 0, 0}); else - arg2patcher.emplace(std::move(key_string), patcher{symbol_type, {{ctrlcode_offset, 0}}, buf_type}); + arg2patcher.emplace(std::move(key_string), patcher{symbol_type, {{abs_offset, 0}}, buf_type}); } } @@ -897,8 +960,9 @@ class module_elf : public module_impl , m_os_abi{ xrt_core::elf_int::get_elfio(m_elf).get_os_abi() } { if (m_os_abi == Elf_Amd_Aie2ps) { - m_ctrlcodes = initialize_column_ctrlcode(xrt_core::elf_int::get_elfio(m_elf)); - m_arg2patcher = initialize_arg_patchers(xrt_core::elf_int::get_elfio(m_elf), m_ctrlcodes); + std::vector pad_offsets; + m_ctrlcodes = initialize_column_ctrlcode(xrt_core::elf_int::get_elfio(m_elf),pad_offsets); + m_arg2patcher = initialize_arg_patchers(xrt_core::elf_int::get_elfio(m_elf), m_ctrlcodes, pad_offsets); } else if (m_os_abi == Elf_Amd_Aie2p) { m_instr_buf = initialize_instr_buf(xrt_core::elf_int::get_elfio(m_elf)); @@ -1105,17 +1169,30 @@ class module_sram : public module_impl m_column_bo_address.push_back({ m_instr_bo.address(), m_instr_bo.size() }); // NOLINT } - // Fill the instruction buffer object with the ctrlcodes for each + // Fill the instruction buffer object with the data for each // column and sync the buffer to device. void - fill_instruction_buffer(xrt::bo& bo, const std::vector& ctrlcodes) + fill_instruction_buffer(const std::vector& ctrlcodes) { - auto ptr = bo.map(); + auto ptr = m_buffer.map(); for (const auto& ctrlcode : ctrlcodes) { std::memcpy(ptr, ctrlcode.data(), ctrlcode.size()); ptr += ctrlcode.size(); } - bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); + + // Iterate over control packets of all columns & patch it in instruction + // buffer + auto col_data = m_parent->get_data(); + size_t offset = 0; + for (size_t i = 0; i < col_data.size(); ++i) { + // find the control-code-* sym-name and patch it in instruction buffer + // This name is an agreement between aiebu and XRT + auto sym_name = std::string(Control_Code_Symbol) + "-" + std::to_string(i); + if (patch_instr_value(m_buffer, sym_name, std::numeric_limits::max() , m_buffer.address() + offset, patcher::buf_type::ctrltext)) + m_patched_args.insert(sym_name); + offset += col_data[i].size(); + } + m_buffer.sync(XCL_BO_SYNC_BO_TO_DEVICE); } void @@ -1255,8 +1332,7 @@ class module_sram : public module_impl } } - // Create the instruction buffer object and fill it with column - // ctrlcodes. + // Create the instruction buffer with all columns data along with pad section void create_instruction_buffer(const module_impl* parent) { @@ -1273,7 +1349,7 @@ class module_sram : public module_impl m_buffer = xrt::bo{ m_hwctx, sz, xrt::bo::flags::cacheable, 1 /* fix me */ }; - fill_instruction_buffer(m_buffer, data); + fill_instruction_buffer(data); } void @@ -1292,13 +1368,17 @@ class module_sram : public module_impl if (m_parent->patch_it(m_ctrlpkt_bo.map(), argnm, index, value, patcher::buf_type::ctrldata)) patched = true; } - // patch instruction buffer if (m_parent->patch_it(m_instr_bo.map(), argnm, index, value, patcher::buf_type::ctrltext)) patched = true; } - else if (m_parent->patch_it(m_buffer.map(), argnm, index, value, patcher::buf_type::ctrltext)) - patched = true; + else { + if (m_parent->patch_it(m_buffer.map(), argnm, index, value, patcher::buf_type::ctrltext)) + patched = true; + + if (m_parent->patch_it(m_buffer.map(), argnm, index, value, patcher::buf_type::pad)) + patched = true; + } if (patched) { m_patched_args.insert(argnm); @@ -1306,13 +1386,14 @@ class module_sram : public module_impl } } - void + bool patch_instr_value(xrt::bo& bo, const std::string& argnm, size_t index, uint64_t value, patcher::buf_type type) { if (!m_parent->patch_it(bo.map(), argnm, index, value, type)) - return; + return false; m_dirty = true; + return true; } void diff --git a/src/runtime_src/core/edge/common/aie_parser.h b/src/runtime_src/core/edge/common/aie_parser.h index bcbe621810..80b297fc9a 100644 --- a/src/runtime_src/core/edge/common/aie_parser.h +++ b/src/runtime_src/core/edge/common/aie_parser.h @@ -29,7 +29,7 @@ class device; namespace edge { namespace aie { -constexpr auto full_array_id = 1; +constexpr uint32_t full_array_id = 1; enum class module_type { core = 0, diff --git a/src/runtime_src/core/edge/user/aie/aie.cpp b/src/runtime_src/core/edge/user/aie/aie.cpp index 0f9631a4e1..a6501e9ec2 100755 --- a/src/runtime_src/core/edge/user/aie/aie.cpp +++ b/src/runtime_src/core/edge/user/aie/aie.cpp @@ -54,8 +54,7 @@ aie_array(const std::shared_ptr& device) /* TODO get partition id and uid from XCLBIN or PDI */ auto partition_id = xrt_core::edge::aie::full_array_id; - auto uid = 0; - drm_zocl_aie_fd aiefd = { 0 , partition_id, uid, 0 }; + drm_zocl_aie_fd aiefd = { 0 , partition_id, 0 , 0 }; int ret = drv->getPartitionFd(aiefd); if (ret) throw xrt_core::error(ret, "Create AIE failed. Can not get AIE fd"); @@ -119,9 +118,8 @@ aie_array(const std::shared_ptr& device, const zynqaie::hwctx_ auto drv = ZYNQ::shim::handleCheck(device->get_device_handle()); auto partition_id = part_info.partition_id; - auto uid = 0; auto hw_context_id = hwctx_obj->get_slotidx(); - drm_zocl_aie_fd aiefd = { hw_context_id , partition_id, uid, 0 }; + drm_zocl_aie_fd aiefd = { hw_context_id , partition_id, 0 , 0 }; //TODO: getparitionFd from driver instead of from shim if (auto ret = drv->getPartitionFd(aiefd))