From 0f909e0c43d31265f119723f5a717a6cca478b8b Mon Sep 17 00:00:00 2001 From: rbramand Date: Fri, 25 Oct 2024 19:40:52 +0530 Subject: [PATCH] Enable flow without xclbin using new elf Signed-off-by: rbramand --- .../core/common/api/hw_context_int.h | 4 + src/runtime_src/core/common/api/module_int.h | 15 +- .../core/common/api/xrt_hw_context.cpp | 121 ++ .../core/common/api/xrt_kernel.cpp | 149 ++- .../core/common/api/xrt_module.cpp | 1190 +++++++++++------ src/runtime_src/core/common/ishim.h | 9 + src/runtime_src/core/common/xclbin_parser.cpp | 65 +- src/runtime_src/core/common/xclbin_parser.h | 9 + src/runtime_src/core/include/ert.h | 64 +- .../core/include/experimental/xrt_ext.h | 17 + .../core/include/experimental/xrt_module.h | 4 +- src/runtime_src/core/include/shim_int.h | 3 + .../core/include/xrt/xrt_hw_context.h | 59 +- .../xbtracer/src/lib/xrt_module_inst.cpp | 2 +- 14 files changed, 1201 insertions(+), 510 deletions(-) diff --git a/src/runtime_src/core/common/api/hw_context_int.h b/src/runtime_src/core/common/api/hw_context_int.h index 1215de1288d..6178e6f920d 100644 --- a/src/runtime_src/core/common/api/hw_context_int.h +++ b/src/runtime_src/core/common/api/hw_context_int.h @@ -6,6 +6,7 @@ #include "core/common/config.h" // This file defines implementation extensions to the XRT XCLBIN APIs. 
#include "core/include/experimental/xrt_hw_context.h" +#include "core/include/experimental/xrt_module.h" #include @@ -43,6 +44,9 @@ XRT_CORE_COMMON_EXPORT xrt::hw_context create_hw_context_from_implementation(void* hwctx_impl); +xrt::module +get_module(const xrt::hw_context& hwctx, const std::string& kname); + }} // hw_context_int, xrt_core #endif diff --git a/src/runtime_src/core/common/api/module_int.h b/src/runtime_src/core/common/api/module_int.h index 75e95e723c4..fc5da60a520 100644 --- a/src/runtime_src/core/common/api/module_int.h +++ b/src/runtime_src/core/common/api/module_int.h @@ -10,6 +10,8 @@ #include "core/include/experimental/xrt_bo.h" #include "core/include/experimental/xrt_module.h" +#include "ert.h" + #include namespace xrt_core::module_int { @@ -33,7 +35,8 @@ patch(const xrt::module&, const std::string& argnm, size_t index, const xrt::bo& // of the control code buffer. XRT_CORE_COMMON_EXPORT void -patch(const xrt::module&, uint8_t*, size_t*, const std::vector>*); +patch(const xrt::module&, uint8_t*, size_t*, const std::vector>*, + uint32_t index = 0); // Patch scalar into control code at given argument XRT_CORE_COMMON_EXPORT @@ -54,6 +57,16 @@ get_ert_opcode(const xrt::module& module); void dump_scratchpad_mem(const xrt::module& module); +std::string +get_kernel_signature(const xrt::module& module); + +std::string +get_kernel_name(const xrt::module& module); + +// Get partition size if ELF has info +uint32_t +get_partition_size(const xrt::module& module); + } // xrt_core::module_int #endif diff --git a/src/runtime_src/core/common/api/xrt_hw_context.cpp b/src/runtime_src/core/common/api/xrt_hw_context.cpp index 70b72d44ff8..b1dbe22be3a 100644 --- a/src/runtime_src/core/common/api/xrt_hw_context.cpp +++ b/src/runtime_src/core/common/api/xrt_hw_context.cpp @@ -6,8 +6,12 @@ #define XRT_API_SOURCE // exporting xrt_hwcontext.h #define XCL_DRIVER_DLL_EXPORT // exporting xrt_xclbin.h #define XRT_CORE_COMMON_SOURCE // in same dll as coreutil + +#include 
"core/include/experimental/xrt_module.h" #include "core/include/xrt/xrt_hw_context.h" #include "hw_context_int.h" +#include "module_int.h" +#include "xclbin_int.h" #include "core/common/device.h" #include "core/common/trace.h" @@ -30,6 +34,8 @@ class hw_context_impl : public std::enable_shared_from_this std::shared_ptr m_core_device; xrt::xclbin m_xclbin; + std::map m_module_map; // map b/w kernel name and module + uint32_t m_partition_size = 0; cfg_param_type m_cfg_param; access_mode m_mode; std::unique_ptr m_hdl; @@ -53,6 +59,25 @@ class hw_context_impl : public std::enable_shared_from_this , m_hdl{m_core_device->create_hw_context(xclbin_id, m_cfg_param, m_mode)} {} + hw_context_impl(std::shared_ptr device, cfg_param_type cfg_param, access_mode mode) + : m_core_device{std::move(device)} + , m_cfg_param{std::move(cfg_param)} + , m_mode{mode} + {} + + hw_context_impl(std::shared_ptr device, const xrt::elf& elf, cfg_param_type cfg_param, access_mode mode) + : m_core_device{std::move(device)} + , m_cfg_param{std::move(cfg_param)} + , m_mode{mode} + { + auto module = xrt::module(elf); + auto kernel_name = xrt_core::module_int::get_kernel_name(module); + m_module_map[kernel_name] = std::move(module); + + m_partition_size = xrt_core::module_int::get_partition_size(m_module_map.begin()->second); + m_hdl = m_core_device->create_hw_context(m_partition_size, m_cfg_param, mode); + } + std::shared_ptr get_shared_ptr() { @@ -81,6 +106,34 @@ class hw_context_impl : public std::enable_shared_from_this hw_context_impl& operator=(const hw_context_impl&) = delete; hw_context_impl& operator=(hw_context_impl&&) = delete; + void + add_config(const xrt::elf& elf) + { + auto module = xrt::module(elf); + auto kernel_name = xrt_core::module_int::get_kernel_name(module); + auto part_size = xrt_core::module_int::get_partition_size(module); + + // create hw ctx handle if not already created + if (m_hdl == nullptr) { + m_module_map[kernel_name] = std::move(module); + + m_partition_size = 
part_size; + m_hdl = m_core_device->create_hw_context(m_partition_size, m_cfg_param, m_mode); + return; + } + + // add module only if partition size matches existing configuration + if (m_partition_size != part_size) + throw std::runtime_error("can not add config to ctx with different configuration\n"); + + // add module to map if kernel name is different, else throw + for (const auto& m : m_module_map) { + if (kernel_name == xrt_core::module_int::get_kernel_name(m.second)) + throw std::runtime_error("config with kernel already exists, cannot add this config\n"); + } + m_module_map[kernel_name] = std::move(module); + } + void update_qos(const qos_type& qos) { @@ -129,6 +182,16 @@ class hw_context_impl : public std::enable_shared_from_this { return m_usage_logger.get(); } + + xrt::module + get_module(const std::string& kname) const + { + for (const auto& m : m_module_map) { + if (kname == xrt_core::module_int::get_kernel_name(m.second)) + return m.second; + } + throw std::runtime_error("no module found with given kernel name in ctx"); + } }; } // xrt @@ -166,6 +229,12 @@ create_hw_context_from_implementation(void* hwctx_impl) return xrt::hw_context(impl_ptr->get_shared_ptr()); } +xrt::module +get_module(const xrt::hw_context& ctx, const std::string& kname) +{ + return ctx.get_handle()->get_module(kname); +} + } // xrt_core::hw_context_int //////////////////////////////////////////////////////////////// @@ -207,6 +276,41 @@ alloc_hwctx_from_mode(const xrt::device& device, const xrt::uuid& xclbin_id, xrt return handle; } +static std::shared_ptr +alloc_empty_hwctx(const xrt::device& device, const xrt::hw_context::cfg_param_type& cfg_param, xrt::hw_context::access_mode mode) +{ + XRT_TRACE_POINT_SCOPE(xrt_hw_context); + auto handle = std::make_shared(device.get_handle(), cfg_param, mode); + + // Update device is called with a raw pointer to dynamically + // link to callbacks that exist in XDP via a C-style interface + // The create_hw_context_from_implementation 
function is then + // called in XDP create a hw_context to the underlying implementation + xrt_core::xdp::update_device(handle.get()); + + handle->get_usage_logger()->log_hw_ctx_info(handle.get()); + + return handle; +} + +static std::shared_ptr +alloc_hwctx_from_elf(const xrt::device& device, const xrt::elf& elf, const xrt::hw_context::cfg_param_type& cfg_param, + xrt::hw_context::access_mode mode) +{ + XRT_TRACE_POINT_SCOPE(xrt_hw_context); + auto handle = std::make_shared(device.get_handle(), elf, cfg_param, mode); + + // Update device is called with a raw pointer to dynamically + // link to callbacks that exist in XDP via a C-style interface + // The create_hw_context_from_implementation function is then + // called in XDP create a hw_context to the underlying implementation + xrt_core::xdp::update_device(handle.get()); + + handle->get_usage_logger()->log_hw_ctx_info(handle.get()); + + return handle; +} + hw_context:: hw_context(const xrt::device& device, const xrt::uuid& xclbin_id, const xrt::hw_context::cfg_param_type& cfg_param) : detail::pimpl(alloc_hwctx_from_cfg(device, xclbin_id, cfg_param)) @@ -217,6 +321,23 @@ hw_context(const xrt::device& device, const xrt::uuid& xclbin_id, access_mode mo : detail::pimpl(alloc_hwctx_from_mode(device, xclbin_id, mode)) {} +hw_context:: +hw_context(const xrt::device& device, const xrt::elf& elf, const cfg_param_type& cfg_param, access_mode mode) + : detail::pimpl(alloc_hwctx_from_elf(device, elf, cfg_param, mode)) +{} + +hw_context:: +hw_context(const xrt::device& device, const cfg_param_type& cfg_param, access_mode mode) + : detail::pimpl(alloc_empty_hwctx(device, cfg_param, mode)) +{} + +void +hw_context:: +add_config(const xrt::elf& elf) +{ + get_handle()->add_config(elf); +} + void hw_context:: update_qos(const qos_type& qos) diff --git a/src/runtime_src/core/common/api/xrt_kernel.cpp b/src/runtime_src/core/common/api/xrt_kernel.cpp index ac792483ce6..fce31d4bec8 100644 --- 
a/src/runtime_src/core/common/api/xrt_kernel.cpp +++ b/src/runtime_src/core/common/api/xrt_kernel.cpp @@ -1086,7 +1086,7 @@ class argument size_t size; // size in bytes of argument per xclbin explicit - global_type(size_t bytes) + global_type(size_t bytes = 0) : size(bytes) {} @@ -1306,7 +1306,7 @@ class kernel_impl : public std::enable_shared_from_this xrt::xclbin::kernel xkernel; // kernel xclbin metadata std::vector args; // kernel args sorted by argument index std::vector ipctxs; // CU context locks - const property_type& properties; // Kernel properties from XML meta + property_type properties; // Kernel properties from XML meta std::bitset cumask; // cumask for command execution size_t regmap_size = 0; // CU register map size size_t fa_num_inputs = 0; // Fast adapter number of inputs per meta data @@ -1316,6 +1316,7 @@ class kernel_impl : public std::enable_shared_from_this size_t num_cumasks = 1; // Required number of command cu masks control_type protocol = control_type::none; // Default opcode uint32_t uid; // Internal unique id for debug + uint32_t m_ctrl_code_index = 0; // Index to identify which ctrl code to load in elf std::shared_ptr m_usage_logger = xrt_core::usage_metrics::get_usage_metrics_logger(); @@ -1520,6 +1521,64 @@ class kernel_impl : public std::enable_shared_from_this throw xrt_core::error("No such kernel '" + nm + "'"); } + static std::vector + split(const std::string& s, char delimiter) + { + std::vector tokens; + std::stringstream ss(s); + std::string item; + + while (getline(ss, item, delimiter)) + tokens.push_back(item); + + return tokens; + } + + void + construct_elf_kernel_args(const std::string& kernel_name) + { + // kernel signature - name(argtype, argtype ...) 
+ size_t start_pos = kernel_name.find('('); + size_t end_pos = kernel_name.find(')', start_pos); + + if (start_pos == std::string::npos || end_pos == std::string::npos || start_pos > end_pos) + throw std::runtime_error("Failed to construct kernel args"); + + std::string argstring = kernel_name.substr(start_pos + 1, end_pos - start_pos - 1); + std::vector argstrings = split(argstring, ','); + + size_t count = 0; + size_t offset = 0; + for (const std::string& str : argstrings) { + xrt_core::xclbin::kernel_argument arg; + arg.name = "argv" + std::to_string(count); + arg.hosttype = "no-type"; + arg.port = "no-port"; + arg.index = count; + arg.offset = offset; + arg.dir = xrt_core::xclbin::kernel_argument::direction::input; + // if arg has pointer(*) in its name (eg: char*, void*) it is of type global otherwise scalar + arg.type = (str.find('*') != std::string::npos) + ? xrt_core::xclbin::kernel_argument::argtype::global + : xrt_core::xclbin::kernel_argument::argtype::scalar; + + // At present only global args are supported + // TODO : Add support for scalar args in ELF flow + if (arg.type == xrt_core::xclbin::kernel_argument::argtype::scalar) + throw std::runtime_error("scalar args are not yet supported for this kind of kernel"); + else { + // global arg + static constexpr size_t global_arg_size = 0x8; + arg.size = global_arg_size; + + offset += global_arg_size; + } + + args.emplace_back(arg); + count++; + } + } + public: // kernel_type - constructor // @@ -1579,10 +1638,54 @@ class kernel_impl : public std::enable_shared_from_this m_usage_logger->log_kernel_info(device->core_device.get(), hwctx, name, args.size()); } - // Delegating constructor with no module kernel_impl(std::shared_ptr dev, xrt::hw_context ctx, const std::string& nm) - : kernel_impl{std::move(dev), std::move(ctx), {}, nm} - {} + : device(std::move(dev)) // share ownership + , hwctx(std::move(ctx)) // hw context + , hwqueue(hwctx) // hw queue + , uid(create_uid()) + { + 
XRT_DEBUGF("kernel_impl::kernel_impl(%d)\n", uid); + + // ELF use case, identify module from ctx that has given kernel name and + // get kernel signature from the module to construct kernel args etc + + // kernel name will be of format - : + auto i = nm.find(":"); + if (i == std::string::npos) { + // default case - ctrl code 0 will be used + name = nm.substr(0, nm.size()); + m_ctrl_code_index = 0; + } + else { + name = nm.substr(0, i); + m_ctrl_code_index = std::stoul(nm.substr(i+1, nm.size()-i-1)); + } + + m_module = xrt_core::hw_context_int::get_module(hwctx, name); + auto demangled_name = xrt_core::module_int::get_kernel_signature(m_module); + + // extract kernel name + size_t pos = demangled_name.find('('); + if (pos == std::string::npos) + throw std::runtime_error("Failed to get kernel - " + nm); + + if (name != demangled_name.substr(0, pos)) + throw std::runtime_error("Kernel name mismatch, incorrect module picked\n"); + + construct_elf_kernel_args(demangled_name); + + // fill kernel properties + properties.name = name; + properties.type = xrt_core::xclbin::kernel_properties::kernel_type::dpu; + properties.counted_auto_restart = xrt_core::xclbin::get_restart_from_ini(name); + properties.mailbox = xrt_core::xclbin::get_mailbox_from_ini(name); + properties.sw_reset = xrt_core::xclbin::get_sw_reset_from_ini(name); + + // amend args with computed data based on kernel protocol + amend_args(); + + m_usage_logger->log_kernel_info(device->core_device.get(), hwctx, name, args.size()); + } std::shared_ptr get_shared_ptr() @@ -1646,6 +1749,12 @@ class kernel_impl : public std::enable_shared_from_this return name; } + uint32_t + get_ctrl_code_index() const + { + return m_ctrl_code_index; + } + xrt::xclbin get_xclbin() const { @@ -1943,12 +2052,12 @@ class run_impl // This function copies the module into a hw_context. The module // will be associated with hwctx specific memory. 
static xrt::module - copy_module(const xrt::module& module, const xrt::hw_context& hwctx) + copy_module(const xrt::module& module, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx) { if (!module) return {}; - return {module, hwctx}; + return {module, hwctx, ctrl_code_idx}; } virtual std::unique_ptr @@ -2011,6 +2120,10 @@ class run_impl xrt::bo validate_bo_at_index(size_t index, const xrt::bo& bo) { + // ELF flow doesn't have arg connectivity, so skip validation + if (!kernel->get_xclbin()) + return bo; + xcl_bo_flags grp {xrt_core::bo::group_id(bo)}; if (validate_ip_arg_connectivity(index, grp.bank)) return bo; @@ -2060,15 +2173,14 @@ class run_impl { auto kcmd = pkt->get_ert_cmd(); auto payload = kernel->initialize_command(pkt); - - if (kcmd->opcode == ERT_START_DPU || kcmd->opcode == ERT_START_NPU || kcmd->opcode == ERT_START_NPU_PREEMPT) { + if (kcmd->opcode == ERT_START_DPU || kcmd->opcode == ERT_START_NPU || kcmd->opcode == ERT_START_NPU_PREEMPT || + kcmd->opcode == ERT_START_NPU_PDI_IN_ELF) { auto payload_past_dpu = initialize_dpu(payload); // adjust count to include the prepended ert_dpu_data structures kcmd->count += payload_past_dpu - payload; payload = payload_past_dpu; } - return payload; } @@ -2125,7 +2237,7 @@ class run_impl explicit run_impl(std::shared_ptr k) : kernel(std::move(k)) - , m_module{copy_module(kernel->get_module(), kernel->get_hw_context())} + , m_module{copy_module(kernel->get_module(), kernel->get_hw_context(), kernel->get_ctrl_code_index())} , m_hwqueue(kernel->get_hw_queue()) , ips(kernel->get_ips()) , cumask(kernel->get_cumask()) @@ -3442,7 +3554,8 @@ alloc_kernel_from_ctx(const std::shared_ptr& dev, const xrt::hw_context& hwctx, const std::string& name) { - return std::make_shared(dev, hwctx, name); + // Delegating constructor with no module + return std::make_shared(dev, hwctx, xrt::module{}, name); } static std::shared_ptr @@ -3454,6 +3567,14 @@ alloc_kernel_from_module(const std::shared_ptr& dev, return std::make_shared(dev, 
hwctx, module, name); } +static std::shared_ptr +alloc_kernel_from_name(const std::shared_ptr& dev, + const xrt::hw_context& hwctx, + const std::string& name) +{ + return std::make_shared(dev, hwctx, name); +} + static std::shared_ptr get_mailbox_impl(const xrt::run& run) { @@ -4136,6 +4257,10 @@ kernel(const xrt::hw_context& ctx, const xrt::module& mod, const std::string& na : xrt::kernel::kernel{alloc_kernel_from_module(get_device(ctx.get_device()), ctx, mod, name)} {} +kernel:: +kernel(const xrt::hw_context& ctx, const std::string& name) + : xrt::kernel::kernel{alloc_kernel_from_name(get_device(ctx.get_device()), ctx, name)} +{} } // xrt::ext //////////////////////////////////////////////////////////////// diff --git a/src/runtime_src/core/common/api/xrt_module.cpp b/src/runtime_src/core/common/api/xrt_module.cpp index 7c3e7539b19..de895299523 100644 --- a/src/runtime_src/core/common/api/xrt_module.cpp +++ b/src/runtime_src/core/common/api/xrt_module.cpp @@ -31,6 +31,14 @@ #include #include #include +#include + +#ifdef _WIN32 +#include +#pragma comment(lib, "dbghelp.lib") +#else +#include +#endif #ifndef AIE_COLUMN_PAGE_SIZE # define AIE_COLUMN_PAGE_SIZE 8192 // NOLINT @@ -43,8 +51,9 @@ namespace // 0 if no padding is required. The page size should be // embedded as ELF metadata in the future. 
static constexpr size_t column_page_size = AIE_COLUMN_PAGE_SIZE; -static constexpr uint8_t Elf_Amd_Aie2p = 69; -static constexpr uint8_t Elf_Amd_Aie2ps = 64; +static constexpr uint8_t Elf_Amd_Aie2p = 69; +static constexpr uint8_t Elf_Amd_Aie2ps = 64; +static constexpr uint8_t Elf_Amd_Aie2p_config = 70; static const char* Scratch_Pad_Mem_Symbol = "scratch-pad-mem"; static const char* Control_Packet_Symbol = "control-packet"; @@ -92,6 +101,13 @@ struct buf m_data.resize(pad); } + + static const buf& + get_empty_buf() + { + static const buf b = {}; + return b; + } }; using instr_buf = buf; @@ -111,20 +127,22 @@ struct patcher { enum class symbol_type { uc_dma_remote_ptr_symbol_kind = 1, - shim_dma_base_addr_symbol_kind = 2, // patching scheme needed by AIE2PS firmware + shim_dma_base_addr_symbol_kind = 2, // patching scheme needed by AIE2PS firmware scalar_32bit_kind = 3, - control_packet_48 = 4, // patching scheme needed by firmware to patch control packet - shim_dma_48 = 5, // patching scheme needed by firmware to patch instruction buffer + control_packet_48 = 4, // patching scheme needed by firmware to patch control packet + shim_dma_48 = 5, // patching scheme needed by firmware to patch instruction buffer shim_dma_aie4_base_addr_symbol_kind = 6, // patching scheme needed by AIE4 firmware + address_64 = 7, // patching scheme needed to patch pdi address unknown_symbol_kind = 8 }; enum class buf_type { - ctrltext = 0, // control code - ctrldata = 1, // control packet - preempt_save = 2, // preempt_save + ctrltext = 0, // control code + ctrldata = 1, // control packet + preempt_save = 2, // preempt_save preempt_restore = 3, // preempt_restore - buf_type_count = 4 // total number of buf types + pdi = 4, // pdi + buf_type_count = 5 // total number of buf types }; inline static const char* @@ -133,7 +151,8 @@ struct patcher static const char* Section_Name_Array[static_cast(buf_type::buf_type_count)] = { ".ctrltext", ".ctrldata", ".preempt_save", - ".preempt_restore" }; + 
".preempt_restore", + ".pdi"}; return Section_Name_Array[static_cast(bt)]; } @@ -155,9 +174,16 @@ struct patcher , m_ctrlcode_patchinfo(std::move(ctrlcode_offset)) {} -// Replace certain bits of *data_to_patch with register_value. Which bits to be replaced is specified by mask -// For *data_to_patch be 0xbb11aaaa and mask be 0x00ff0000 -// To make *data_to_patch be 0xbb55aaaa, register_value must be 0x00550000 + void + patch64(uint32_t* data_to_patch, uint64_t addr) + { + *data_to_patch = static_cast(addr & 0xffffffff); + *(data_to_patch + 1) = static_cast((addr >> 32) & 0xffffffff); + } + + // Replace certain bits of *data_to_patch with register_value. Which bits to be replaced is specified by mask + // For *data_to_patch be 0xbb11aaaa and mask be 0x00ff0000 + // To make *data_to_patch be 0xbb55aaaa, register_value must be 0x00550000 void patch32(uint32_t* data_to_patch, uint64_t register_value, uint32_t mask) { @@ -232,6 +258,10 @@ struct patcher for (auto item : m_ctrlcode_patchinfo) { auto bd_data_ptr = reinterpret_cast(base + item.offset_to_patch_buffer); switch (m_symbol_type) { + case symbol_type::address_64: + // new_value is a 64bit address + patch64(bd_data_ptr, new_value); + break; case symbol_type::scalar_32bit_kind: // new_value is a register value if (item.mask) @@ -260,33 +290,55 @@ struct patcher } }; - XRT_CORE_UNUSED void - dump_bo(xrt::bo& bo, const std::string& filename) - { - std::ofstream ofs(filename, std::ios::out | std::ios::binary); - if (!ofs.is_open()) - throw std::runtime_error("Failure opening file " + filename + " for writing!"); +XRT_CORE_UNUSED void +dump_bo(xrt::bo& bo, const std::string& filename) +{ + std::ofstream ofs(filename, std::ios::out | std::ios::binary); + if (!ofs.is_open()) + throw std::runtime_error("Failure opening file " + filename + " for writing!"); - auto buf = bo.map(); - ofs.write(buf, bo.size()); - } + auto buf = bo.map(); + ofs.write(buf, bo.size()); +} - XRT_CORE_UNUSED std::string - 
generate_key_string(const std::string& argument_name, patcher::buf_type type) - { - std::string buf_string = std::to_string(static_cast(type)); - return argument_name + buf_string; - } +XRT_CORE_UNUSED std::string +generate_key_string(const std::string& argument_name, patcher::buf_type type, uint32_t index) +{ + std::string buf_string = std::to_string(static_cast(type)); + return argument_name + buf_string + std::to_string(index); +} + +static std::string +demangle(const std::string& mangled_name) +{ +#ifdef _WIN32 + char demangled_name[1024]; + if (UnDecorateSymbolName(mangled_name.c_str(), demangled_name, sizeof(demangled_name), UNDNAME_COMPLETE)) + return std::string(demangled_name); + else + throw std::runtime_error("Error demangling kernel signature"); +#else + int status = 0; + char* demangled_name = abi::__cxa_demangle(mangled_name.c_str(), nullptr, nullptr, &status); + + if (status) + throw std::runtime_error("Error demangling kernel signature"); + + std::string result {demangled_name}; + std::free(demangled_name); // Free the allocated memory by api + return result; +#endif +} } // namespace namespace xrt { - // class module_impl - Base class for different implementations class module_impl { xrt::uuid m_cfg_uuid; // matching hw configuration id + public: explicit module_impl(xrt::uuid cfg_uuid) : m_cfg_uuid(std::move(cfg_uuid)) @@ -319,32 +371,44 @@ class module_impl throw std::runtime_error("Not supported"); } - [[nodiscard]] virtual const instr_buf& - get_instr() const + [[nodiscard]] virtual std::pair + get_instr(uint32_t /*index*/ = 0) const { throw std::runtime_error("Not supported"); } - [[nodiscard]] virtual const buf& - get_preempt_save() const + [[nodiscard]] virtual std::pair + get_preempt_save() const { - throw std::runtime_error("Not supported"); + throw std::runtime_error("Not supported"); + } + + [[nodiscard]] virtual std::pair + get_preempt_restore() const + { + throw std::runtime_error("Not supported"); } [[nodiscard]] virtual const buf& - 
get_preempt_restore() const + get_pdi(const std::string& /*pdi_name*/) const + { + throw std::runtime_error("Not supported"); + } + + [[nodiscard]] virtual const std::unordered_set& + get_patch_pdis(uint32_t /*index*/ = 0) const { - throw std::runtime_error("Not supported"); + throw std::runtime_error("Not supported"); } [[nodiscard]] virtual size_t - get_scratch_pad_mem_size() const + get_scratch_pad_mem_size() const { - throw std::runtime_error("Not supported"); + throw std::runtime_error("Not supported"); } - [[nodiscard]] virtual const control_packet& - get_ctrlpkt() const + [[nodiscard]] virtual std::pair + get_ctrlpkt(uint32_t /*index*/ = 0) const { throw std::runtime_error("Not supported"); } @@ -383,8 +447,9 @@ class module_impl // @param index - argument index // @param bo - global argument to patch into ctrlcode // @param buf_type - whether it is control-code, control-packet, preempt-save or preempt-restore + // @param sec_index - index of section to be patched virtual void - patch_instr(xrt::bo&, const std::string&, size_t, const xrt::bo&, patcher::buf_type) + patch_instr(xrt::bo&, const std::string&, size_t, const xrt::bo&, patcher::buf_type, uint32_t) { throw std::runtime_error("Not supported "); } @@ -418,9 +483,10 @@ class module_impl // @param index - argument index // @param patch - patch value // @param buf_type - whether it is control-code, control-packet, preempt-save or preempt-restore - // @Return true if symbol was patched, false otherwise // + // @param sec_index - index of section to be patched + // @Return true if symbol was patched, false otherwise virtual bool - patch(uint8_t*, const std::string&, size_t, uint64_t, patcher::buf_type) + patch(uint8_t*, const std::string&, size_t, uint64_t, patcher::buf_type, uint32_t) { throw std::runtime_error("Not supported"); } @@ -448,6 +514,73 @@ class module_impl { throw std::runtime_error("Not supported"); } + + // get partition size if elf has the info + [[nodiscard]] virtual uint32_t + 
get_partition_size() const + { + throw std::runtime_error("Not supported"); + } + + // get kernel signature in demangled format + [[nodiscard]] virtual std::string + get_kernel_signature() const + { + throw std::runtime_error("Not supported"); + } + + // get only kernel name without args from kernel signature + [[nodiscard]] virtual std::string + get_kernel_name() const + { + throw std::runtime_error("Not supported"); + } +}; + +// class module_userptr - Opaque userptr provided by application +class module_userptr : public module_impl +{ + std::vector m_ctrlcode; + instr_buf m_instr_buf; + control_packet m_ctrl_pkt; + + // Create a ctrlcode object from the userptr. + static std::vector + initialize_ctrlcode(const char* userptr, size_t sz) + { + std::vector ctrlcodes; + ctrlcodes.resize(1); + ctrlcodes[0].append_section_data(reinterpret_cast(userptr), sz); + return ctrlcodes; + } + +public: + module_userptr(const char* userptr, size_t sz, const xrt::uuid& uuid) + : module_impl{ uuid } + , m_ctrlcode{ initialize_ctrlcode(userptr, sz) } + {} + + module_userptr(const void* userptr, size_t sz, const xrt::uuid& uuid) + : module_userptr(static_cast(userptr), sz, uuid) + {} + + [[nodiscard]] const std::vector& + get_data() const override + { + return m_ctrlcode; + } + + [[nodiscard]] std::pair + get_instr(uint32_t /*index*/) const override + { + return {0, m_instr_buf}; + } + + [[nodiscard]] std::pair + get_ctrlpkt(uint32_t /*index*/) const override + { + return {0, m_ctrl_pkt}; + } }; // class module_elf - Elf provided by application @@ -464,115 +597,455 @@ class module_impl // of a scalar object used as an argument. The relocations are used to // construct patcher objects for each argument. 
class module_elf : public module_impl +{ +protected: + const ELFIO::elfio& m_elfio; // we should not modify underlying elf + uint8_t m_os_abi = Elf_Amd_Aie2p; + std::map m_arg2patcher; + + explicit module_elf(xrt::elf elf) + : module_impl{ elf.get_cfg_uuid() } + , m_elfio(xrt_core::elf_int::get_elfio(elf)) + , m_os_abi(m_elfio.get_os_abi()) + {} + +public: + bool + patch(uint8_t* base, const std::string& argnm, size_t index, uint64_t patch, + patcher::buf_type type, uint32_t sec_index) override + { + const std::string key_string = generate_key_string(argnm, type, sec_index); + auto it = m_arg2patcher.find(key_string); + auto not_found_use_argument_name = (it == m_arg2patcher.end()); + if (not_found_use_argument_name) {// Search using index + auto index_string = std::to_string(index); + const std::string key_index_string = generate_key_string(index_string, type, sec_index); + it = m_arg2patcher.find(key_index_string); + if (it == m_arg2patcher.end()) + return false; + } + + it->second.patch(base, patch); + if (xrt_core::config::get_xrt_debug()) { + if (not_found_use_argument_name) { + std::stringstream ss; + ss << "Patched " << patcher::section_name_to_string(type) << " using argument index " << index << " with value " << std::hex << patch; + xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + } + else { + std::stringstream ss; + ss << "Patched " << patcher::section_name_to_string(type) << " using argument name " << argnm << " with value " << std::hex << patch; + xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + } + } + return true; + } + + [[nodiscard]] uint8_t + get_os_abi() const override + { + return m_os_abi; + } + + [[nodiscard]] size_t + number_of_arg_patchers() const override + { + return m_arg2patcher.size(); + } +}; + +// module class for ELFs with os_abi - Elf_Amd_Aie2p & ELF_Amd_Aie2p_config +class module_elf_aie2p : public module_elf { // rela->addend have offset to 
base-bo-addr info along with schema // [0:3] bit are used for patching schema, [4:31] used for base-bo-addr constexpr static uint32_t addend_shift = 4; constexpr static uint32_t addend_mask = ~((uint32_t)0) << addend_shift; constexpr static uint32_t schema_mask = ~addend_mask; - xrt::elf m_elf; - uint8_t m_os_abi = Elf_Amd_Aie2p; - std::vector m_ctrlcodes; - std::map m_arg2patcher; - instr_buf m_instr_buf; - control_packet m_ctrl_packet; - bool m_ctrl_packet_exist = false; + + // New Elf of Aie2p contain multiple ctrltext, ctrldata sections + // sections will be of format .ctrltext.* where .* has index of that section type + // Below maps has this index as key and value is pair of
+ std::map> m_instr_buf_map; + std::map> m_ctrl_packet_map; + + // Also these new Elfs have multiple PDI sections of format .pdi.* + // Below map has pdi section symbol name as key and section data as value + std::map m_pdi_buf_map; + // map storing pdi symbols that needs patching in ctrl codes + std::map> m_ctrl_pdi_map; + buf m_save_buf; + uint32_t m_save_buf_sec_idx = UINT32_MAX; bool m_save_buf_exist = false; + buf m_restore_buf; + uint32_t m_restore_buf_sec_idx = UINT32_MAX; bool m_restore_buf_exist = false; + size_t m_scratch_pad_mem_size = 0; + uint32_t m_partition_size = UINT32_MAX; + std::string m_kernel_signature; - // The ELF sections embed column and page information in their - // names. Extract the column and page information from the - // section name, default to single column and page when nothing - // is specified. - static std::pair - get_column_and_page(const std::string& name) + static uint32_t + get_section_name_index(const std::string& name) { - constexpr size_t first_dot = 9; // .ctrltext.. - auto dot1 = name.find_first_of(".", first_dot); - auto dot2 = name.find_first_of(".", first_dot + 1); - auto col = dot1 != std::string::npos - ? std::stoi(name.substr(dot1 + 1, dot2)) - : 0; - auto page = dot2 != std::string::npos - ? std::stoi(name.substr(dot2 + 1)) - : 0; - return { col, page }; + // Elf_Amd_Aie2p has sections .sec_name + // Elf_Amd_Aie2p_config has sections .sec_name.* + auto pos = name.find_last_of("."); + return (pos == 0) ? 0 : std::stoul(name.substr(pos + 1, 1)); } - // Extract instruction buffer from ELF sections without assuming anything - // about order of sections in the ELF file. 
- instr_buf - initialize_instr_buf(const ELFIO::elfio& elf) + void + initialize_partition_size() { - instr_buf instrbuf; + static constexpr const char* partition_section_name {".note.xrt.configuration"}; + // note 0 in .note.xrt.configuration section has partition size + static constexpr ELFIO::Elf_Word partition_note_num = 0; - for (const auto& sec : elf.sections) { - auto name = sec->get_name(); - // Instruction buffer is in .ctrltext section. - if (name.find(patcher::section_name_to_string(patcher::buf_type::ctrltext)) == std::string::npos) - continue; - instrbuf.append_section_data(sec.get()); - break; - } + auto partition_section = m_elfio.sections[partition_section_name]; + if (!partition_section) + return; // elf doesn't have partition info section, partition size holds UINT32_MAX - return instrbuf; + ELFIO::note_section_accessor accessor(m_elfio, partition_section); + ELFIO::Elf_Word type; + std::string name; + char* desc; + ELFIO::Elf_Word desc_size; + if (!accessor.get_note(partition_note_num, type, name, desc, desc_size)) + throw std::runtime_error("Failed to get partition info, partition note not found\n"); + m_partition_size = std::stoul(std::string{static_cast(desc), desc_size}); } - // Extract control-packet buffer from ELF sections without assuming anything + void + initialize_kernel_signature() + { + static constexpr const char* symtab_section_name {".symtab"}; + + ELFIO::section* symtab = m_elfio.sections[symtab_section_name]; + if (!symtab) + return; // elf doesn't have .symtab section, kernel_signature will be empty string + + // Get the symbol table + const ELFIO::symbol_section_accessor symbols(m_elfio, symtab); + // Iterate over all symbols + for (ELFIO::Elf_Xword i = 0; i < symbols.get_symbols_num(); ++i) { + std::string name; + ELFIO::Elf64_Addr value; + ELFIO::Elf_Xword size; + unsigned char bind; + unsigned char type; + ELFIO::Elf_Half section_index; + unsigned char other; + + // Read symbol data + if (symbols.get_symbol(i, name, value, 
size, bind, type, section_index, other)) { + // there will be only 1 kernel signature symbol entry in .symtab section whose + // type is FUNC + if (type == ELFIO::STT_FUNC) { + m_kernel_signature = demangle(name); + break; + } + } + } + } + + // Extract buffer from ELF sections without assuming anything // about order of sections in the ELF file. - bool initialize_ctrl_packet(const ELFIO::elfio& elf, control_packet& ctrlpacket) + template + void + initialize_buf(patcher::buf_type type, std::map>& map) { - for (const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); - if (name.find(patcher::section_name_to_string(patcher::buf_type::ctrldata)) == std::string::npos) + auto sec_index = sec->get_index(); + buf_type buf; + // Instruction, control pkt buffers are in section of type .ctrltext.* .ctrldata.*. + if (name.find(patcher::section_name_to_string(type)) == std::string::npos) continue; + + uint32_t index = get_section_name_index(name); + buf.append_section_data(sec.get()); + map.emplace(std::make_pair(index, std::make_pair(sec_index, buf))); + } + } - ctrlpacket.append_section_data(sec.get()); - return true; + void + initialize_pdi_buf() + { + for (const auto& sec : m_elfio.sections) { + auto name = sec->get_name(); + if (name.find(patcher::section_name_to_string(patcher::buf_type::pdi)) == std::string::npos) + continue; + + buf pdi_buf; + pdi_buf.append_section_data(sec.get()); + m_pdi_buf_map.emplace(std::make_pair(name, pdi_buf)); } - return false; } - // Extract preempt_save buffer from ELF sections + // Extract preempt_save/preempt_restore buffer from ELF sections // return true if section exist - bool initialize_save_buf(const ELFIO::elfio& elf, buf& save_buf) + bool + initialize_save_restore_buf(buf& buf, uint32_t& index, patcher::buf_type type) { - for (const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); - if 
(name.find(patcher::section_name_to_string(patcher::buf_type::preempt_save)) == std::string::npos) + if (name.find(patcher::section_name_to_string(type)) == std::string::npos) continue; - save_buf.append_section_data(sec.get()); + buf.append_section_data(sec.get()); + index = sec->get_index(); return true; } return false; } - // Extract preempt_restore buffer from ELF sections - // return true if section exist - bool initialize_restore_buf(const ELFIO::elfio& elf, buf& restore_buf) + std::pair + determine_section_type(const std::string& section_name) { - for (const auto& sec : elf.sections) { - auto name = sec->get_name(); - if (name.find(patcher::section_name_to_string(patcher::buf_type::preempt_restore)) == std::string::npos) - continue; + if (section_name.find(patcher::section_name_to_string(patcher::buf_type::ctrltext)) != std::string::npos) { + auto index = get_section_name_index(section_name); + if (index >= m_instr_buf_map.size()) + throw std::runtime_error("Invalid section passed, section info is not cached\n"); + return { m_instr_buf_map[index].second.size(), patcher::buf_type::ctrltext}; + } + else if (!m_ctrl_packet_map.empty() && + section_name.find(patcher::section_name_to_string(patcher::buf_type::ctrldata)) != std::string::npos) { + auto index = get_section_name_index(section_name); + if (index >= m_ctrl_packet_map.size()) + throw std::runtime_error("Invalid section passed, section info is not cached\n"); + return { m_ctrl_packet_map[index].second.size(), patcher::buf_type::ctrldata}; + } + else if (m_save_buf_exist && (section_name == patcher::section_name_to_string(patcher::buf_type::preempt_save))) + return { m_save_buf.size(), patcher::buf_type::preempt_save }; + else if (m_restore_buf_exist && (section_name == patcher::section_name_to_string(patcher::buf_type::preempt_restore))) + return { m_restore_buf.size(), patcher::buf_type::preempt_restore }; + else if (!m_pdi_buf_map.empty() && + 
section_name.find(patcher::section_name_to_string(patcher::buf_type::pdi)) != std::string::npos) { + if (m_pdi_buf_map.find(section_name) == m_pdi_buf_map.end()) + throw std::runtime_error("Invalid pdi section passed, section info is not cached\n"); + return { m_pdi_buf_map[section_name].size(), patcher::buf_type::pdi }; + } + else + throw std::runtime_error("Invalid section name " + section_name); + } - restore_buf.append_section_data(sec.get()); - return true; + void + initialize_arg_patchers() + { + auto dynsym = m_elfio.sections[".dynsym"]; + auto dynstr = m_elfio.sections[".dynstr"]; + auto dynsec = m_elfio.sections[".rela.dyn"]; + + if (!dynsym || !dynstr || !dynsec) + return; + + auto name = dynsec->get_name(); + + // Iterate over all relocations and construct a patcher for each + // relocation that refers to a symbol in the .dynsym section. + auto begin = reinterpret_cast(dynsec->get_data()); + auto end = begin + dynsec->get_size() / sizeof(const ELFIO::Elf32_Rela); + for (auto rela = begin; rela != end; ++rela) { + auto symidx = ELFIO::get_sym_and_type::get_r_sym(rela->r_info); + + auto dynsym_offset = symidx * sizeof(ELFIO::Elf32_Sym); + if (dynsym_offset >= dynsym->get_size()) + throw std::runtime_error("Invalid symbol index " + std::to_string(symidx)); + auto sym = reinterpret_cast(dynsym->get_data() + dynsym_offset); + + auto dynstr_offset = sym->st_name; + if (dynstr_offset >= dynstr->get_size()) + throw std::runtime_error("Invalid symbol name offset " + std::to_string(dynstr_offset)); + auto symname = dynstr->get_data() + dynstr_offset; + + if (!m_scratch_pad_mem_size && (strcmp(symname, Scratch_Pad_Mem_Symbol) == 0)) { + m_scratch_pad_mem_size = static_cast(sym->st_size); + } + + // Get control code section referenced by the symbol, col, and page + auto section = m_elfio.sections[sym->st_shndx]; + if (!section) + throw std::runtime_error("Invalid section index " + std::to_string(sym->st_shndx)); + + auto offset = rela->r_offset; + auto [sec_size, 
buf_type] = determine_section_type(section->get_name()); + auto sec_index = section->get_index(); + + if (offset >= sec_size) + throw std::runtime_error("Invalid offset " + std::to_string(offset)); + + if (std::string(symname).find("pdi") != std::string::npos) { + // pdi symbol, add to map of which ctrl code needs it + auto idx = get_section_name_index(section->get_name()); + m_ctrl_pdi_map[idx].insert(symname); + } + + uint32_t add_end_higher_28bit = (rela->r_addend & addend_mask) >> addend_shift; + std::string argnm{ symname, symname + std::min(strlen(symname), dynstr->get_size()) }; + + auto patch_scheme = static_cast(rela->r_addend & schema_mask); + + patcher::patch_info pi = patch_scheme == patcher::symbol_type::scalar_32bit_kind ? + // st_size is is encoded using register value mask for scaler_32 + // for other pacthing scheme it is encoded using size of dma + patcher::patch_info{ offset, add_end_higher_28bit, static_cast(sym->st_size) } : + patcher::patch_info{ offset, add_end_higher_28bit, 0 }; + + std::string key_string = generate_key_string(argnm, buf_type, sec_index); + + if (auto search = m_arg2patcher.find(key_string); search != m_arg2patcher.end()) + search->second.m_ctrlcode_patchinfo.emplace_back(pi); + else { + m_arg2patcher.emplace(std::move(key_string), patcher{ patch_scheme, {pi}, buf_type}); + } } + } - return false; +public: + explicit module_elf_aie2p(const xrt::elf& elf) + : module_elf(elf) + { + initialize_partition_size(); + initialize_kernel_signature(); + initialize_buf(patcher::buf_type::ctrltext, m_instr_buf_map); + initialize_buf(patcher::buf_type::ctrldata, m_ctrl_packet_map); + + m_save_buf_exist = initialize_save_restore_buf(m_save_buf, + m_save_buf_sec_idx, + patcher::buf_type::preempt_save); + m_restore_buf_exist = initialize_save_restore_buf(m_restore_buf, + m_restore_buf_sec_idx, + patcher::buf_type::preempt_restore); + if (m_save_buf_exist != m_restore_buf_exist) + throw std::runtime_error{ "Invalid elf because preempt save 
and restore is not paired" }; + + initialize_pdi_buf(); + initialize_arg_patchers(); + } + + ert_cmd_opcode + get_ert_opcode() const override + { + if (!m_pdi_buf_map.empty()) + return ERT_START_NPU_PDI_IN_ELF; + + if (m_save_buf_exist && m_restore_buf_exist) + return ERT_START_NPU_PREEMPT; + + return ERT_START_NPU; + } + + [[nodiscard]] const std::unordered_set& + get_patch_pdis(uint32_t index = 0) const override + { + static const std::unordered_set empty_set = {}; + auto it = m_ctrl_pdi_map.find(index); + if (it != m_ctrl_pdi_map.end()) + return it->second; + + return empty_set; + } + + [[nodiscard]] const buf& + get_pdi(const std::string& pdi_name) const override + { + auto it = m_pdi_buf_map.find(pdi_name); + if (it != m_pdi_buf_map.end()) + return it->second; + + return buf::get_empty_buf(); + } + + [[nodiscard]] std::pair + get_instr(uint32_t index) const override + { + auto it = m_instr_buf_map.find(index); + if (it != m_instr_buf_map.end()) + return it->second; + return std::make_pair(UINT32_MAX, instr_buf::get_empty_buf()); + } + + [[nodiscard]] std::pair + get_ctrlpkt(uint32_t index) const override + { + auto it = m_ctrl_packet_map.find(index); + if (it != m_ctrl_packet_map.end()) + return it->second; + return std::make_pair(UINT32_MAX, control_packet::get_empty_buf()); + } + + [[nodiscard]] std::pair + get_preempt_save() const override + { + return {m_save_buf_sec_idx, m_save_buf}; + } + + [[nodiscard]] std::pair + get_preempt_restore() const override + { + return {m_restore_buf_sec_idx, m_restore_buf}; + } + + [[nodiscard]] virtual uint32_t + get_partition_size() const override + { + if (m_partition_size == UINT32_MAX) + throw std::runtime_error("No partition info available, wrong ELF passed\n"); + return m_partition_size; + } + + [[nodiscard]] virtual std::string + get_kernel_signature() const override + { + if (m_kernel_signature.empty()) + throw std::runtime_error("No kernel signature available, wrong ELF passed\n"); + return m_kernel_signature; + } 
+ + [[nodiscard]] virtual std::string + get_kernel_name() const override + { + std::string demangled_name = get_kernel_signature(); + // extract kernel name + size_t pos = demangled_name.find('('); + if (pos == std::string::npos) + throw std::runtime_error("Failed to get kernel name"); + return demangled_name.substr(0, pos); + } +}; + +// module class for ELFs with os_abi - Elf_Amd_Aie2ps +class module_elf_aie2ps : public module_elf +{ + std::vector m_ctrlcodes; + + // The ELF sections embed column and page information in their + // names. Extract the column and page information from the + // section name, default to single column and page when nothing + // is specified. + static std::pair + get_column_and_page(const std::string& name) + { + constexpr size_t first_dot = 9; // .ctrltext.. + auto dot1 = name.find_first_of(".", first_dot); + auto dot2 = name.find_first_of(".", first_dot + 1); + auto col = dot1 != std::string::npos + ? std::stoi(name.substr(dot1 + 1, dot2)) + : 0; + auto page = dot2 != std::string::npos + ? std::stoi(name.substr(dot2 + 1)) + : 0; + return { col, page }; } // Extract control code from ELF sections without assuming anything // about order of sections in the ELF file. Build helper data // structures that manages the control code data for each column and // page, then create ctrlcode objects from the data. 
- std::vector - initialize_column_ctrlcode(const ELFIO::elfio& elf) + void + initialize_column_ctrlcode() { // Elf sections for a single page struct column_page @@ -599,7 +1072,7 @@ class module_elf : public module_impl // Iterate sections in elf, collect ctrltext and ctrldata // per column and page - for (const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); if (name.find(patcher::section_name_to_string(patcher::buf_type::ctrltext)) != std::string::npos) { auto [col, page] = get_column_and_page(sec->get_name()); @@ -615,121 +1088,28 @@ class module_elf : public module_impl // If page requirement, then pad to page size for page // of a column so that embedded processor can load a page // at a time. - std::vector ctrlcodes; - ctrlcodes.resize(col_secs.size()); + m_ctrlcodes.resize(col_secs.size()); for (auto& [col, col_sec] : col_secs) { for (auto& [page, page_sec] : col_sec.pages) { if (page_sec.ctrltext) - ctrlcodes[col].append_section_data(page_sec.ctrltext); + m_ctrlcodes[col].append_section_data(page_sec.ctrltext); if (page_sec.ctrldata) - ctrlcodes[col].append_section_data(page_sec.ctrldata); - - ctrlcodes[col].pad_to_page(page); - } - } - - return ctrlcodes; - } - - std::pair - determine_section_type(const std::string& section_name) - { - if (section_name == patcher::section_name_to_string(patcher::buf_type::ctrltext)) - return { m_instr_buf.size(), patcher::buf_type::ctrltext}; - - else if (m_ctrl_packet_exist && (section_name == patcher::section_name_to_string(patcher::buf_type::ctrldata))) - return { m_ctrl_packet.size(), patcher::buf_type::ctrldata}; - - else if (m_save_buf_exist && (section_name == patcher::section_name_to_string(patcher::buf_type::preempt_save))) - return { m_save_buf.size(), patcher::buf_type::preempt_save }; - - else if (m_restore_buf_exist && (section_name == patcher::section_name_to_string(patcher::buf_type::preempt_restore))) - return { m_restore_buf.size(), 
patcher::buf_type::preempt_restore }; - - else - throw std::runtime_error("Invalid section name " + section_name); - } - - std::map - initialize_arg_patchers(const ELFIO::elfio& elf) - { - auto dynsym = elf.sections[".dynsym"]; - auto dynstr = elf.sections[".dynstr"]; - - std::map arg2patchers; - - for (const auto& sec : elf.sections) { - auto name = sec->get_name(); - if (name.find(".rela.dyn") == std::string::npos) - continue; - - // Iterate over all relocations and construct a patcher for each - // relocation that refers to a symbol in the .dynsym section. - auto begin = reinterpret_cast(sec->get_data()); - auto end = begin + sec->get_size() / sizeof(const ELFIO::Elf32_Rela); - for (auto rela = begin; rela != end; ++rela) { - auto symidx = ELFIO::get_sym_and_type::get_r_sym(rela->r_info); - - auto dynsym_offset = symidx * sizeof(ELFIO::Elf32_Sym); - if (dynsym_offset >= dynsym->get_size()) - throw std::runtime_error("Invalid symbol index " + std::to_string(symidx)); - auto sym = reinterpret_cast(dynsym->get_data() + dynsym_offset); - - auto dynstr_offset = sym->st_name; - if (dynstr_offset >= dynstr->get_size()) - throw std::runtime_error("Invalid symbol name offset " + std::to_string(dynstr_offset)); - auto symname = dynstr->get_data() + dynstr_offset; - - if (!m_scratch_pad_mem_size && (strcmp(symname, Scratch_Pad_Mem_Symbol) == 0)) { - m_scratch_pad_mem_size = static_cast(sym->st_size); - } - - // Get control code section referenced by the symbol, col, and page - auto section = elf.sections[sym->st_shndx]; - if (!section) - throw std::runtime_error("Invalid section index " + std::to_string(sym->st_shndx)); - - auto offset = rela->r_offset; - auto [sec_size, buf_type] = determine_section_type(section->get_name()); - - if (offset >= sec_size) - throw std::runtime_error("Invalid offset " + std::to_string(offset)); + m_ctrlcodes[col].append_section_data(page_sec.ctrldata); - uint32_t add_end_higher_28bit = (rela->r_addend & addend_mask) >> addend_shift; - 
std::string argnm{ symname, symname + std::min(strlen(symname), dynstr->get_size()) }; - - auto patch_scheme = static_cast(rela->r_addend & schema_mask); - - patcher::patch_info pi = patch_scheme == patcher::symbol_type::scalar_32bit_kind ? - // st_size is is encoded using register value mask for scaler_32 - // for other pacthing scheme it is encoded using size of dma - patcher::patch_info{ offset, add_end_higher_28bit, static_cast(sym->st_size) } : - patcher::patch_info{ offset, add_end_higher_28bit, 0 }; - - std::string key_string = generate_key_string(argnm, buf_type); - - if (auto search = arg2patchers.find(key_string); search != arg2patchers.end()) - search->second.m_ctrlcode_patchinfo.emplace_back(pi); - else { - arg2patchers.emplace(std::move(key_string), patcher{ patch_scheme, {pi}, buf_type}); - } + m_ctrlcodes[col].pad_to_page(page); } } - - return arg2patchers; } - std::map - initialize_arg_patchers(const ELFIO::elfio& elf, const std::vector& ctrlcodes) + void + initialize_arg_patchers(const std::vector& ctrlcodes) { - auto dynsym = elf.sections[".dynsym"]; - auto dynstr = elf.sections[".dynstr"]; + auto dynsym = m_elfio.sections[".dynsym"]; + auto dynstr = m_elfio.sections[".dynstr"]; - std::map arg2patcher; - - for (const auto& sec : elf.sections) { + for (const auto& sec : m_elfio.sections) { auto name = sec->get_name(); if (name.find(".rela.dyn") == std::string::npos) continue; @@ -752,7 +1132,7 @@ class module_elf : public module_impl auto symname = dynstr->get_data() + dynstr_offset; // Get control code section referenced by the symbol, col, and page - auto ctrl_sec = elf.sections[sym->st_shndx]; + auto ctrl_sec = m_elfio.sections[sym->st_shndx]; if (!ctrl_sec) throw std::runtime_error("Invalid section index " + std::to_string(sym->st_shndx)); auto [col, page] = get_column_and_page(ctrl_sec->get_name()); @@ -778,85 +1158,23 @@ class module_elf : public module_impl patcher::buf_type buf_type = patcher::buf_type::ctrltext; auto symbol_type = 
static_cast(rela->r_addend); - arg2patcher.emplace(std::move(generate_key_string(argnm, buf_type)), patcher{ symbol_type, {{ctrlcode_offset, 0}}, buf_type}); - } - } - - return arg2patcher; - } - - bool - patch(uint8_t* base, const std::string& argnm, size_t index, uint64_t patch, patcher::buf_type type) override - { - const std::string key_string = generate_key_string(argnm, type); - auto it = m_arg2patcher.find(key_string); - auto not_found_use_argument_name = (it == m_arg2patcher.end()); - if (not_found_use_argument_name) {// Search using index - auto index_string = std::to_string(index); - const std::string key_index_string = generate_key_string(index_string, type); - it = m_arg2patcher.find(key_index_string); - if (it == m_arg2patcher.end()) - return false; - } - - it->second.patch(base, patch); - if (xrt_core::config::get_xrt_debug()) { - if (not_found_use_argument_name) { - std::stringstream ss; - ss << "Patched " << patcher::section_name_to_string(type) << " using argument index " << index << " with value " << std::hex << patch; - xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); - } - else { - std::stringstream ss; - ss << "Patched " << patcher::section_name_to_string(type) << " using argument name " << argnm << " with value " << std::hex << patch; - xrt_core::message::send( xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + m_arg2patcher.emplace(std::move(generate_key_string(argnm, buf_type, UINT32_MAX)), patcher{ symbol_type, {{ctrlcode_offset, 0}}, buf_type}); } } - return true; } - [[nodiscard]] uint8_t - get_os_abi() const override +public: + explicit module_elf_aie2ps(const xrt::elf& elf) + : module_elf(elf) { - return m_os_abi; + initialize_column_ctrlcode(); + initialize_arg_patchers(m_ctrlcodes); } - ert_cmd_opcode + [[nodiscard]] ert_cmd_opcode get_ert_opcode() const override { - if (m_os_abi == Elf_Amd_Aie2ps) - return ERT_START_DPU; - - if (m_os_abi != Elf_Amd_Aie2p) - throw 
std::runtime_error("ELF os_abi Not supported"); - - if (m_save_buf_exist && m_restore_buf_exist) - return ERT_START_NPU_PREEMPT; - - return ERT_START_NPU; - } - -public: - explicit module_elf(xrt::elf elf) - : module_impl{ elf.get_cfg_uuid() } - , m_elf(std::move(elf)) - , m_os_abi{ xrt_core::elf_int::get_elfio(m_elf).get_os_abi() } - { - if (m_os_abi == Elf_Amd_Aie2ps) { - m_ctrlcodes = initialize_column_ctrlcode(xrt_core::elf_int::get_elfio(m_elf)); - m_arg2patcher = initialize_arg_patchers(xrt_core::elf_int::get_elfio(m_elf), m_ctrlcodes); - } - else if (m_os_abi == Elf_Amd_Aie2p) { - m_instr_buf = initialize_instr_buf(xrt_core::elf_int::get_elfio(m_elf)); - m_ctrl_packet_exist = initialize_ctrl_packet(xrt_core::elf_int::get_elfio(m_elf), m_ctrl_packet); - - m_save_buf_exist = initialize_save_buf(xrt_core::elf_int::get_elfio(m_elf), m_save_buf); - m_restore_buf_exist = initialize_restore_buf(xrt_core::elf_int::get_elfio(m_elf), m_restore_buf); - if (m_save_buf_exist != m_restore_buf_exist) - throw std::runtime_error{ "Invalid elf because preempt save and restore is not paired" }; - - m_arg2patcher = initialize_arg_patchers(xrt_core::elf_int::get_elfio(m_elf)); - } + return ERT_START_DPU; } [[nodiscard]] const std::vector& @@ -864,91 +1182,9 @@ class module_elf : public module_impl { return m_ctrlcodes; } - - [[nodiscard]] const instr_buf& - get_instr() const override - { - return m_instr_buf; - } - - [[nodiscard]] const buf& - get_preempt_save() const override - { - return m_save_buf; - } - - [[nodiscard]] const buf& - get_preempt_restore() const override - { - return m_restore_buf; - } - - [[nodiscard]] virtual size_t - get_scratch_pad_mem_size() const override - { - return m_scratch_pad_mem_size; - } - - [[nodiscard]] const control_packet& - get_ctrlpkt() const override - { - return m_ctrl_packet; - } - - [[nodiscard]] size_t - number_of_arg_patchers() const override - { - return m_arg2patcher.size(); - } -}; - -// class module_userptr - Opaque userptr 
provided by application -class module_userptr : public module_impl -{ - std::vector m_ctrlcode; - instr_buf m_instr_buf; - control_packet m_ctrl_pkt; - - // Create a ctrlcode object from the userptr. - static std::vector - initialize_ctrlcode(const char* userptr, size_t sz) - { - std::vector ctrlcodes; - ctrlcodes.resize(1); - ctrlcodes[0].append_section_data(reinterpret_cast(userptr), sz); - return ctrlcodes; - } - -public: - module_userptr(const char* userptr, size_t sz, const xrt::uuid& uuid) - : module_impl{ uuid } - , m_ctrlcode{ initialize_ctrlcode(userptr, sz) } - {} - - module_userptr(const void* userptr, size_t sz, const xrt::uuid& uuid) - : module_userptr(static_cast(userptr), sz, uuid) - {} - - [[nodiscard]] const std::vector& - get_data() const override - { - return m_ctrlcode; - } - - [[nodiscard]] const instr_buf& - get_instr() const override - { - return m_instr_buf; - } - - [[nodiscard]] const control_packet& - get_ctrlpkt() const override - { - return m_ctrl_pkt; - } }; -// class module_sram - Create an hwct specific (sram) module from parent +// class module_sram - Create an hwctx specific (sram) module from parent // // Allocate a buffer object to hold the ctrlcodes for each column created // by parent module. The ctrlcodes are concatenated into a single buffer @@ -957,6 +1193,9 @@ class module_sram : public module_impl { std::shared_ptr m_parent; xrt::hw_context m_hwctx; + // New ELFs have multiple ctrl sections + // we need index to identify which ctrl section to pick from parent module + uint32_t m_index; // The instruction buffer object contains the ctrlcodes for each // column. The ctrlcodes are concatenated into a single buffer @@ -968,6 +1207,9 @@ class module_sram : public module_impl xrt::bo m_preempt_save_bo; xrt::bo m_preempt_restore_bo; + uint32_t m_instr_sec_idx; + uint32_t m_ctrlpkt_sec_idx; + // Column bo address is the address of the ctrlcode for each column // in the (sram) buffer object. 
The first ctrlcode is at the base // address (m_buffer.address()) of the buffer object. The addresses @@ -1066,7 +1308,8 @@ class module_sram : public module_impl create_instr_buf(const module_impl* parent) { XRT_DEBUGF("-> module_sram::create_instr_buf()\n"); - const auto& data = parent->get_instr(); + instr_buf data; + std::tie(m_instr_sec_idx, data) = parent->get_instr(m_index); size_t sz = data.size(); if (sz == 0) throw std::runtime_error("Invalid instruction buffer size"); @@ -1086,10 +1329,10 @@ class module_sram : public module_impl xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); } - const auto& preempt_save_data = parent->get_preempt_save(); + auto [save_sec_idx, preempt_save_data] = parent->get_preempt_save(); auto preempt_save_data_size = preempt_save_data.size(); - const auto& preempt_restore_data = parent->get_preempt_restore(); + auto [restore_sec_idx, preempt_restore_data] = parent->get_preempt_restore(); auto preempt_restore_data_size = preempt_restore_data.size(); if ((preempt_save_data_size > 0) && (preempt_restore_data_size > 0)) { @@ -1118,8 +1361,10 @@ class module_sram : public module_impl if ((preempt_save_data_size > 0) && (preempt_restore_data_size > 0)) { m_scratch_pad_mem = xrt::ext::bo{ m_hwctx, m_parent->get_scratch_pad_mem_size() }; - patch_instr(m_preempt_save_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, patcher::buf_type::preempt_save); - patch_instr(m_preempt_restore_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, patcher::buf_type::preempt_restore); + patch_instr(m_preempt_save_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, + patcher::buf_type::preempt_save, save_sec_idx); + patch_instr(m_preempt_restore_bo, Scratch_Pad_Mem_Symbol, 0, m_scratch_pad_mem, + patcher::buf_type::preempt_restore, restore_sec_idx); if (is_dump_preemption_codes()) { std::stringstream ss; @@ -1128,8 +1373,18 @@ class module_sram : public module_impl } } + // patch all pdi addresses + auto pdi_symbols = 
parent->get_patch_pdis(m_index); + for (const auto& symbol : pdi_symbols) { + const auto& pdi_data = parent->get_pdi(symbol); + auto pdi_bo = xrt::bo{ m_hwctx, pdi_data.size(), xrt::bo::flags::cacheable, 1 /* fix me */ }; + fill_bo_with_data(pdi_bo, pdi_data); + // patch instr buffer with pdi address + patch_instr(m_instr_bo, symbol, 0, pdi_bo, patcher::buf_type::ctrltext, m_instr_sec_idx); + } + if (m_ctrlpkt_bo) { - patch_instr(m_instr_bo, Control_Packet_Symbol, 0, m_ctrlpkt_bo, patcher::buf_type::ctrltext); + patch_instr(m_instr_bo, Control_Packet_Symbol, 0, m_ctrlpkt_bo, patcher::buf_type::ctrltext, m_instr_sec_idx); } XRT_DEBUGF("<- module_sram::create_instr_buf()\n"); } @@ -1137,7 +1392,8 @@ class module_sram : public module_impl void create_ctrlpkt_buf(const module_impl* parent) { - const auto& data = parent->get_ctrlpkt(); + control_packet data; + std::tie(m_ctrlpkt_sec_idx, data) = parent->get_ctrlpkt(m_index); size_t sz = data.size(); if (sz == 0) { @@ -1150,12 +1406,12 @@ class module_sram : public module_impl fill_ctrlpkt_buf(m_ctrlpkt_bo, data); if (is_dump_control_packet()) { - std::string dump_file_name = "ctr_packet_pre_patch" + std::to_string(get_id()) + ".bin"; - dump_bo(m_ctrlpkt_bo, dump_file_name); + std::string dump_file_name = "ctr_packet_pre_patch" + std::to_string(get_id()) + ".bin"; + dump_bo(m_ctrlpkt_bo, dump_file_name); - std::stringstream ss; - ss << "dumped file " << dump_file_name; - xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); + std::stringstream ss; + ss << "dumped file " << dump_file_name; + xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", ss.str()); } } @@ -1181,27 +1437,28 @@ class module_sram : public module_impl } virtual void - patch_instr(xrt::bo& bo_ctrlcode, const std::string& argnm, size_t index, const xrt::bo& bo, patcher::buf_type type) override + patch_instr(xrt::bo& bo_ctrlcode, const std::string& argnm, size_t index, const xrt::bo& bo, + 
patcher::buf_type type, uint32_t sec_idx) override { - patch_instr_value(bo_ctrlcode, argnm, index, bo.address(), type); + patch_instr_value(bo_ctrlcode, argnm, index, bo.address(), type, sec_idx); } void patch_value(const std::string& argnm, size_t index, uint64_t value) { bool patched = false; - if (m_parent->get_os_abi() == Elf_Amd_Aie2p) { + if (m_parent->get_os_abi() == Elf_Amd_Aie2p || m_parent->get_os_abi() == Elf_Amd_Aie2p_config) { // patch control-packet buffer if (m_ctrlpkt_bo) { - if (m_parent->patch(m_ctrlpkt_bo.map(), argnm, index, value, patcher::buf_type::ctrldata)) + if (m_parent->patch(m_ctrlpkt_bo.map(), argnm, index, value, patcher::buf_type::ctrldata, m_ctrlpkt_sec_idx)) patched = true; } // patch instruction buffer - if (m_parent->patch(m_instr_bo.map(), argnm, index, value, patcher::buf_type::ctrltext)) + if (m_parent->patch(m_instr_bo.map(), argnm, index, value, patcher::buf_type::ctrltext, m_instr_sec_idx)) patched = true; } - else if (m_parent->patch(m_buffer.map(), argnm, index, value, patcher::buf_type::ctrltext)) + else if (m_parent->patch(m_buffer.map(), argnm, index, value, patcher::buf_type::ctrltext, UINT32_MAX)) patched = true; if (patched) { @@ -1211,30 +1468,15 @@ class module_sram : public module_impl } void - patch_instr_value(xrt::bo& bo, const std::string& argnm, size_t index, uint64_t value, patcher::buf_type type) + patch_instr_value(xrt::bo& bo, const std::string& argnm, size_t index, uint64_t value, + patcher::buf_type type, uint32_t sec_index) { - if (!m_parent->patch(bo.map(), argnm, index, value, type)) + if (!m_parent->patch(bo.map(), argnm, index, value, type, sec_index)) return; m_dirty = true; } - void - patch(const std::string& argnm, size_t index, const xrt::bo& bo) override - { - patch_value(argnm, index, bo.address()); - } - - void - patch(const std::string& argnm, size_t index, const void* value, size_t size) override - { - if (size > 8) // NOLINT - throw std::runtime_error{ "patch_value() only supports 64-bit 
values or less" }; - - auto arg_value = *static_cast(value); - patch_value(argnm, index, arg_value); - } - // Check that all arguments have been patched and sync the buffer // to device if it is dirty. void @@ -1252,7 +1494,7 @@ class module_sram : public module_impl } m_buffer.sync(XCL_BO_SYNC_BO_TO_DEVICE); } - else if (os_abi == Elf_Amd_Aie2p) { + else if (os_abi == Elf_Amd_Aie2p || os_abi == Elf_Amd_Aie2p_config) { m_instr_bo.sync(XCL_BO_SYNC_BO_TO_DEVICE); if (is_dump_control_codes()) { @@ -1303,31 +1545,33 @@ class module_sram : public module_impl } uint32_t* - fill_ert_aie2p(uint32_t *payload) const - { - if (m_preempt_save_bo && m_preempt_restore_bo) { - // npu preemption - auto npu = reinterpret_cast(payload); - npu->instruction_buffer = m_instr_bo.address(); - npu->instruction_buffer_size = static_cast(m_instr_bo.size()); - npu->save_buffer = m_preempt_save_bo.address(); - npu->save_buffer_size = static_cast(m_preempt_save_bo.size()); - npu->restore_buffer = m_preempt_restore_bo.address(); - npu->restore_buffer_size = static_cast(m_preempt_restore_bo.size()); - npu->instruction_prop_count = 0; // Reserved for future use - payload += sizeof(ert_npu_preempt_data) / sizeof(uint32_t); - - return payload; - } + fill_ert_aie2p_preempt_data(uint32_t *payload) const + { + // npu preemption in elf_flow + auto npu = reinterpret_cast(payload); + npu->instruction_buffer = m_instr_bo.address(); + npu->instruction_buffer_size = static_cast(m_instr_bo.size()); + npu->instruction_prop_count = 0; // Reserved for future use + if (m_preempt_save_bo && m_preempt_restore_bo) { + npu->save_buffer = m_preempt_save_bo.address(); + npu->save_buffer_size = static_cast(m_preempt_save_bo.size()); + npu->restore_buffer = m_preempt_restore_bo.address(); + npu->restore_buffer_size = static_cast(m_preempt_restore_bo.size()); + } + payload += sizeof(ert_npu_preempt_data) / sizeof(uint32_t); + return payload; + } - // npu non-preemption - auto npu = reinterpret_cast(payload); - 
npu->instruction_buffer = m_instr_bo.address(); - npu->instruction_buffer_size = static_cast(m_instr_bo.size()); - npu->instruction_prop_count = 0; // Reserved for future use - payload += sizeof(ert_npu_data) / sizeof(uint32_t); + uint32_t* + fill_ert_aie2p_non_preempt_data(uint32_t *payload) const + { + auto npu = reinterpret_cast(payload); + npu->instruction_buffer = m_instr_bo.address(); + npu->instruction_buffer_size = static_cast(m_instr_bo.size()); + npu->instruction_prop_count = 0; // Reserved for future use + payload += sizeof(ert_npu_data) / sizeof(uint32_t); - return payload; + return payload; } uint32_t* @@ -1350,10 +1594,11 @@ class module_sram : public module_impl } public: - module_sram(std::shared_ptr parent, xrt::hw_context hwctx) + module_sram(std::shared_ptr parent, xrt::hw_context hwctx, uint32_t index) : module_impl{ parent->get_cfg_uuid() } , m_parent{ std::move(parent) } , m_hwctx{ std::move(hwctx) } + , m_index{ index } { if (xrt_core::config::get_xrt_debug()) { m_debug_mode.debug_flags.dump_control_codes = xrt_core::config::get_feature_toggle("Debug.dump_control_codes"); @@ -1365,8 +1610,8 @@ class module_sram : public module_impl auto os_abi = m_parent.get()->get_os_abi(); - if (os_abi == Elf_Amd_Aie2p) { - // make sure to create control-packet buffer frist because we may + if (os_abi == Elf_Amd_Aie2p || os_abi == Elf_Amd_Aie2p_config) { + // make sure to create control-packet buffer first because we may // need to patch control-packet address to instruction buffer create_ctrlpkt_buf(m_parent.get()); create_instr_buf(m_parent.get()); @@ -1383,16 +1628,22 @@ class module_sram : public module_impl { auto os_abi = m_parent.get()->get_os_abi(); - if (os_abi == Elf_Amd_Aie2p) - return fill_ert_aie2p(payload); + if (os_abi == Elf_Amd_Aie2ps) + return fill_ert_aie2ps(payload); + else if (os_abi == Elf_Amd_Aie2p_config) + return fill_ert_aie2p_preempt_data(payload); - return fill_ert_aie2ps(payload); + // os abi is Elf_Amd_Aie2p + if 
(m_preempt_save_bo && m_preempt_restore_bo) + return fill_ert_aie2p_preempt_data(payload); + else + return fill_ert_aie2p_non_preempt_data(payload); } [[nodiscard]] virtual xrt::bo& - get_scratch_pad_mem() override + get_scratch_pad_mem() override { - return m_scratch_pad_mem; + return m_scratch_pad_mem; } void @@ -1414,6 +1665,22 @@ class module_sram : public module_impl msg.append(dump_file_name); xrt_core::message::send(xrt_core::message::severity_level::debug, "xrt_module", msg); } + + void + patch(const std::string& argnm, size_t index, const xrt::bo& bo) override + { + patch_value(argnm, index, bo.address()); + } + + void + patch(const std::string& argnm, size_t index, const void* value, size_t size) override + { + if (size > 8) // NOLINT + throw std::runtime_error{ "patch_value() only supports 64-bit values or less" }; + + auto arg_value = *static_cast(value); + patch_value(argnm, index, arg_value); + } }; } // namespace xrt @@ -1432,19 +1699,25 @@ fill_ert_dpu_data(const xrt::module& module, uint32_t* payload) void patch(const xrt::module& module, const std::string& argnm, size_t index, const xrt::bo& bo) { - module.get_handle()->patch(argnm, index, bo); + auto module_sram = std::dynamic_pointer_cast(module.get_handle()); + if (!module_sram) + throw std::runtime_error("Getting module_sram failed, wrong module object passed\n"); + module_sram->patch(argnm, index, bo); } void -patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vector>* args) +patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vector>* args, + uint32_t idx) { auto hdl = module.get_handle(); size_t orig_sz = *sz; const buf* inst = nullptr; + uint32_t patch_index = UINT32_MAX; - if (hdl->get_os_abi() == Elf_Amd_Aie2p) { - const auto& instr_buf = hdl->get_instr(); - inst = &instr_buf; + instr_buf instr_data; // function scope on purpose: inst still points at it after the branch below + if (hdl->get_os_abi() == Elf_Amd_Aie2p || hdl->get_os_abi() == Elf_Amd_Aie2p_config) { + std::tie(patch_index, instr_data) = hdl->get_instr(idx); + inst = &instr_data; } else 
if(hdl->get_os_abi() == Elf_Amd_Aie2ps) { const auto& instr_buf = hdl->get_data(); @@ -1466,7 +1739,7 @@ patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vectorpatch(ibuf, arg_name, index, arg_addr, patcher::buf_type::ctrltext)) + if (!hdl->patch(ibuf, arg_name, index, arg_addr, patcher::buf_type::ctrltext, patch_index)) throw std::runtime_error{"Failed to patch " + arg_name}; index++; } @@ -1475,7 +1748,10 @@ patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vectorpatch(argnm, index, value, size); + auto module_sram = std::dynamic_pointer_cast(module.get_handle()); + if (!module_sram) + throw std::runtime_error("Getting module_sram failed, wrong module object passed\n"); + module_sram->patch(argnm, index, value, size); } void @@ -1500,8 +1776,44 @@ dump_scratchpad_mem(const xrt::module& module) module_sram->dump_scratchpad_mem(); } +std::string +get_kernel_name(const xrt::module& module) +{ + return module.get_handle()->get_kernel_name(); +} + +std::string +get_kernel_signature(const xrt::module& module) +{ + return module.get_handle()->get_kernel_signature(); +} + +uint32_t +get_partition_size(const xrt::module& module) +{ + return module.get_handle()->get_partition_size(); +} + } // xrt_core::module_int +namespace +{ +static std::shared_ptr +construct_module_elf(const xrt::elf& elf) +{ + auto os_abi = xrt_core::elf_int::get_elfio(elf).get_os_abi(); + switch (os_abi) { + case Elf_Amd_Aie2p : + case Elf_Amd_Aie2p_config : + return std::make_shared(elf); + case Elf_Amd_Aie2ps : + return std::make_shared(elf); + default : + throw std::runtime_error("unknown ELF type passed\n"); + } +} +} + //////////////////////////////////////////////////////////////// // xrt_module C++ API implementation (xrt_module.h) //////////////////////////////////////////////////////////////// @@ -1509,7 +1821,7 @@ namespace xrt { module:: module(const xrt::elf& elf) -: detail::pimpl{ std::make_shared(elf) } +: 
detail::pimpl(construct_module_elf(elf)) {} module:: @@ -1518,8 +1830,8 @@ module(void* userptr, size_t sz, const xrt::uuid& uuid) {} module:: -module(const xrt::module& parent, const xrt::hw_context& hwctx) -: detail::pimpl{ std::make_shared(parent.handle, hwctx) } +module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx) +: detail::pimpl{ std::make_shared(parent.handle, hwctx, ctrl_code_idx) } {} xrt::uuid diff --git a/src/runtime_src/core/common/ishim.h b/src/runtime_src/core/common/ishim.h index 9a41ce6a790..88192910e22 100755 --- a/src/runtime_src/core/common/ishim.h +++ b/src/runtime_src/core/common/ishim.h @@ -156,6 +156,15 @@ struct ishim const xrt::hw_context::cfg_param_type& /*cfg_params*/, xrt::hw_context::access_mode /*mode*/) const = 0; + // creates hw context using partition size + // Used in elf flow + // This function is not supported by all platforms + virtual std::unique_ptr + create_hw_context(uint32_t /*partition_size*/, + const xrt::hw_context::cfg_param_type& /*cfg_params*/, + xrt::hw_context::access_mode /*mode*/) const + { throw not_supported_error{__func__}; } + // Registers an xclbin with shim, but does not load it. // This is no-op for most platform shims virtual void diff --git a/src/runtime_src/core/common/xclbin_parser.cpp b/src/runtime_src/core/common/xclbin_parser.cpp index 2293f5ae1f9..1e2119446d5 100644 --- a/src/runtime_src/core/common/xclbin_parser.cpp +++ b/src/runtime_src/core/common/xclbin_parser.cpp @@ -86,39 +86,6 @@ convert_to_mailbox_type(const std::string& str) return (*itr).second; } - -// Kernel mailbox -// Needed until meta-data support (Vitis-1147) -// Format is "[/kernel_name/]*" -// mailbox="/kernel1_name/kernel2_name/" -static xrt_core::xclbin::kernel_properties::mailbox_type -get_mailbox_from_ini(const std::string& kname) -{ - static auto mailbox_kernels = xrt_core::config::get_mailbox_kernels(); - return (mailbox_kernels.find("/" + kname + "/") != std::string::npos) - ? 
xrt_core::xclbin::kernel_properties::mailbox_type::inout - : xrt_core::xclbin::kernel_properties::mailbox_type::none; -} - -// Kernel auto restart counter offset -// Needed until meta-data support (Vitis-1147) -static xrt_core::xclbin::kernel_properties::restart_type -get_restart_from_ini(const std::string& kname) -{ - static auto restart_kernels = xrt_core::config::get_auto_restart_kernels(); - return (restart_kernels.find("/" + kname + "/") != std::string::npos) - ? 1 - : 0; -} - -// Kernel software reset -static bool -get_sw_reset_from_ini(const std::string& kname) -{ - static auto reset_kernels = xrt_core::config::get_sw_reset_kernels(); - return (reset_kernels.find("/" + kname + "/") != std::string::npos); -} - static bool is_sw_emulation() { @@ -988,6 +955,38 @@ get_kernel_arguments(const axlf* top, const std::string& kname) return get_kernel_arguments(xml.first, xml.second, kname); } +// Kernel mailbox +// Needed until meta-data support (Vitis-1147) +// Format is "[/kernel_name/]*" +// mailbox="/kernel1_name/kernel2_name/" +kernel_properties::mailbox_type +get_mailbox_from_ini(const std::string& kname) +{ + static auto mailbox_kernels = xrt_core::config::get_mailbox_kernels(); + return (mailbox_kernels.find("/" + kname + "/") != std::string::npos) + ? xrt_core::xclbin::kernel_properties::mailbox_type::inout + : xrt_core::xclbin::kernel_properties::mailbox_type::none; +} + +// Kernel auto restart counter offset +// Needed until meta-data support (Vitis-1147) +kernel_properties::restart_type +get_restart_from_ini(const std::string& kname) +{ + static auto restart_kernels = xrt_core::config::get_auto_restart_kernels(); + return (restart_kernels.find("/" + kname + "/") != std::string::npos) + ? 
1 + : 0; +} + +// Kernel software reset +bool +get_sw_reset_from_ini(const std::string& kname) +{ + static auto reset_kernels = xrt_core::config::get_sw_reset_kernels(); + return (reset_kernels.find("/" + kname + "/") != std::string::npos); +} + kernel_properties get_kernel_properties(const char* xml_data, size_t xml_size, const std::string& kname) { diff --git a/src/runtime_src/core/common/xclbin_parser.h b/src/runtime_src/core/common/xclbin_parser.h index 74c99d99158..e840c2316f1 100644 --- a/src/runtime_src/core/common/xclbin_parser.h +++ b/src/runtime_src/core/common/xclbin_parser.h @@ -424,6 +424,15 @@ get_project_name(const axlf* top); std::string get_fpga_device_name(const char* xml_data, size_t xml_size); +kernel_properties::mailbox_type +get_mailbox_from_ini(const std::string& kname); + +kernel_properties::restart_type +get_restart_from_ini(const std::string& kname); + +bool +get_sw_reset_from_ini(const std::string& kname); + }} // xclbin, xrt_core #endif diff --git a/src/runtime_src/core/include/ert.h b/src/runtime_src/core/include/ert.h index ac5858db474..1c79819a38e 100644 --- a/src/runtime_src/core/include/ert.h +++ b/src/runtime_src/core/include/ert.h @@ -626,28 +626,29 @@ struct cu_cmd_state_timestamps { * @ERT_START_NPU_PREEMPT: instruction buffer command with preemption format on NPU */ enum ert_cmd_opcode { - ERT_START_CU = 0, - ERT_START_KERNEL = 0, - ERT_CONFIGURE = 2, - ERT_EXIT = 3, - ERT_ABORT = 4, - ERT_EXEC_WRITE = 5, - ERT_CU_STAT = 6, - ERT_START_COPYBO = 7, - ERT_SK_CONFIG = 8, - ERT_SK_START = 9, - ERT_SK_UNCONFIG = 10, - ERT_INIT_CU = 11, - ERT_START_FA = 12, - ERT_CLK_CALIB = 13, - ERT_MB_VALIDATE = 14, - ERT_START_KEY_VAL = 15, - ERT_ACCESS_TEST_C = 16, - ERT_ACCESS_TEST = 17, - ERT_START_DPU = 18, - ERT_CMD_CHAIN = 19, - ERT_START_NPU = 20, - ERT_START_NPU_PREEMPT = 21, + ERT_START_CU = 0, + ERT_START_KERNEL = 0, + ERT_CONFIGURE = 2, + ERT_EXIT = 3, + ERT_ABORT = 4, + ERT_EXEC_WRITE = 5, + ERT_CU_STAT = 6, + ERT_START_COPYBO = 7, + 
ERT_SK_CONFIG = 8, + ERT_SK_START = 9, + ERT_SK_UNCONFIG = 10, + ERT_INIT_CU = 11, + ERT_START_FA = 12, + ERT_CLK_CALIB = 13, + ERT_MB_VALIDATE = 14, + ERT_START_KEY_VAL = 15, + ERT_ACCESS_TEST_C = 16, + ERT_ACCESS_TEST = 17, + ERT_START_DPU = 18, + ERT_CMD_CHAIN = 19, + ERT_START_NPU = 20, + ERT_START_NPU_PREEMPT = 21, + ERT_START_NPU_PDI_IN_ELF = 22, }; /** @@ -985,6 +986,11 @@ ert_valid_opcode(struct ert_packet *pkt) /* 1 mandatory cumask + extra_cu_masks + ert_npu_preempt_data */ valid = (skcmd->count >= 1+ skcmd->extra_cu_masks + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t)); break; + case ERT_START_NPU_PDI_IN_ELF: + skcmd = to_start_krnl_pkg(pkt); + /* 1 mandatory cumask + extra_cu_masks + ert_npu_preempt_data */ + valid = (skcmd->count >= 1+ skcmd->extra_cu_masks + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t)); + break; case ERT_START_KEY_VAL: skcmd = to_start_krnl_pkg(pkt); /* 1 cu mask */ @@ -1094,6 +1100,15 @@ get_ert_npu_preempt_data(struct ert_start_kernel_cmd* pkt) return (struct ert_npu_preempt_data*) (pkt->data + pkt->extra_cu_masks); } +static inline struct ert_npu_preempt_data* +get_ert_npu_elf_data(struct ert_start_kernel_cmd* pkt) +{ + if (pkt->opcode != ERT_START_NPU_PDI_IN_ELF) + return NULL; + // past extra cu_masks embedded in the packet data + return (struct ert_npu_preempt_data*) (pkt->data + pkt->extra_cu_masks); +} + static inline uint32_t* get_ert_regmap_begin(struct ert_start_kernel_cmd* pkt) { @@ -1112,6 +1127,11 @@ get_ert_regmap_begin(struct ert_start_kernel_cmd* pkt) + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t) + get_ert_npu_preempt_data(pkt)->instruction_prop_count; + case ERT_START_NPU_PDI_IN_ELF: + return pkt->data + pkt->extra_cu_masks + + sizeof(struct ert_npu_preempt_data) / sizeof(uint32_t) + + get_ert_npu_elf_data(pkt)->instruction_prop_count; + default: // skip past embedded extra cu_masks return pkt->data + pkt->extra_cu_masks; diff --git a/src/runtime_src/core/include/experimental/xrt_ext.h 
b/src/runtime_src/core/include/experimental/xrt_ext.h index 267ba0469e5..ba5429f96b9 100644 --- a/src/runtime_src/core/include/experimental/xrt_ext.h +++ b/src/runtime_src/core/include/experimental/xrt_ext.h @@ -254,6 +254,23 @@ class kernel : public xrt::kernel */ XRT_API_EXPORT kernel(const xrt::hw_context& ctx, const xrt::module& mod, const std::string& name); + + /** + * kernel() - Constructor from kernel name + * + * @param ctx + * The hardware context that this kernel is created in + * @param name + * Name of kernel function to construct + * + * Constructs a kernel object by searching through all the ELF files + * that are registered with the provided context. The function looks + * for an ELF file that contains a kernel with the specified name. + * Once a matching ELF file is found, it is used to construct the + * kernel object. + */ + XRT_API_EXPORT + kernel(const xrt::hw_context& ctx, const std::string& name); }; } // xrt::ext diff --git a/src/runtime_src/core/include/experimental/xrt_module.h b/src/runtime_src/core/include/experimental/xrt_module.h index 41492afa982..28632ae4a15 100644 --- a/src/runtime_src/core/include/experimental/xrt_module.h +++ b/src/runtime_src/core/include/experimental/xrt_module.h @@ -85,6 +85,8 @@ class module : public detail::pimpl * Parent module with instruction buffer to move into hwctx * @param hwctx * Hardware context to associate with module + * @param ctrl_code_idx + * index of control code inside the parent module * * Copy content of existing module into an allocation associated * with the specified hardware context. 
@@ -92,7 +94,7 @@ class module : public detail::pimpl * Throws if module is not compatible with hardware context */ XRT_API_EXPORT - module(const xrt::module& parent, const xrt::hw_context& hwctx); + module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx = 0); /** * get_cfg_uuid() - Get the uuid of the hardware configuration diff --git a/src/runtime_src/core/include/shim_int.h b/src/runtime_src/core/include/shim_int.h index 13e4f290933..607036740f2 100644 --- a/src/runtime_src/core/include/shim_int.h +++ b/src/runtime_src/core/include/shim_int.h @@ -75,6 +75,9 @@ create_hw_context(xclDeviceHandle handle, const xrt::hw_context::cfg_param_type& cfg_param, xrt::hw_context::access_mode mode); +std::unique_ptr +create_hw_context(xclDeviceHandle handle, uint32_t partition_size); + // get_hw_queue() - xrt_core::hwqueue_handle* get_hw_queue(xclDeviceHandle handle, xrt_core::hwctx_handle* ctxhdl); diff --git a/src/runtime_src/core/include/xrt/xrt_hw_context.h b/src/runtime_src/core/include/xrt/xrt_hw_context.h index 200c7aa2a27..9321d818fa6 100644 --- a/src/runtime_src/core/include/xrt/xrt_hw_context.h +++ b/src/runtime_src/core/include/xrt/xrt_hw_context.h @@ -9,6 +9,8 @@ #include "xrt/xrt_device.h" #include "xrt/xrt_uuid.h" +#include "experimental/xrt_elf.h" + #ifdef __cplusplus #include @@ -76,6 +78,59 @@ class hw_context : public detail::pimpl */ hw_context() = default; + /** + * hw_context() - Constructor with QoS control and access control + * + * @param device + * Device where context is created + * @param cfg_param + * Configuration Parameters (incl. 
Quality of Service) + * @param mode + * Access control for the context + * + * When an application uses this constructor, no hw resources are allocated. + * It acts as a placeholder used for setting QoS and access control. + * The QoS definition is subject to change, so this API is not guaranteed + * to be ABI compatible in future releases. + */ + XRT_API_EXPORT + hw_context(const xrt::device& device, const cfg_param_type& cfg_param, access_mode mode); + + /** + * hw_context() - Constructor with Elf file + * + * @param device + * Device where context is created + * @param elf + * XRT Elf object created from config Elf file + * @param cfg_param + * Configuration Parameters (incl. Quality of Service) + * @param mode + * Access control for the context + * + * The QoS definition is subject to change, so this API is not guaranteed + * to be ABI compatible in future releases. When cfg_param and access_mode + * are not passed, a hw context with shared access mode is created. + */ + XRT_API_EXPORT + hw_context(const xrt::device& device, const xrt::elf& elf, + const cfg_param_type& cfg_param = cfg_param_type{}, + access_mode mode = access_mode::shared); + + /** + * add_config() - adds config Elf file to the context + * + * @param elf + * XRT Elf object created from config Elf file + * + * Adds config Elf to the context if it is the first config added. + * If a config already exists, the new one is added only when its configuration + * matches the existing one; otherwise an exception is thrown. + */ + XRT_API_EXPORT + void + add_config(const xrt::elf& elf); + /** * hw_context() - Constructor with QoS control * @@ -83,7 +138,7 @@ class hw_context : public detail::pimpl * Device where context is created * @param xclbin_id * UUID of xclbin that should be assigned to HW resources - * @cfg_param + * @param cfg_param * Configuration Parameters (incl. 
Quality of Service) * * The QoS definition is subject to change, so this API is not guaranteed @@ -165,6 +220,7 @@ class hw_context : public detail::pimpl /** * get_xclbin_uuid() - UUID of xclbin from which context was created + * Returns empty uuid if context was created without xclbin (created with Elf) */ XRT_API_EXPORT xrt::uuid @@ -172,6 +228,7 @@ class hw_context : public detail::pimpl /** * get_xclbin() - Retrieve underlying xclbin matching the UUID + * Returns empty xclbin if context was created without xclbin (created with Elf) */ XRT_API_EXPORT xrt::xclbin diff --git a/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp b/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp index c3f050dcbcf..76c1892cfb6 100755 --- a/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp +++ b/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp @@ -38,7 +38,7 @@ module::module(void* userptr, size_t sz, const xrt::uuid& uuid) XRT_TOOLS_XBT_FUNC_EXIT(func); } -module::module(const xrt::module& parent, const xrt::hw_context& hwctx) +module::module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t /*ctrl_code_idx*/) { auto func = "xrt::module::module(const xrt::module&, const xrt::hw_context&)"; XRT_TOOLS_XBT_CALL_CTOR(dtbl.module.ctor_mod_ctx, this, parent, hwctx);