diff --git a/src/runtime_src/core/common/api/module_int.h b/src/runtime_src/core/common/api/module_int.h index 72655f382c..cb00f52bee 100644 --- a/src/runtime_src/core/common/api/module_int.h +++ b/src/runtime_src/core/common/api/module_int.h @@ -38,6 +38,8 @@ patch(const xrt::module&, const std::string& argnm, size_t index, const xrt::bo& // Note that if size passed in is 0, real buffer size required will be returned // without any patching. This is useful if caller wishes to discover the exact size // of the control code buffer. +// New ELFs pack info for multiple control codes; the index is used to identify +// which control code to run XRT_CORE_COMMON_EXPORT void patch(const xrt::module&, uint8_t*, size_t*, const std::vector>*, diff --git a/src/runtime_src/core/common/api/xrt_hw_context.cpp b/src/runtime_src/core/common/api/xrt_hw_context.cpp index 54102b206b..adb412aca0 100644 --- a/src/runtime_src/core/common/api/xrt_hw_context.cpp +++ b/src/runtime_src/core/common/api/xrt_hw_context.cpp @@ -7,8 +7,8 @@ #define XCL_DRIVER_DLL_EXPORT // exporting xrt_xclbin.h #define XRT_CORE_COMMON_SOURCE // in same dll as coreutil -#include "core/include/experimental/xrt_module.h" #include "core/include/xrt/xrt_hw_context.h" +#include "core/include/experimental/xrt_module.h" #include "hw_context_int.h" #include "module_int.h" #include "xclbin_int.h" @@ -78,7 +78,7 @@ class hw_context_impl : public std::enable_shared_from_this // creation successful, store the module in the map auto kernel_name = xrt_core::module_int::get_kernel_info(module).props.name; - m_module_map[kernel_name] = std::move(module); + m_module_map.emplace(std::move(kernel_name), std::move(module)); } std::shared_ptr @@ -117,9 +117,8 @@ class hw_context_impl : public std::enable_shared_from_this auto part_size = xrt_core::module_int::get_partition_size(module); // create hw ctx handle if not already created - if (m_hdl == nullptr) { - m_module_map[kernel_name] = std::move(module); - + if (!m_hdl) { + 
m_module_map.emplace(std::move(kernel_name), std::move(module)); m_partition_size = part_size; m_hdl = m_core_device->create_hw_context(m_partition_size, m_cfg_param, m_mode); return; @@ -187,10 +186,10 @@ class hw_context_impl : public std::enable_shared_from_this xrt::module get_module(const std::string& kname) const { - auto itr = m_module_map.find(kname); - if (itr == m_module_map.end()) - throw std::runtime_error("no module found with given kernel name in ctx"); - return itr->second; + if (auto itr = m_module_map.find(kname); itr != m_module_map.end()) + return itr->second; + + throw std::runtime_error("no module found with given kernel name in ctx"); } }; @@ -241,56 +240,38 @@ get_module(const xrt::hw_context& ctx, const std::string& kname) // xrt_hwcontext C++ API implmentations (xrt_hw_context.h) //////////////////////////////////////////////////////////////// namespace xrt { - +// common function called with hw ctx created from different ways static std::shared_ptr -alloc_hwctx_from_cfg(const xrt::device& device, const xrt::uuid& xclbin_id, const xrt::hw_context::cfg_param_type& cfg_param) +post_alloc_hwctx(const std::shared_ptr& handle) { - XRT_TRACE_POINT_SCOPE(xrt_hw_context); - auto handle = std::make_shared(device.get_handle(), xclbin_id, cfg_param); - // Update device is called with a raw pointer to dyanamically // link to callbacks that exist in XDP via a C-style interface // The create_hw_context_from_implementation function is then // called in XDP create a hw_context to the underlying implementation xrt_core::xdp::update_device(handle.get()); - handle->get_usage_logger()->log_hw_ctx_info(handle.get()); - return handle; } static std::shared_ptr -alloc_hwctx_from_mode(const xrt::device& device, const xrt::uuid& xclbin_id, xrt::hw_context::access_mode mode) +alloc_hwctx_from_cfg(const xrt::device& device, const xrt::uuid& xclbin_id, const xrt::hw_context::cfg_param_type& cfg_param) { XRT_TRACE_POINT_SCOPE(xrt_hw_context); - auto handle = 
std::make_shared(device.get_handle(), xclbin_id, mode); - - // Update device is called with a raw pointer to dyanamically - // link to callbacks that exist in XDP via a C-style interface - // The create_hw_context_from_implementation function is then - // called in XDP create a hw_context to the underlying implementation - xrt_core::xdp::update_device(handle.get()); - - handle->get_usage_logger()->log_hw_ctx_info(handle.get()); + return post_alloc_hwctx(std::make_shared(device.get_handle(), xclbin_id, cfg_param)); +} - return handle; +static std::shared_ptr +alloc_hwctx_from_mode(const xrt::device& device, const xrt::uuid& xclbin_id, xrt::hw_context::access_mode mode) +{ + XRT_TRACE_POINT_SCOPE(xrt_hw_context); + return post_alloc_hwctx(std::make_shared(device.get_handle(), xclbin_id, mode)); } static std::shared_ptr alloc_empty_hwctx(const xrt::device& device, const xrt::hw_context::cfg_param_type& cfg_param, xrt::hw_context::access_mode mode) { XRT_TRACE_POINT_SCOPE(xrt_hw_context); - auto handle = std::make_shared(device.get_handle(), cfg_param, mode); - - // Update device is called with a raw pointer to dyanamically - // link to callbacks that exist in XDP via a C-style interface - // The create_hw_context_from_implementation function is then - // called in XDP create a hw_context to the underlying implementation - xrt_core::xdp::update_device(handle.get()); - - handle->get_usage_logger()->log_hw_ctx_info(handle.get()); - - return handle; + return post_alloc_hwctx(std::make_shared(device.get_handle(), cfg_param, mode)); } static std::shared_ptr @@ -298,17 +279,7 @@ alloc_hwctx_from_elf(const xrt::device& device, const xrt::elf& elf, const xrt:: xrt::hw_context::access_mode mode) { XRT_TRACE_POINT_SCOPE(xrt_hw_context); - auto handle = std::make_shared(device.get_handle(), elf, cfg_param, mode); - - // Update device is called with a raw pointer to dyanamically - // link to callbacks that exist in XDP via a C-style interface - // The 
create_hw_context_from_implementation function is then - // called in XDP create a hw_context to the underlying implementation - xrt_core::xdp::update_device(handle.get()); - - handle->get_usage_logger()->log_hw_ctx_info(handle.get()); - - return handle; + return post_alloc_hwctx(std::make_shared(device.get_handle(), elf, cfg_param, mode)); } hw_context:: diff --git a/src/runtime_src/core/common/api/xrt_kernel.cpp b/src/runtime_src/core/common/api/xrt_kernel.cpp index 1705a6d47a..dd6a04ccb4 100644 --- a/src/runtime_src/core/common/api/xrt_kernel.cpp +++ b/src/runtime_src/core/common/api/xrt_kernel.cpp @@ -1083,11 +1083,8 @@ class argument struct global_type : iarg { - size_t size; // size in bytes of argument per xclbin - explicit - global_type(size_t bytes = 0) - : size(bytes) + global_type() {} std::vector @@ -1182,7 +1179,7 @@ class argument } case xarg::argtype::global : case xarg::argtype::constant : - content = std::make_unique(arg.size); + content = std::make_unique(); break; case xarg::argtype::local : // local memory case xarg::argtype::stream : // stream connection @@ -1505,39 +1502,16 @@ class kernel_impl : public std::enable_shared_from_this return data; // no skipping } - xrt::module - get_module(const xrt::hw_context& ctx, const std::string& kname) + // parse the name passed to kernel constructor to + // extract kernel name and control code index + static std::pair + get_kname_ctrl_idx_pair(const std::string& name) { - // ELF use case, identify module from ctx that has given kernel name and - // get kernel signature from the module to construct kernel args etc - // kernel name will be of format - : - auto i = kname.find(":"); - if (i == std::string::npos) { - // default case - ctrl code 0 will be used - name = kname.substr(0, kname.size()); - m_ctrl_code_index = 0; - } - else { - name = kname.substr(0, i); - m_ctrl_code_index = std::stoul(kname.substr(i+1, kname.size()-i-1)); - } - - return xrt_core::hw_context_int::get_module(ctx, name); - } - - 
const property_type& - get_elf_kernel_properties() - { - // Get kernel info from module - const auto& kernel_info = xrt_core::module_int::get_kernel_info(m_module); - if (name != kernel_info.props.name) - throw std::runtime_error("Kernel name mismatch, incorrect module picked\n"); - - // set kernel args - for (auto& arg : kernel_info.args) - args.emplace_back(arg); - - return kernel_info.props; + // kernel name will be of format - : + if (auto i = name.find(':'); i != std::string::npos) + return std::make_pair(name.substr(0, i), std::stoul(name.substr(i+1, name.size()-i-1))); + else + return std::make_pair(name, 0); // default case - ctrl code 0 will be used } static uint32_t @@ -1616,15 +1590,21 @@ class kernel_impl : public std::enable_shared_from_this } kernel_impl(std::shared_ptr dev, xrt::hw_context ctx, const std::string& nm) - : device(std::move(dev)) // share ownership - , hwctx(std::move(ctx)) // hw context - , hwqueue(hwctx) // hw queue - , m_module(get_module(hwctx, nm)) // module object with matching kernel name - , properties(get_elf_kernel_properties()) // kernel info present in Elf + : name(get_kname_ctrl_idx_pair(nm).first) // get canonical kernel name + , device(std::move(dev)) // share ownership + , hwctx(std::move(ctx)) // hw context + , hwqueue(hwctx) // hw queue + , m_module(xrt_core::hw_context_int::get_module(hwctx, name)) // module obj with matching kernel name (use member hwctx; ctx was moved from above) + , properties(xrt_core::module_int::get_kernel_info(m_module).props) , uid(create_uid()) + , m_ctrl_code_index(get_kname_ctrl_idx_pair(nm).second) // control code index { XRT_DEBUGF("kernel_impl::kernel_impl(%d)\n", uid); + // get kernel info from module and initialize kernel args + for (auto& arg : xrt_core::module_int::get_kernel_info(m_module).args) + args.emplace_back(arg); + // amend args with computed data based on kernel protocol amend_args(); m_usage_logger->log_kernel_info(device->core_device.get(), hwctx, name, args.size()); @@ 
-1992,8 +1972,11 @@ class run_impl return 
count++; } - // This function copies the module into a hw_context. The module + // This function copies the module into a hw_context. The module // will be associated with hwctx specific memory. + // If module has multiple control codes, index is used to identify + // the control code that needs to be run. + // By default control code at zeroth index is picked static xrt::module copy_module(const xrt::module& module, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx) { @@ -3489,7 +3472,7 @@ alloc_kernel(const std::shared_ptr& dev, xrt::kernel::cu_access_mode mode) { auto amode = hwctx_access_mode(mode); // legacy access mode to hwctx qos - return std::make_shared(dev, xrt::hw_context{dev->get_xrt_device(), xclbin_id, amode}, name); + return std::make_shared(dev, xrt::hw_context{dev->get_xrt_device(), xclbin_id, amode}, xrt::module{}, name); } static std::shared_ptr diff --git a/src/runtime_src/core/common/api/xrt_module.cpp b/src/runtime_src/core/common/api/xrt_module.cpp index 979da54fe2..a718462c5a 100644 --- a/src/runtime_src/core/common/api/xrt_module.cpp +++ b/src/runtime_src/core/common/api/xrt_module.cpp @@ -1055,6 +1055,12 @@ class module_elf_aie2p : public module_elf return std::make_pair(UINT32_MAX, control_packet::get_empty_buf()); } + [[nodiscard]] virtual size_t + get_scratch_pad_mem_size() const override + { + return m_scratch_pad_mem_size; + } + [[nodiscard]] std::pair get_preempt_save() const override { @@ -1377,8 +1383,9 @@ class module_sram : public module_impl create_instr_buf(const module_impl* parent) { XRT_DEBUGF("-> module_sram::create_instr_buf()\n"); - instr_buf data; - std::tie(m_instr_sec_idx, data) = parent->get_instr(m_index); + auto instr_buf_info = parent->get_instr(m_index); + m_instr_sec_idx = instr_buf_info.first; + const instr_buf& data = instr_buf_info.second; size_t sz = data.size(); if (sz == 0) throw std::runtime_error("Invalid instruction buffer size"); @@ -1461,8 +1468,9 @@ class module_sram : public module_impl void 
create_ctrlpkt_buf(const module_impl* parent) { - control_packet data; - std::tie(m_ctrlpkt_sec_idx, data) = parent->get_ctrlpkt(m_index); + auto ctrl_pkt_info = parent->get_ctrlpkt(m_index); + m_ctrlpkt_sec_idx = ctrl_pkt_info.first; + const control_packet& data = ctrl_pkt_info.second; size_t sz = data.size(); if (sz == 0) { @@ -1663,7 +1671,7 @@ class module_sram : public module_impl } public: - module_sram(std::shared_ptr parent, xrt::hw_context hwctx, uint32_t index) + module_sram(std::shared_ptr parent, xrt::hw_context hwctx, uint32_t index = 0) : module_impl{ parent->get_cfg_uuid() } , m_parent{ std::move(parent) } , m_hwctx{ std::move(hwctx) } @@ -1788,7 +1796,7 @@ patch(const xrt::module& module, uint8_t* ibuf, size_t* sz, const std::vectorget_os_abi(); if (os_abi == Elf_Amd_Aie2p || os_abi == Elf_Amd_Aie2p_config) { - const auto& buf_info = hdl->get_instr(idx); + auto buf_info = hdl->get_instr(idx); patch_index = buf_info.first; inst = &(buf_info.second); } @@ -1896,6 +1904,11 @@ module(void* userptr, size_t sz, const xrt::uuid& uuid) : detail::pimpl{ std::make_shared(userptr, sz, uuid) } {} +module:: +module(const xrt::module& parent, const xrt::hw_context& hwctx) +: detail::pimpl{ std::make_shared(parent.handle, hwctx) } +{} + module:: module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx) : detail::pimpl{ std::make_shared(parent.handle, hwctx, ctrl_code_idx) } diff --git a/src/runtime_src/core/include/experimental/xrt_module.h b/src/runtime_src/core/include/experimental/xrt_module.h index 4e22a36de6..42ef1b7892 100644 --- a/src/runtime_src/core/include/experimental/xrt_module.h +++ b/src/runtime_src/core/include/experimental/xrt_module.h @@ -78,6 +78,22 @@ class module : public detail::pimpl XRT_API_EXPORT module(void* userptr, size_t sz, const xrt::uuid& uuid); + /** + * module() - Constructor associate module with hardware context + * + * @param parent + * Parent module with instruction buffer to move into hwctx + * 
@param hwctx + * Hardware context to associate with module + * + * Copy content of existing module into an allocation associated + * with the specified hardware context. + * + * Throws if module is not compatible with hardware context + */ + XRT_API_EXPORT + module(const xrt::module& parent, const xrt::hw_context& hwctx); + /** * module() - Constructor associate module with hardware context * @@ -90,11 +106,13 @@ class module : public detail::pimpl * * Copy content of existing module into an allocation associated * with the specified hardware context. + * If module has multiple control codes, index is used to identify + * the control code that needs to be run. * * Throws if module is not compatible with hardware context */ XRT_API_EXPORT - module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx = 0); + module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t ctrl_code_idx); /** * get_cfg_uuid() - Get the uuid of the hardware configuration diff --git a/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp b/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp index 76c1892cfb..c3f050dcbc 100755 --- a/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp +++ b/src/runtime_src/core/tools/xbtracer/src/lib/xrt_module_inst.cpp @@ -38,7 +38,7 @@ module::module(void* userptr, size_t sz, const xrt::uuid& uuid) XRT_TOOLS_XBT_FUNC_EXIT(func); } -module::module(const xrt::module& parent, const xrt::hw_context& hwctx, uint32_t /*ctrl_code_idx*/) +module::module(const xrt::module& parent, const xrt::hw_context& hwctx) { auto func = "xrt::module::module(const xrt::module&, const xrt::hw_context&)"; XRT_TOOLS_XBT_CALL_CTOR(dtbl.module.ctor_mod_ctx, this, parent, hwctx);