diff --git a/src/.clang-tidy b/src/.clang-tidy index c9a7249083..5729706e1d 100644 --- a/src/.clang-tidy +++ b/src/.clang-tidy @@ -30,7 +30,6 @@ modernize-*, WarningsAsErrors: '' HeaderFilterRegex: '' -AnalyzeTemporaryDtors: false FormatStyle: none User: sonals CheckOptions: @@ -124,6 +123,8 @@ CheckOptions: value: llvm - key: cert-oop54-cpp.WarnOnlyIfThisHasSuspiciousField value: '0' + - key: cppcoreguidelines-avoid-non-const-global-variables.AllowInternalLinkage + value: '1' - key: cppcoreguidelines-avoid-magic-numbers.IgnoredFloatingPointValues value: '1.0;100.0;' - key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues diff --git a/src/runtime_src/core/common/CMakeLists.txt b/src/runtime_src/core/common/CMakeLists.txt index 242e6e19ef..2455180f1e 100644 --- a/src/runtime_src/core/common/CMakeLists.txt +++ b/src/runtime_src/core/common/CMakeLists.txt @@ -3,6 +3,7 @@ # Copyright (C) 2022-2024 Advanced Micro Devices, Inc. All rights reserved. add_subdirectory(api) add_subdirectory(xdp) +add_subdirectory(runner) if(CMAKE_VERSION VERSION_LESS "3.18.0") message(WARNING "CMake version is less than 3.18.0, build of submodule aiebu disabled") @@ -62,12 +63,14 @@ target_include_directories(core_common_objects add_library(xrt_coreutil SHARED $ + $ $ $ ) add_library(xrt_coreutil_static STATIC $ + $ $ $ ) diff --git a/src/runtime_src/core/common/runner/CMakeLists.txt b/src/runtime_src/core/common/runner/CMakeLists.txt new file mode 100644 index 0000000000..68d0197b31 --- /dev/null +++ b/src/runtime_src/core/common/runner/CMakeLists.txt @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: Apache-2.0 +# Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. +add_library(core_common_runner_objects OBJECT + runner.cpp + cpu.cpp + ) + +target_include_directories(core_common_runner_objects + PRIVATE + ${XRT_SOURCE_DIR}/runtime_src + ) + diff --git a/src/runtime_src/core/common/runner/README.md b/src/runtime_src/core/common/runner/README.md new file mode 100644 index 0000000000..2739279850 --- /dev/null +++ b/src/runtime_src/core/common/runner/README.md @@ -0,0 +1,561 @@ + + +# Run recipe for XRT + +A run-recipe defines how to execute a graph model using XRT. + +This directory contains a stand-alone `xrt::runner` class that reads and +executes a run-recipe json file. The idea is to have tools, e.g. VAIML +geneate the run-recipe along with xclbin and control code for kernels. + +The format (schema) of the recipe json is loosely defined. The +implementation of the runner drove some of the defintion of the json +format. + +A run-recipe is associated with exactly one xclbin which, when loaded into +a region (partition) on the device, can run the recipe. + +# JSON format + +There are three sections in the run-recipe. + +1. [header](#header) +2. [resources](#resources) +3. [execution](#execution) + +The `header` trivially contains the path (full name) of the xclbin that should +be loaded before resources can be created or the recipe can be executed. + +The `resources` section defines all buffer objects, kernel objects, +and cpu function objects used to execute the recipe. The resources are +created as the run recipe is loaded. External input and output buffer +may be bound later during the execution stage of recipe. + +The `execution` section defines how the resources are connected +together during execution. It simply executes kernels and cpu +functions that were previously defined in the resource section with +arguments that were also defined in the resource section. 
Execution
+of kernels can consume partial buffer input and produce partial buffer
+output per `size` and `offset` fields defined as part of specifying the
+kernel arguments.
+
+## Header
+
+For the time being, the header stores nothing but the path to the
+xclbin. The xclbin contains the kernel meta data used by XRT when
+xrt::kernel objects are created. The xclbin contains PDIs for each
+kernel; the PDIs are loaded by firmware prior to running a kernel.
+
+The header section can be amended with other meta data as needed.
+
+```
+{
+  "header": {
+    "xclbin_path": "design.xclbin",
+  },
+
+  ...
+}
+```
+
+The runner will use the xclbin from the `header` section to create an
+xrt::hw_context, which is subsequently used to create xrt::kernel
+objects.
+
+## Resources
+
+The resources section is a complete list of all objects that are used
+when the recipe is executed. Each kernel used in the `execution`
+section must be listed in the resources section. All kernel argument
+buffers used by kernels in the `execution` section must be listed in
+the resources section. All functions executed on the CPU must also
+be listed in the resources section.
+
+### Kernel functions
+
+Kernels listed in the resources section result in the runner creating
+`xrt::kernel` objects. In XRT, the kernel objects are identified by
+name, which must match a kernel name in the xclbin.
+
+Kernels are constructed from the xclbin kernel name and by specifying
+which xrt::hw_context should execute the kernel and what control code
+the kernel should execute. The hardware context is created by the
+runner from the xclbin specified in the recipe `header` section, so
+kernels in the resources section must contain just the xclbin kernel
+name and the full path to an ELF with the control code.
+
+```
+  "resources": {
+    "kernels": [
+      {
+        "name": "k1",
+        "xclbin_kernel_name": "DPU",
+        "ctrlcode": "no-ctrl-packet.elf"
+      }
+    ]
+  },
+```
+
+The name of the kernel in the resources section must be unique in the
+list of kernel instances; the name is used in the `execution` section
+to refer to which instance should be executed.
+
+If a kernel is instantiated from the same xclbin kernel name and same
+control code, then only one such kernel instance needs to be listed in
+the resources section. Listing multiple kernel instances referring to
+the same xclbin kernel and using the same control code is not an error,
+but it is not necessary.
+
+### CPU functions
+
+Functions to be executed on the CPU are listed in the resources section
+along with a path to a library containing the individual function.
+The library is loaded at runtime (dlopen) and exposes its functions
+through a lookup function, which is itself obtained from a library
+entry point (extern "C") function.
+
+CPU function arguments are expected to be `xrt::bo` objects; for
+example, format converting functions take an input buffer and
+populate an output buffer, and both buffers must be specified in the
+resource buffer section of the recipe.
+
+A library path is relative to the install location of XRT, based on
+the environment value of `XILINX_XRT` or its inferred location if
+not set. On Windows, the inferred location would be the driver store.
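+
+As an illustration only, a sketch of what a function exported by such
+a library might look like is shown below. The function name and
+argument order are hypothetical (they follow the `convert_ifm` example
+used below), and the calling convention is the one described under
+[CPU library requirements](#cpu-library-requirements): arguments arrive
+type erased and are unwrapped to the expected `xrt::bo` and scalar
+types. Exact include paths depend on the XRT installation.
+
+```
+// Sketch of a CPU library function as invoked by the runner.
+// Arguments arrive as std::any; the argument order is defined by the recipe.
+#include <any>
+#include <string>
+#include <vector>
+#include <xrt/xrt_bo.h>
+
+static void
+convert_ifm(std::vector<std::any>& args)
+{
+  auto ifm     = std::any_cast<xrt::bo>(args.at(0));      // external input buffer
+  auto ifm_int = std::any_cast<xrt::bo>(args.at(1));      // internal output buffer
+  auto layout  = std::any_cast<std::string>(args.at(2));  // e.g. "nchw2nchw4c"
+  // ... convert the content of ifm into ifm_int per layout ...
+}
+```
+
+In the recipe, each CPU function is listed by name along with the
+library that provides it: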
+ +``` + "resources": { + "cpus": [ + { + "name": "convert_ifm", + "library_path": "umd/convert.dll" + }, + { + "name": "convert_ofm", + "library_path": "umd/convert.dll" + }, + { + "name": "average_pool", + "library_path": "umd/operators.dll" + } + ] + }, +``` + +### Buffer + +The buffer instances listed in the resources section refer to +`xrt::bo` objects that are used during execution of kernels. The +buffers can be graph inputs or outputs, which refer to application +created input and output tensors, or they can be internal buffers used +during execution of the compiled graph at the discretion of the +compiler (VAIML). + +#### External buffers (graph input and output) + +External buffers (input and output) are created by the framework / +applicaiton outside of the runner and bound to the recipe during +execution. The runner itself does not create `xrt::bo` objects for +external buffers, but does rely on the framework to bind these buffers +to runner object created from the recipe. The external buffers must +still be listed in the resources section and specify a name that can +be used when execution sets kernel arguments. + +``` + "resources": { + "buffers": [ + { + "name": "wts", + "type": "input", + }, + { + "name": "ifm", + "type": "input", + }, + { + "name": "ofm", + "type": "output", + } + ] + } + +``` + +The `name` of the buffers in the resources section must be unique. +The name is used in the `execution` seciton to refer to kernel or cpu +buffer arguments. + + + +#### Internal buffers + +Internal buffers are created and managed by the runner. These are +buffers that are used internally within a graph to carry data from one +kernel or cpu execution to another. + +These buffers are created and managed by runner, hence unlike the +external buffers, the size of internal buffer size must be specified +in the recipe. + +``` + "resources": { + "buffers": [ + { + "name": "ifm_int", + "type": "internal", + "size": "1024" + }, + { + "name": "ofm_int", + "type": "internal", + "size": "1024" + }, + { + "name": "b0", + "type": "internal", + "size": "1024" + }, + { + "name": "b1", + "type": "internal:, + "size": "1024" + }, + { + "name": "b2", + "type": "internal", + "size": "1024" + } + ] + } + +``` +The `size` is currently specified in bytes, we could add support +K/M, e.g. `1048576 = 1024K = 1M` + +## Execution + +The execution section is an ordered list of xrt::kernel or cpu runs +with arguments from the resources section. + +Before the runner can execute the recipe in the execution section, all +graph inputs and outputs must be bound to the recipe. As mentioned +earlier, external inputs and outputs are defined by the framework that +uses the runner. Typically these external inputs are outputs are not +available at the time when the runner is initialized from the recipe +json. In other words, the runner can be created even before the +framework has created input and output tensors, but it can of course +not be executed until the inputs and outputs are defined. The runner +API has methods that must be called to bind the external inputs and +outputs. + +Arguments to a run can be a sub-buffer of the corresponding +resource. A buffer in the resources section refer to the full buffer, +but a run can use just a portion of the resource. By default +a run argument will use the full buffer, but optional attributes in +the json for a buffer can specify the size and an offset into the +resource buffer. + +As an example below, the kernel resource `k1` is executed twice with +3 arguments. 
The 3rd input is a sub-buffer of the `ifm_int` resource, the +4th is the full resource `wts`, and the finally the 5th is a +sub-buffer of `ofm_int`. + +The example illustrates the calling of a CPU function from the `cpu` +resources section. The CPU function calls are passed buffers from the +resources section and scalar values as needed. + +``` + "execution": { + "runs": [ + { + "name": "convert_ifm", + "where": "cpu", + "arguments" : [ + { "name": "ifm", "argidx": 0 }, + { "name": "ifm_int", "argidx": 1 } + ], + "constants" : [ + { "value": "nchw2nchw4c", "type": "string", "argidx": 2 } + ] + ] + }, + { + "name": "k1", + "arguments" : [ + { "name": "ifm_int", "size": 512, "offset": 0, "argidx": 3 }, + { "name": "wts", "argidx": 4 }, + { "name": "ofm_int", "size": 512, "offset": 512, "argidx": 5 } + ] + }, + { + "name": "k1", + "arguments" : [ + { "name": "ifm_int", "size": 512, "offset": 512, "argidx": 3 }, + { "name": "wts", "argidx": 4 }, + { "name": "ofm_int", "size": 512, "offset": 0, "argidx": 5 } + ] + }, + { + "name": "convert_ofm", + "where": "cpu" + "arguments" : [ + { "name": "ofm_int", "argidx": 0 }, + { "name": "ofm", "argidx": 1 } + ], + "constants" : [ + { "value": "nchw4c2nchw", "argidx": 2 } + ] + ] + }, + ... + ] + } +``` + +The runner internally creates sub-buffers out of the specified +resource buffers for each run. Both external and internal +resource buffers can be sliced and diced as required. + +The runner creates `xrt::run` or `xrt_core::cpu::run` objects out of +the specified execution runs. The runner creates a CPU or NPU runlist +for each contiguous sequence of CPU runs or NPU runs specified in the +run recipe. The runlist is inserted into a vector of runlists where +each individual runlist will be executed in sequence, when the +framework calls the runner API execute method. + +In addition to the buffer arguments referring to resource buffers, the +xclbin kernels and cpu functions may have additional arguments that +need to be set. For example the current DPU kernel have 8 arguments +and some of these must be set to some sentinel value. Here the +argument with index 0, represents the kernel opcode which specifies +the type of control packet used for the kernel resource object. The +value `3` implies transaction buffer. + +``` + "execution": { + "runs": [ + { + "name": "k1", + "arguments" : [ + { "name": "wts", "argidx": 4 }, + { "name": "ifm", "argidx": 3 }, + { "name": "ofm", "argidx": 5 } + ], + "constants" : [ + { "value": "3", "type": "int", "argidx": 0 }, + { "value": "0", "type": "int", "argidx": 1 }, + { "value": "0", "type": "int", "argidx": 2 }, + { "value": "0", "type": "int", "argidx": 6 }, + { "value": "0", "type": "int", "argidx": 7 } + ] + } + ] + } +``` + +# Complete run recipe + +For illustration here is a simple complete run-recipe.json file that +has been validated on NPU. There are no internal buffer and external +input and output are consumed during one kernel execution. See the +`runner/test/recipe.json` for an example leveraging cpu functions. 
+ +``` +{ + "header": { + "xclbin_path": "design.xclbin", + }, + "resources": { + "buffers": [ + { + "name": "wts", + "type": "input", + }, + { + "name": "ifm", + "type": "input", + }, + { + "name": "ofm", + "type": "output", + } + ], + "kernels": [ + { + "name": "k1", + "xclbin_kernel_name": "DPU", + "ctrlcode": "no-ctrl-packet.elf" + } + ] + }, + "execution": { + "runs": [ + { + "name": "k1", + "arguments" : [ + { "name": "wts", "argidx": 4 }, + { "name": "ifm", "argidx": 3 }, + { "name": "ofm", "argidx": 5 } + ], + "constants": [ + { "value": "3", "type": "int", "argidx": 0 }, + { "value": "0", "type": "int", "argidx": 1 }, + { "value": "0", "type": "int", "argidx": 2 }, + { "value": "0", "type": "int", "argidx": 6 }, + { "value": "0", "type": "int", "argidx": 7 } + ] + } + ] + } +} +``` + +# Runner API + +The runner is contructed from a recipe json file and a device object. +The runner is a standard XRT C++ first class object with the following +API. Include documentation will be beefed up when the runner code is +moved to public XRT. + +``` +class runner_impl; +class runner +{ + std::shared_ptr m_impl; // probably unique_ptr is enough +public: + // ctor - Create runner from a recipe json + runner(const xrt::device& device, const std::string& recipe); + + // bind_input() - Bind a buffer object to an input tensor + void + bind_input(const std::string& name, const xrt::bo& bo); + + // bind_output() - Bind a buffer object to an output tensor + void + bind_output(const std::string& name, const xrt::bo& bo); + + // execute() - Execute the runner + void + execute(); + + // wait() - Wait for the execution to complete + void + wait(); +}; +``` + +# CPU library requirements + +The run recipe can refer to functions executed on the CPU. These +functions should be implemented in a shared library that can be +loaded at runtime by the runner based on `resources/cpus` section. + +A referenced library is loaded by the runner, which subsequently looks +for exported entry point (symbol) called `open` to initialize the shared +library. The `open()` is supposed to return function objects for callback +functions within the library. At present time, only one callback function +is required is the `lookup()` function, which the runner +uses to lookup functions referenced in the recipe resources section. + +The `lookup()` function must return the callable function that the +runner is requesting along with the number of arguments this function +expects. If the function the runner is looking for is not available, +then the `lookup()` function should throw an exception (TODO: define +the exact exception to throw). The reason the `lookup()` function is +not itself an exported "extern C" function like `open()` is that the +call semantics must be C++ with the bells and whistles that follow +(exceptions). + +The signature of the `extern "C"` exported `open()` function and the +C++ signature of the `lookup()` function is defined in `xrt_runner.h` +under `namespace xrt::cpu { ... }`. + +``` +/** + * The xrt::runner supports execution of CPU functions as well + * as xrt::kernel objects. + * + * The CPU functions are implemented in runtime loaded dynamic + * libraries. A library must define and export a function that + * initializes a callback structure with a lookup function. 
+ * + * The signature of the lookup function must be + * @code + * void lookup_fn(const std::string& name, xrt::cpu::lookup_args* args) + * @endcode + * where the name is the name of the function to lookup and args is a + * structure that the lookup function must populate with the function + * information. + * + * The arguments to the CPU functions are elided via std::any and + * the signature of the CPU functions is fixed to + * @code + * void cpu_function(std::vector& args) + * @endcode + * Internally, the CPU library unwraps the arguments and calls the + * actual function. + */ +namespace xrt::cpu { +/** + * struct lookup_args - argument structure for the lookup function + * + * The lookup function takes as arguments the name of the function + * to lookup along with lookup_args to be populated with information + * about the function. + * + * @num_args - number of arguments to function + * @callable - a C++ function object wrapping the function + * + * The callable library functions uses type erasure on their arguments + * through a std::vector of std::any objects. The callable must + * unwrap the std::any objects to its expected type, which is + * cumbersome, but type safe. The type erased arguments allow the + * runner to be generic and not tied to a specific function signature. +*/ +struct lookup_args +{ + std::uint32_t num_args; + std::function&)> callable; +}; + +/** + * struct library_init_args - argument structure for libray initialization + * + * The library initialization function is the only function exported + * from the run time loaded library. The library initialization + * function is called by the runner when a resource references a + * function in a library and the library is not already loaded. + * + * @lookup_fn - a callback function to be populated with the + * lookup function. + * + * The library initialization function is C callable exported symbol, + * but returns a C++ function pointer to the lookup function. +*/ +struct library_init_args +{ + std::function lookup_fn; +}; + +/** + * library_init_fn - type of the library initialization function + * The name of the library initialization function is fixed to + * "library_init". +*/ +using library_init_fn = void (*)(library_init_args*); +} // xrt::cpu + +``` + +A unit test for the cpu library and corresponding sample run recipe +that references the cpu library is under `test/cpulib.cpp` and +`test/main.cpp` + + + + + diff --git a/src/runtime_src/core/common/runner/cpu.cpp b/src/runtime_src/core/common/runner/cpu.cpp new file mode 100644 index 0000000000..c933804e0c --- /dev/null +++ b/src/runtime_src/core/common/runner/cpu.cpp @@ -0,0 +1,193 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+#define XCL_DRIVER_DLL_EXPORT // in same dll as exported xrt apis +#define XRT_CORE_COMMON_SOURCE // in same dll as coreutil +#define XRT_API_SOURCE // in same dll as coreutil + +//#define XRT_VERBOSE +#include "cpu.h" + +#include "core/common/debug.h" +#include "core/common/dlfcn.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace { + +using lookup_args = xrt_core::cpu::lookup_args; +using library_init_args = xrt_core::cpu::library_init_args; +using library_init_fn = xrt_core::cpu::library_init_fn; + +// struct dllwrap - wrapper class to manange the lifetime of a loaded library +struct dllwrap +{ + using dll_guard = std::unique_ptr; + dll_guard dll; + + explicit dllwrap(const std::filesystem::path& path) + : dll{xrt_core::dlopen(path.string().c_str(), RTLD_NOW | RTLD_GLOBAL), xrt_core::dlclose} + { + if (!dll) + throw std::runtime_error("Failed to open " + path.string() + ": " + xrt_core::dlerror()); + + XRT_DEBUGF("dllwrap::dllwrap(%s) loaded\n", path.c_str()); + } +}; + +// Control the order of destruction of static objects. In particular +// the dlls cannot be unloaded before the library init args have been +// destroyed +static std::map s_library_handles; // NOLINT +static std::map s_function_map; // NOLINT +static std::map s_library_callbacks; // NOLINT +static std::mutex s_mutex; // NOLINT + +static std::filesystem::path +adjust_path(std::filesystem::path path) +{ +#ifdef _WIN32 + std::filesystem::path fn = path.filename(); + fn += ".dll"; +#else + std::filesystem::path fn = "lib"; + fn += path.filename(); + fn += ".so"; +#endif + return path.replace_filename(fn); +} + +static void* +open_library(std::filesystem::path dll) +{ + std::lock_guard lock(s_mutex); + if (auto it = s_library_handles.find(dll); it != s_library_handles.end()) + return it->second.dll.get(); + + auto [it, inserted] = s_library_handles.emplace(dll, dllwrap{dll}); + return it->second.dll.get(); +} + +static const lookup_args* +lookup(const std::string& lname, const std::string& fname) +{ + XRT_DEBUGF("lookup(%s, %s)\n", lname.c_str(), fname.c_str()); + + // Check if the function is already loaded + std::lock_guard lock(s_mutex); + if (auto it = s_function_map.find(fname); it != s_function_map.end()) + return &it->second; + + // Check if the library is not already loaded in which case load and + // initialize the library to get the callback functions + auto cb_itr = s_library_callbacks.find(lname); + if (cb_itr == s_library_callbacks.end()) { // load and initialize + auto lhdl = open_library(adjust_path(lname)); + auto sym = xrt_core::dlsym(lhdl, "library_init"); + auto init = reinterpret_cast(sym); + library_init_args args; + init(&args); + std::tie(cb_itr, std::ignore) = s_library_callbacks.emplace(lname, std::move(args)); + } + + // Use lookup callback function to get the function information, which + // is cached for future reference + auto& cb = cb_itr->second; + lookup_args args; + cb.lookup_fn(fname, &args); + auto [fitr, emplaced] = s_function_map.emplace(fname, std::move(args)); + return &fitr->second; +} + +} // namespace + +namespace xrt_core::cpu { + +class function_impl +{ + const lookup_args* m_fcn_info; +public: + function_impl(const std::string& name, const std::string& libname) + : m_fcn_info{lookup(libname, name)} + {} + + uint32_t + get_number_of_args() const + { + return m_fcn_info->num_args; + } + + void + call(std::vector& args) const + { + m_fcn_info->callable(args); + } +}; + +// class run - Facade for exexcuting functions within a library on 
the CPU +// +// Provides interface for run-time loading of a library with functions +// to be executed on the CPU by the xrt::runner class. +class run_impl +{ + std::shared_ptr m_fn; + std::vector m_args; + +public: + explicit run_impl(std::shared_ptr fn) + : m_fn{std::move(fn)} + , m_args(m_fn->get_number_of_args()) // cannot be initializer list + {} + + void + set_arg(int argidx, std::any value) + { + m_args.at(argidx) = std::move(value); + } + + void + execute() + { + // Call the function + m_fn->call(m_args); + } +}; + +//////////////////////////////////////////////////////////////// +function:: +function(const std::string& fname, const std::string& lname) + : m_impl(std::make_shared(fname, lname)) +{} + +function:: +~function() = default; + +run:: +run(const function& f) + : m_impl{std::make_shared(f.get_handle())} +{} + +run:: +~run() = default; + +void +run:: +set_arg(int argidx, const std::any& value) +{ + m_impl->set_arg(argidx, value); +} + +void +run:: +execute() +{ + m_impl->execute(); +} + +} // namespace xrt_core::cpu diff --git a/src/runtime_src/core/common/runner/cpu.h b/src/runtime_src/core/common/runner/cpu.h new file mode 100644 index 0000000000..d66389da25 --- /dev/null +++ b/src/runtime_src/core/common/runner/cpu.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. +#ifndef XRT_COMMON_RUNNER_CPU_H_ +#define XRT_COMMON_RUNNER_CPU_H_ +#include "core/common/config.h" +#include "runner.h" + +#include +#include +#include +#include +#include +#include + +namespace xrt_core::cpu { + +// class function - Manage a CPU function within a library +// +// Functions are created by the xrt::runner class as part of +// initializing resources specified in a run-recipe json. +class function_impl; +class function +{ + std::shared_ptr m_impl; +public: + XRT_CORE_COMMON_EXPORT + function(const std::string& fcn, const std::string& libname); + + XRT_CORE_COMMON_EXPORT + ~function(); + + std::shared_ptr + get_handle() const + { + return m_impl; + } +}; + +// class run - Manage execution of a CPU function +// +// A run object is created by the xrt::runner class to bind arguments +// specified in run-recipe json to the function and execute it. +class run_impl; +class run +{ + std::shared_ptr m_impl; + public: + XRT_CORE_COMMON_EXPORT + explicit run(const function&); + + XRT_CORE_COMMON_EXPORT + ~run(); + + XRT_CORE_COMMON_EXPORT + void + set_arg(int argidx, const std::any& value); + + XRT_CORE_COMMON_EXPORT + void + execute(); +}; // run + +} // xrt_core::cpu +#endif diff --git a/src/runtime_src/core/common/runner/runner.cpp b/src/runtime_src/core/common/runner/runner.cpp new file mode 100644 index 0000000000..fc49820672 --- /dev/null +++ b/src/runtime_src/core/common/runner/runner.cpp @@ -0,0 +1,1086 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. 
+#define XCL_DRIVER_DLL_EXPORT // in same dll as exported xrt apis +#define XRT_CORE_COMMON_SOURCE // in same dll as coreutil +#define XRT_API_SOURCE // in same dll as coreutil + +#define XRT_VERBOSE +#include "runner.h" +#include "cpu.h" + +#include "core/common/debug.h" +#include "core/common/dlfcn.h" +#include "core/common/error.h" +#include "core/common/module_loader.h" +#include "core/include/xrt/xrt_bo.h" +#include "core/include/xrt/xrt_device.h" +#include "core/include/xrt/xrt_hw_context.h" +#include "core/include/xrt/xrt_kernel.h" +#include "core/include/experimental/xrt_elf.h" +#include "core/include/experimental/xrt_ext.h" +#include "core/include/experimental/xrt_kernel.h" +#include "core/include/experimental/xrt_module.h" +#include "core/include/experimental/xrt_queue.h" +#include "core/include/experimental/xrt_xclbin.h" + +#ifdef _WIN32 +# pragma warning (push) +# pragma warning (disable: 4702) +#endif +#include "boost/property_tree/json_parser.hpp" +#include "boost/property_tree/ptree.hpp" +#ifdef _WIN32 +# pragma warning (pop) +#endif + +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +# pragma warning (disable: 4100 4189 4505) +#endif + +namespace { + +const boost::property_tree::ptree default_ptree; + +// struct streambuf - wrap a std::streambuf around an external buffer +// +// This is used create elf files from memory through a std::istream +struct streambuf : public std::streambuf +{ + streambuf(char* begin, char* end) + { + setg(begin, begin, end); + } + + template + streambuf(T* begin, T* end) + : streambuf(reinterpret_cast(begin), reinterpret_cast(end)) + {} + + template + streambuf(const T* begin, const T* end) // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + : streambuf(const_cast(begin), const_cast(end)) + {} + + std::streampos + seekpos(std::streampos pos, std::ios_base::openmode which) override + { + setg(eback(), eback() + pos, egptr()); + return gptr() - eback(); + } + + std::streampos + seekoff(std::streamoff off, std::ios_base::seekdir way, std::ios_base::openmode which) override + { + if (way == std::ios_base::cur) + gbump(static_cast(off)); + else if (way == std::ios_base::end) + setg(eback(), egptr() + off, egptr()); + else if (way == std::ios_base::beg) + setg(eback() + off, gptr(), egptr()); + return gptr() - eback(); + } +}; + +// Artifacts are encoded / referenced in recipe by string. 
+// The artifacts can be stored in a file system or in memory +// depending on how the recipe is loaded +namespace artifacts { + +// class repo - artifact repository +class repo +{ +protected: + mutable std::map> m_data; + +public: + virtual ~repo() = default; + + virtual const std::vector& + get(const std::string& path) const = 0; +}; + +// class file_repo - file system artifact repository +// Artifacts are loaded from disk and stored in persistent storage +class file_repo : public repo +{ +public: + const std::vector& + get(const std::string& path) const override + { + if (auto it = m_data.find(path); it != m_data.end()) + return (*it).second; + + std::ifstream ifs(path, std::ios::binary); + if (!ifs) + throw std::runtime_error{"Failed to open file: " + path}; + + ifs.seekg(0, std::ios::end); + std::vector data(ifs.tellg()); + ifs.seekg(0, std::ios::beg); + ifs.read(data.data(), data.size()); + auto [itr, success] = m_data.emplace(path, std::move(data)); + + return (*itr).second; + } +}; + +// class ram_repo - in-memory artifact repository +// Used artifacts are copied to persistent storage +class ram_repo : public repo +{ + const std::map>& m_reference; +public: + explicit ram_repo(const std::map>& data) + : m_reference{data} + {} + + const std::vector& + get(const std::string& path) const override + { + if (auto it = m_data.find(path); it != m_data.end()) + return (*it).second; + + if (auto it = m_reference.find(path); it != m_reference.end()) { + auto [itr, success] = m_data.emplace(path, it->second); + return (*itr).second; + } + + throw std::runtime_error{"Failed to find artifact: " + path}; + } +}; + +} // namespace artifacts + +namespace module_cache { + +// Cache of elf files to modules to avoid recreating modules +// referring to the same elf file. +static std::map s_path2elf; // NOLINT +static std::map s_elf2mod; // NOLINT + +static xrt::module +get(const xrt::elf& elf) +{ + if (auto it = s_elf2mod.find(elf); it != s_elf2mod.end()) + return (*it).second; + + xrt::module mod{elf}; + s_elf2mod.emplace(elf, mod); + return mod; +} + +static xrt::module +get(const std::string& path, const artifacts::repo& repo) +{ + if (auto it = s_path2elf.find(path); it != s_path2elf.end()) + return get((*it).second); + + auto& data = repo.get(path); + streambuf buf{data.data(), data.data() + data.size()}; + std::istream is{&buf}; + xrt::elf elf{is}; + s_path2elf.emplace(path, elf); + + return get(elf); +} + +} // module_cache + +class recipe +{ + // class header - header section of the recipe + class header + { + xrt::xclbin m_xclbin; + + static xrt::xclbin + read_xclbin(const boost::property_tree::ptree& pt, const artifacts::repo& repo) + { + auto path = pt.get("xclbin_path"); + auto data = repo.get(path); + return xrt::xclbin{data}; + } + + public: + header(const boost::property_tree::ptree& pt, const artifacts::repo& repo) + : m_xclbin{read_xclbin(pt, repo)} + { + XRT_DEBUGF("Loaded xclbin: %s\n", m_xclbin.get_uuid().to_string().c_str()); + } + + header(const header&) = default; + + xrt::xclbin + get_xclbin() const + { + return m_xclbin; + } + }; // class recipe::header + + // class resources - resource section of the recipe + class resources + { + public: + class buffer + { + std::string m_name; + + enum class type { input, output, internal }; + type m_type; + + size_t m_size; + + // Buffer object is created for internal nodes, but not for + // input/output which are bound during execution. 
+ xrt::bo m_xrt_bo; + + // Only internal buffers have a size and are created during + // as part of loading the recipe. External buffers are bound + // during execution. + buffer(const xrt::device& device, std::string name, type t, size_t sz) + : m_name(std::move(name)) + , m_type(t) + , m_size(sz) + , m_xrt_bo{m_type == type::internal ? xrt::ext::bo{device, m_size} : xrt::bo{}} + { + XRT_DEBUGF("recipe::resources::buffer(%s)\n", m_name.c_str()); + } + + // Copy constructor creates a new buffer with same properties as other + // The xrt::bo is not copied, but a new one is created. + buffer(const xrt::device& device, const buffer& other) + : m_name(other.m_name) + , m_type(other.m_type) + , m_size(other.m_size) + , m_xrt_bo{m_type == type::internal ? xrt::ext::bo{device, m_size} : xrt::bo{}} + {} + + static type + to_type(const std::string& t) + { + if (t == "input") + return type::input; + if (t == "output") + return type::output; + if (t == "internal") + return type::internal; + + throw std::runtime_error("Unknown buffer type '" + t + "'"); + } + public: + buffer(const buffer& rhs) = default; + buffer(buffer&& rhs) = default; + + // create_buffer - create a buffer object from a property tree node + static buffer + create_buffer(const xrt::device& device, const boost::property_tree::ptree& pt) + { + auto tp = to_type(pt.get("type")); // required, input/output/internal + auto sz = (tp == type::internal) ? pt.get("size") : 0; // required for internal buffers + return {device, pt.get("name"), tp, sz}; + } + + // create_buffer - create a buffer object from another buffer object + // This will create a new buffer object with the same properties as the + // other buffer, but with a new xrt::bo object. + static buffer + create_buffer(const xrt::device& device, const buffer& other) + { + return {device, other}; + } + + xrt::bo + get_xrt_bo() const + { + return m_xrt_bo; + } + + std::string + get_name() const + { + return m_name; + } + + void + bind(const xrt::bo& bo) + { + m_xrt_bo = bo; + } + }; // class recipe::resources::buffer + + class kernel + { + std::string m_name; + std::string m_xclbin_name; + xrt::xclbin::kernel m_xclbin_kernel; + xrt::kernel m_xrt_kernel; + + // Kernel must be in xclbin. The xclbin was used when the hwctx was + // constructed. Here we lookup the xclbin kernel object for additional + // meta data (may not be needed). + kernel(const xrt::hw_context& ctx, const xrt::module& mod, std::string name, std::string xname) + : m_name{std::move(name)} + , m_xclbin_name{std::move(xname)} + , m_xclbin_kernel{ctx.get_xclbin().get_kernel(m_xclbin_name)} + , m_xrt_kernel{xrt::ext::kernel{ctx, mod, m_xclbin_name}} + { + XRT_DEBUGF("recipe::resources::kernel(%s, %s)\n", m_name.c_str(), m_xclbin_name.c_str()); + } + + // Legacy kernel (alveo) + kernel(const xrt::hw_context& ctx, std::string name, std::string xname) + : m_name(std::move(name)) + , m_xclbin_name(std::move(xname)) + , m_xclbin_kernel{ctx.get_xclbin().get_kernel(m_xclbin_name)} + , m_xrt_kernel{xrt::kernel{ctx, m_xclbin_name}} + { + XRT_DEBUGF("recipe::resources::kernel(%s, %s)\n", m_name.c_str(), m_xclbin_name.c_str()); + } + + public: + kernel(const kernel& rhs) = default; + kernel(kernel&& rhs) = default; + + // create_kernel - create a kernel object from a property tree node + // The kernel control module is created if necessary. 
+ static kernel + create_kernel(const xrt::hw_context& hwctx, const boost::property_tree::ptree& pt, + const artifacts::repo& repo) + { + auto name = pt.get("name"); // required, default xclbin kernel name + auto elf = pt.get("ctrlcode", ""); // optional elf file + if (elf.empty()) + return kernel{hwctx, name, pt.get("xclbin_kernel_name", name)}; + + auto mod = module_cache::get(elf, repo); + return kernel{hwctx, mod, name, pt.get("xclbin_kernel_name", name)}; + } + + xrt::kernel + get_xrt_kernel() const + { + return m_xrt_kernel; + } + }; // class recipe::resources::kernel + + class cpu + { + private: + std::string m_name; + std::string m_path; + xrt_core::cpu::function m_fcn; + + cpu(std::string name, std::string path) + : m_name{std::move(name)} + , m_path{std::move(path)} + , m_fcn{m_name, m_path} + { + XRT_DEBUGF("recipe::resources::cpu(%s, %s)\n", m_name.c_str(), m_path.c_str()); + } + + public: + cpu(const cpu& rhs) = default; + cpu(cpu&& rhs) = default; + + // create_cpu - create a cpu object from a property tree node + static cpu + create_cpu(const boost::property_tree::ptree& pt) + { + auto name = pt.get("name"); // required + auto library_path = xrt_core::environment::xilinx_xrt() + / pt.get("library_path"); // required + return cpu{name, library_path.string()}; + } + + xrt_core::cpu::function + get_function() const + { + return m_fcn; + } + }; // class recipe::resources::cpu + + xrt::device m_device; + xrt::hw_context m_hwctx; + std::map m_buffers; + std::map m_kernels; + std::map m_cpus; + + // create_buffers - create buffer objects from buffer property tree nodes + static std::map + create_buffers(const xrt::device& device, const boost::property_tree::ptree& pt) + { + std::map buffers; + for (const auto& [name, node] : pt) + buffers.emplace(node.get("name"), buffer::create_buffer(device, node)); + + return buffers; + } + + // create_buffers - create buffer objects from buffer objects + // This will create new buffer objects with the same properties as the + // other buffers, but with new xrt::bo objects. 
+ static std::map + create_buffers(const xrt::device& device, const std::map& others) + { + std::map buffers; + for (const auto& [name, other] : others) + buffers.emplace(name, buffer::create_buffer(device, other)); + + return buffers; + } + + // create_kernels - create kernel objects from kernel property tree nodes + static std::map + create_kernels(xrt::device device, const xrt::hw_context& hwctx, + const boost::property_tree::ptree& pt, const artifacts::repo& repo) + { + std::map kernels; + for (const auto& [name, node] : pt) + kernels.emplace(node.get("name"), kernel::create_kernel(hwctx, node, repo)); + + return kernels; + } + + // create_cpus - create cpu objects from cpu property tree nodes + static std::map + create_cpus(const boost::property_tree::ptree& pt) + { + std::map cpus; + for (const auto& [name, node] : pt) + cpus.emplace(node.get("name"), cpu::create_cpu(node)); + + return cpus; + } + + public: + resources(xrt::device device, const xrt::xclbin& xclbin, + const boost::property_tree::ptree& recipe, const artifacts::repo& repo) + : m_device{std::move(device)} + , m_hwctx{m_device, m_device.register_xclbin(xclbin)} + , m_buffers{create_buffers(m_device, recipe.get_child("buffers"))} + , m_kernels{create_kernels(m_device, m_hwctx, recipe.get_child("kernels"), repo)} + , m_cpus{create_cpus(recipe.get_child("cpus", default_ptree))} // optional + {} + + resources(const resources& other) + : m_device{other.m_device} // share device + , m_hwctx{other.m_hwctx} // share hwctx + , m_buffers{create_buffers(m_device, other.m_buffers)} // new buffers + , m_kernels{other.m_kernels} // share kernels + , m_cpus{other.m_cpus} // share cpus + {} + + xrt::hw_context + get_xrt_hwctx() const + { + return m_hwctx; + } + + xrt::kernel + get_xrt_kernel_or_error(const std::string& name) const + { + auto it = m_kernels.find(name); + if (it == m_kernels.end()) + throw std::runtime_error("Unknown kernel '" + name + "'"); + return it->second.get_xrt_kernel(); + } + + xrt_core::cpu::function + get_cpu_function_or_error(const std::string& name) const + { + auto it = m_cpus.find(name); + if (it == m_cpus.end()) + throw std::runtime_error("Unknown cpu '" + name + "'"); + return it->second.get_function(); + } + + resources::buffer + get_buffer_or_error(const std::string& name) const + { + auto it = m_buffers.find(name); + if (it == m_buffers.end()) + throw std::runtime_error("Unknown buffer '" + name + "'"); + + return it->second; + } + }; // class recipe::resources + + // class execution - execution section of the recipe + class execution + { + class run + { + struct argument + { + resources::buffer m_buffer; + + // Buffer object for the argument. This can be a sub-buffer + // if the argument is sliced or it can be null bo if the + // argument is unbound. 
+ size_t m_offset; + size_t m_size; // 0 indicates the entire buffer + int m_argidx; + + xrt::bo m_xrt_bo; + + static xrt::bo + create_xrt_bo(const resources::buffer& buffer, size_t offset, size_t size) + { + auto bo = buffer.get_xrt_bo(); + if (bo && (bo.size() < size)) + throw std::runtime_error("buffer size mismatch for buffer: " + buffer.get_name()); + + if (bo && (size < bo.size())) + // sub-buffer + return xrt::bo{bo, size, offset}; + + return bo; // may be null bo for unbound buffer arguments + } + + argument(const resources& resources, const boost::property_tree::ptree& pt) + : m_buffer{resources.get_buffer_or_error(pt.get("name"))} + , m_offset{pt.get("offset", 0)} + , m_size{pt.get("size", 0)} + , m_argidx{pt.get("argidx")} + , m_xrt_bo{create_xrt_bo(m_buffer, m_offset, m_size)} + { + XRT_DEBUGF("recipe::execution::run::argument(%s, %d, %d, %d) bound(%s)\n", + m_buffer.get_name().c_str(), m_offset, m_size, m_argidx, m_xrt_bo ? "true" : "false"); + } + + void + bind(const xrt::bo& bo) + { + m_buffer.bind(bo); + m_xrt_bo = create_xrt_bo(m_buffer, m_offset, m_size); + } + + xrt::bo + get_xrt_bo() const + { + return m_xrt_bo; + } + }; // class recipe::execution::run::argument + + using run_type = std::variant; + std::string m_name; + run_type m_run; + std::map m_args; + + template + struct set_arg_visitor { + int m_idx; + ArgType m_value; + set_arg_visitor(int idx, ArgType&& arg) : m_idx(idx), m_value(std::move(arg)) {} + void operator() (xrt::run& run) const { run.set_arg(m_idx, m_value); } + void operator() (xrt_core::cpu::run& run) const { run.set_arg(m_idx, m_value); } + }; + + struct copy_visitor { + const std::string& m_name; + const resources& m_res; + copy_visitor(const std::string& nm, const resources& res) : m_name{nm}, m_res{res} {} + run_type operator() (const xrt::run&) + { return xrt::run{m_res.get_xrt_kernel_or_error(m_name)}; }; + run_type operator() (const xrt_core::cpu::run&) + { return xrt_core::cpu::run{m_res.get_cpu_function_or_error(m_name)}; }; + }; + + static std::map + create_and_set_args(const resources& resources, run_type run, const boost::property_tree::ptree& pt) + { + std::map args; + for (const auto& [name, node] : pt) { + argument arg {resources, node}; + if (auto bo = arg.get_xrt_bo()) + std::visit(set_arg_visitor{arg.m_argidx, std::move(bo)}, run); + + args.emplace(node.get("name"), std::move(arg)); + } + return args; + } + + static void + set_constant_args(run_type run, const boost::property_tree::ptree& pt) + { + for (const auto& [name, node] : pt) { + auto argidx = node.get("argidx"); + auto type = node.get("type"); + if (type == "int") + std::visit(set_arg_visitor{argidx, node.get("value")}, run); + else if (type == "string") + std::visit(set_arg_visitor{argidx, node.get("value")}, run); + else + throw std::runtime_error("Unknown constant argument type '" + type + "'"); + } + } + + static xrt_core::cpu::run + create_cpu_run(const resources& resources, const boost::property_tree::ptree& pt) + { + auto name = pt.get("name"); + return xrt_core::cpu::run{resources.get_cpu_function_or_error(name)}; + } + + static xrt::run + create_kernel_run(const resources& resources, const boost::property_tree::ptree& pt) + { + auto name = pt.get("name"); + return xrt::run{resources.get_xrt_kernel_or_error(name)}; + } + + static run_type + create_run(const resources& resources, const boost::property_tree::ptree& pt) + { + auto where = pt.get("where", "npu"); + if (where == "cpu") + return create_cpu_run(resources, pt); + + return create_kernel_run(resources, pt); + 
} + + static run_type + create_run(const resources& resources, const run& other) + { + return std::visit(copy_visitor{other.m_name, resources}, other.m_run); + } + + public: + run(const resources& resources, const boost::property_tree::ptree& pt) + : m_name{pt.get("name")} + , m_run{create_run(resources, pt)} + , m_args{create_and_set_args(resources, m_run, pt.get_child("arguments"))} + { + XRT_DEBUGF("recipe::execution::run(%s)\n", pt.get("name").c_str()); + + if (auto constants = pt.get_child_optional("constants")) +#if BOOST_VERSION >= 105600 + set_constant_args(m_run, constants.value()); +#else + set_constant_args(m_run, constants.get()); +#endif + } + + // Create a run from another run but using argument resources + // The ctor creates a new xrt::run or cpu::run from other, these + // runs refer to resources per argument resources + run(const resources& resources, const run& other) + : m_name{other.m_name} + , m_run{create_run(resources, other)} + {} + + bool + is_npu_run() const + { + return std::holds_alternative(m_run); + } + + bool + is_cpu_run() const + { + return std::holds_alternative(m_run); + } + + xrt::run + get_xrt_run() const + { + if (std::holds_alternative(m_run)) + return std::get(m_run); + + throw std::runtime_error("recipe::execution::run::get_xrt_run() called on a CPU run"); + } + + xrt_core::cpu::run + get_cpu_run() const + { + if (std::holds_alternative(m_run)) + return std::get(m_run); + + throw std::runtime_error("recipe::execution::run::get_cpu_run() called on a GPU run"); + } + + void + bind(const std::string& name, const xrt::bo& bo) + { + auto it = m_args.find(name); + if (it == m_args.end()) + return; // the argument is not used in this run + + auto& arg = (*it).second; + arg.bind(bo); + std::visit(set_arg_visitor{arg.m_argidx, arg.get_xrt_bo()}, m_run); + } + }; // class recipe::execution::run + + // struct runlist - a list of runs to execute + // Need to support CPU and NPU runlists. The CPU runlist will be + // a vector of xrt_core::cpu::run objects. The NPU runlist is + // simply an xrt::runlist object. + struct runlist + { + virtual ~runlist() = default; + virtual void execute() = 0; + virtual void wait() {} + }; + + struct cpu_runlist : runlist + { + std::vector m_runs; + + void + execute() override + { + for (auto& run : m_runs) + run.execute(); + } + }; + + struct npu_runlist : runlist + { + xrt::runlist m_runlist; + + explicit npu_runlist(const xrt::hw_context& hwctx) + : m_runlist{hwctx} + {} + + void + execute() override + { + m_runlist.execute(); + } + + void + wait() override + { + m_runlist.wait(); + } + }; + + + std::vector m_runs; + xrt::queue m_queue; // Queue that executes the runlists in sequence + xrt::queue::event m_event; // Event that signals the completion of the last runlist + std::exception_ptr m_eptr; + + std::vector> m_runlists; + + static std::vector> + create_runlists(const resources& resources, const std::vector& runs) + { + std::vector> runlists; + + // A CPU or NPU runlist is created for each contiguous sequence + // of CPU runs or NPU runs. The runlist is inserted into a + // vector of runlists where each individual runlist will be + // executed in sequence. 
+ npu_runlist* nrl = nullptr; + cpu_runlist* crl = nullptr; + for (const auto& run : runs) { + if (run.is_npu_run()) { + if (crl) + crl = nullptr; + + if (!nrl) { + auto rl = std::make_unique(resources.get_xrt_hwctx()); + nrl = rl.get(); + runlists.push_back(std::move(rl)); + } + + nrl->m_runlist.add(run.get_xrt_run()); + } + else if (run.is_cpu_run()) { + if (nrl) + nrl = nullptr; + + if (!crl) { + auto rl = std::make_unique(); + crl = rl.get(); + runlists.push_back(std::move(rl)); + } + + crl->m_runs.push_back(run.get_cpu_run()); + } + } + return runlists; + } + + // create_runs() - create a vector of runs from a property tree + static std::vector + create_runs(const resources& resources, const boost::property_tree::ptree& pt) + { + std::vector runs; + for (const auto& [name, node] : pt) + runs.emplace_back(resources, node); + + return runs; + } + + // create_runs() - create a vector of runs from existing runs + // A run object is a variant, the new run objects are created + // from the variant matching the type of the existing run. + static std::vector + create_runs(const resources& resources, const std::vector& others) + { + std::vector runs; + for (const auto& run : others) + runs.emplace_back(resources, run); + + return runs; + } + + public: + // execution() - create an execution object from a property tree + // The runs are created from the property tree and either xrt::run + // or cpu::run objects. + execution(const resources& resources, const boost::property_tree::ptree& recipe) + : m_runs{create_runs(resources, recipe.get_child("runs"))} + , m_runlists{create_runlists(resources, m_runs)} + {} + + // execution() - create an execution object from existing runs + // New run objects are created from the existing runs. + execution(const resources& resources, const execution& other) + : m_runs{create_runs(resources, other.m_runs)} + , m_runlists{create_runlists(resources, m_runs)} + {} + + void + bind(const std::string& name, const xrt::bo& bo) + { + // Iterate over all runs and bind the buffer. + // Note, that not all runs need to use the buffer. + // Maybe some optimization could be done here. + for (auto& run : m_runs) + run.bind(name, bo); + } + + void + execute() + { + XRT_DEBUGF("recipe::execution::execute()\n"); + + // execute_runlist() - execute a runlist synchronously + // The lambda function is executed asynchronously by an + // xrt::queue object. The wait is necessary for an NPU runlist, + // which must complete before next enqueue operation can be + // executed. Execution of an NPU runlist is itself asynchronous. + static auto execute_runlist = [](runlist* runlist, std::exception_ptr& eptr) { + try { + runlist->execute(); + runlist->wait(); // needed for NPU runlists, noop for CPU + } + catch (const xrt::runlist::command_error&) { + eptr = std::current_exception(); + } + catch (const std::exception&) { + eptr = std::current_exception(); + } + }; + + // A recipe can have multiple runlists. Each runlist can have + // multiple runs. Runlists are executed sequentially, execution + // is orchestrated by xrt::queue which uses one thread to + // asynchronously (from called pov) execute all runlists + for (auto& runlist : m_runlists) + m_event = m_queue.enqueue([this, &runlist] { execute_runlist(runlist.get(), m_eptr); }); + } + + void + wait() + { + XRT_DEBUGF("recipe::execution::wait()\n"); + // Sufficient to wait for last runlist to finish since last list + // must have waited for all previous lists to finish. 
+ auto runlist = m_runlists.back().get(); + if (runlist) + m_event.wait(); + + if (m_eptr) + std::rethrow_exception(m_eptr); + } + }; // class recipe::execution + + xrt::device m_device; + + boost::property_tree::ptree m_recipe; + header m_header; + resources m_resources; + execution m_execution; + + static boost::property_tree::ptree + load(const std::string& path) + { + boost::property_tree::ptree pt; + boost::property_tree::read_json(path, pt); + return pt; + } + +public: + recipe(xrt::device device, const std::string& path, const artifacts::repo& repo) + : m_device{std::move(device)} + , m_recipe{load(path)} + , m_header{m_recipe.get_child("header"), repo} + , m_resources{m_device, m_header.get_xclbin(), m_recipe.get_child("resources"), repo} + , m_execution{m_resources, m_recipe.get_child("execution")} + {} + + recipe(const recipe&) = default; + + void + bind_input(const std::string& name, const xrt::bo& bo) + { + XRT_DEBUGF("recipe::bind_input(%s)\n", name.c_str()); + m_execution.bind(name, bo); + } + + void + bind_output(const std::string& name, const xrt::bo& bo) + { + XRT_DEBUGF("recipe::bind_output(%s)\n", name.c_str()); + m_execution.bind(name, bo); + } + + void + bind(const std::string& name, const xrt::bo& bo) + { + XRT_DEBUGF("recipe::bind(%s)\n", name.c_str()); + m_execution.bind(name, bo); + } + + // The recipe can be executed with its currently bound + // input and output resources + void + execute() + { + XRT_DEBUGF("recipe::execute()\n"); + // Verify that all required resources are bound + // ... + + // Execute the runlist + m_execution.execute(); + } + + void + wait() + { + XRT_DEBUGF("recipe::wait()\n"); + m_execution.wait(); + } +}; // class recipe + +} // namespace + +namespace xrt_core { + +// class runner_impl - +// +// A runner implementation is default created with one instance of a +// recipe. But the runner can be used by multiple threads and new +// recipe instances are created for each thread as needed. +// +// The runner can be created from any thread, but member functions +// are thread specific. 
+class runner_impl +{ + //std::map m_recipes; + recipe m_recipe; + //thread_local recipe m_thread_recipe; + +public: + runner_impl(const xrt::device& device, const std::string& recipe) + : m_recipe{device, recipe, artifacts::file_repo{}} + {} + + runner_impl(const xrt::device& device, const std::string& recipe, const runner::artifacts_repository& artifacts) + : m_recipe{device, recipe, artifacts::ram_repo(artifacts)} + {} + + void + bind_input(const std::string& name, const xrt::bo& bo) + { + m_recipe.bind_input(name, bo); + } + + void + bind_output(const std::string& name, const xrt::bo& bo) + { + m_recipe.bind_output(name, bo); + } + + void + bind(const std::string& name, const xrt::bo& bo) + { + m_recipe.bind(name, bo); + } + + void + execute() + { + m_recipe.execute(); + } + + void + wait() + { + m_recipe.wait(); + } +}; + +//////////////////////////////////////////////////////////////// +// Public runner interface APIs +//////////////////////////////////////////////////////////////// +runner:: +runner(const xrt::device& device, const std::string& recipe) + : m_impl{std::make_unique(device, recipe)} +{} + +runner:: +runner(const xrt::device& device, const std::string& recipe, const artifacts_repository& repo) + : m_impl{std::make_unique(device, recipe, repo)} +{} + +void +runner:: +bind_input(const std::string& name, const xrt::bo& bo) +{ + m_impl->bind_input(name, bo); +} + +// bind_output() - Bind a buffer object to an output tensor +void +runner:: +bind_output(const std::string& name, const xrt::bo& bo) +{ + m_impl->bind_output(name, bo); +} + +void +runner:: +bind(const std::string& name, const xrt::bo& bo) +{ + m_impl->bind(name, bo); +} + +// execute() - Execute the runner +void +runner:: +execute() +{ + m_impl->execute(); +} + +void +runner:: +wait() +{ + m_impl->wait(); +} + +} // namespace xrt_core diff --git a/src/runtime_src/core/common/runner/runner.h b/src/runtime_src/core/common/runner/runner.h new file mode 100644 index 0000000000..787c6b98c5 --- /dev/null +++ b/src/runtime_src/core/common/runner/runner.h @@ -0,0 +1,153 @@ +// SPDX-License-Identifier: Apache-2.0 +// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved. +#ifndef XRT_COMMON_RUNNER_RUNNER_H_ +#define XRT_COMMON_RUNNER_RUNNER_H_ +#include "core/common/config.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace xrt { +class device; +class bo; +} + +namespace xrt_core { + +/** + * class runner - A class to execute a run recipe json + */ +class runner_impl; +class runner +{ + std::shared_ptr m_impl; // probably unique_ptr is enough + +public: + /** + * artifacts_repository - A map of artifacts + * + * The runner can be constructed with an artifacts repository, in + * which case the recipe references are looked up in the artifacts are + * looked up in the repository rather than from disk. 
+ */ + using artifacts_repository = std::map>; + + // ctor - Create runner from a recipe json + XRT_CORE_COMMON_EXPORT + runner(const xrt::device& device, const std::string& recipe); + + // ctor - Create runner from a recipe json and artifacts repository + // The lifetime of the repo must extend the lifetime of the runner + XRT_CORE_COMMON_EXPORT + runner(const xrt::device& device, const std::string& recipe, const artifacts_repository&); + + // bind_input() - Bind a buffer object to an input tensor + XRT_CORE_COMMON_EXPORT + void + bind_input(const std::string& name, const xrt::bo& bo); + + // bind_output() - Bind a buffer object to an output tensor + XRT_CORE_COMMON_EXPORT + void + bind_output(const std::string& name, const xrt::bo& bo); + + // bind() - Bind a buffer object to a tensor + XRT_CORE_COMMON_EXPORT + void + bind(const std::string& name, const xrt::bo& bo); + + // execute() - Execute the runner + XRT_CORE_COMMON_EXPORT + void + execute(); + + // wait() - Wait for the execution to complete + XRT_CORE_COMMON_EXPORT + void + wait(); +}; + +/** + * The xrt::runner supports execution of CPU functions as well + * as xrt::kernel objects. + * + * The CPU functions are implemented in runtime loaded dynamic + * libraries. A library must define and export a function that + * initializes a callback structure with a lookup function. + * + * The signature of the lookup function must be + * @code + * void lookup_fn(const std::string& name, xrt::cpu::lookup_args* args) + * @endcode + * where the name is the name of the function to lookup and args is a + * structure that the lookup function must populate with the function + * information. + * + * The arguments to the CPU functions are elided via std::any and + * the signature of the CPU functions is fixed to + * @code + * void cpu_function(std::vector& args) + * @endcode + * Internally, the CPU library unwraps the arguments and calls the + * actual function. + */ +namespace cpu { + +/** + * struct lookup_args - argument structure for the lookup function + * + * The lookup function takes as arguments the name of the function + * to lookup along with lookup_args to be populated with information + * about the function. + * + * @num_args - number of arguments to function + * @callable - a C++ function object wrapping the function + * + * The callable library functions uses type erasure on their arguments + * through a std::vector of std::any objects. The callable must + * unwrap the std::any objects to its expected type, which is + * cumbersome, but type safe. The type erased arguments allow the + * runner to be generic and not tied to a specific function signature. +*/ +struct lookup_args +{ + std::uint32_t num_args {0}; + std::function&)> callable; +}; + +/** + * struct library_init_args - argument structure for libray initialization + * + * The library initialization function is the only function exported + * from the run time loaded library. The library initialization + * function is called by the runner when a resource references a + * function in a library and the library is not already loaded. + * + * @lookup_fn - a callback function to be populated with the + * lookup function. The lookup function must throw an exception + * if it fails. + * + * The library initialization function is C callable exported symbol, + * but returns a C++ function pointer to the lookup function. 
 */
struct library_init_args
{
  std::function<void(const std::string&, lookup_args*)> lookup_fn;
};

/**
 * library_init_fn - type of the library initialization function
 *
 * The name of the library initialization function is fixed to
 * "library_init".
 */
using library_init_fn = void (*)(library_init_args*);

} // cpu

} // namespace xrt_core
#endif

diff --git a/src/runtime_src/core/common/runner/test/.gitignore b/src/runtime_src/core/common/runner/test/.gitignore
new file mode 100644
index 0000000000..34074f3d75
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/.gitignore
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
build/*

diff --git a/src/runtime_src/core/common/runner/test/CMakeLists.txt b/src/runtime_src/core/common/runner/test/CMakeLists.txt
new file mode 100644
index 0000000000..1d519d5f40
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/CMakeLists.txt
@@ -0,0 +1,32 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
CMAKE_MINIMUM_REQUIRED(VERSION 3.18.0)
PROJECT(runner)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED OFF)
set(CMAKE_VERBOSE_MAKEFILE ON)
set(CMAKE_MSVC_RUNTIME_LIBRARY "MultiThreaded$<$<CONFIG:Debug>:Debug>")

if (WIN32)
  add_compile_options(/Zc:__cplusplus)
endif()

find_package(XRT REQUIRED HINTS ${XILINX_XRT}/share/cmake/XRT)
message("-- XRT_INCLUDE_DIRS=${XRT_INCLUDE_DIRS}")

add_executable(runner runner.cpp)
target_include_directories(runner PRIVATE ${XRT_INCLUDE_DIRS} ${XRT_ROOT}/src/runtime_src)
target_link_libraries(runner PRIVATE XRT::xrt_coreutil)

add_executable(recipe recipe.cpp)
target_include_directories(recipe PRIVATE ${XRT_INCLUDE_DIRS} ${XRT_ROOT}/src/runtime_src)
target_link_libraries(recipe PRIVATE XRT::xrt_coreutil)

if (NOT WIN32)
  target_link_libraries(runner PRIVATE pthread uuid dl)
  target_link_libraries(recipe PRIVATE pthread uuid dl)
endif()

install(TARGETS runner recipe)

diff --git a/src/runtime_src/core/common/runner/test/README.md b/src/runtime_src/core/common/runner/test/README.md
new file mode 100644
index 0000000000..88e2d43adb
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/README.md
@@ -0,0 +1,74 @@

# Runner tests

This directory contains runner test code.

## recipe.cpp

A test wrapper for creating a `runner` from a `run-recipe.json`. Used
for debugging purposes; it basically validates that the run-recipe can
be parsed and that resources can be created.

## runner.cpp

Complete host code for creating a runner and executing the execution
section of the recipe.

The code executes the recipe passed as an argument, with external
resources bound through command line switches.

```
% runner.exe [--resource name:path]* [--buffer name:path]* [--golden name:path]* --recipe <recipe.json>
```

The recipe references resources through `name` matching. External resources
must be made available to the runner in one of two ways:

1. The resource is bound to the runner after the runner has been created.
2. The resource is placed in an in-memory repository passed to the runner constructor.

The runner.cpp file supports creating `xrt::bo` external objects from
a binary file specified through the `--buffer name:path` command line switch.
This triggers the host code to create an `xrt::bo` and populate it
with the content of the file pointed to by `path`. The host code
binds this resource to the runner using 1) above before the runner is
executed. The `--buffer` switch can be repeated any number of times.
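A minimal sketch of what the host code does for each `--buffer` switch
(the buffer name, file name, and `read_file` helper are illustrative):

```
auto data = read_file("ifm.bin");                 // std::vector<char> with the file content
xrt::bo bo = xrt::ext::bo{device, data.size()};   // host-visible external buffer
std::copy(data.begin(), data.end(), bo.map<char*>());
bo.sync(XCL_BO_SYNC_BO_TO_DEVICE);
runner.bind("ifm", bo);                           // method 1) above
```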
The runner.cpp supports loading external resources, for example ELF
files, into memory before calling the constructor of the runner. This
is done using the `--resource name:path` command line switch and is
method 2) above. The content of the file pointed to by `path` is read
into memory and associated with `name` in an artifacts repository
passed as an argument to the runner constructor. The `--resource`
switch can be specified any number of times.

Finally, the runner supports loading golden data to be compared with
the content of an external buffer populated by the runner. This is
done using the `--golden name:path` command line switch. The `name` must
match that of an external buffer created with `--buffer`. The `path`
identifies a file with golden data. The golden data is compared to
the content of the external buffer after the runner has completed
execution.

The host code has the following steps:

1. Create an artifacts repository from the `--resource` switches
2. Create an xrt_core::runner object from the artifacts repository and the `recipe`
3. Create external buffer resources from the `--buffer` switches
4. Bind the external resources to the runner
5. Execute the runner
6. Wait for the runner to complete
7. Compare golden data specified in the `--golden` switches


## Build instructions

```
% mkdir build
% cd build
% cmake -DXILINX_XRT=c:/users/stsoe/git/stsoe/XRT-MCDM/build/WDebug/xilinx/xrt \
  -DXRT_ROOT=c:/users/stsoe/git/stsoe/XRT-MCDM/src/xrt ..
% cmake --build . --config Debug
```

diff --git a/src/runtime_src/core/common/runner/test/cpulib.cpp b/src/runtime_src/core/common/runner/test/cpulib.cpp
new file mode 100644
index 0000000000..758aec76bb
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/cpulib.cpp
@@ -0,0 +1,95 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
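//
// Example CPU library used by the runner tests.  The library exports
// library_init(), which hands the runner a lookup function.  The lookup
// function maps a function name ("convert_ifm", "convert_ofm", "hello")
// to a type-erased callable operating on std::any arguments.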
#include "experimental/xrt_runner.h"
#include "xrt/xrt_bo.h"

#include <any>
#include <cstring>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

#ifdef _WIN32
# pragma warning(disable: 4100 4505)
#endif

namespace cpux {

static void
convert_ifm(std::vector<std::any>& args)
{
  auto src = std::any_cast<xrt::bo>(args.at(0));
  auto dst = std::any_cast<xrt::bo>(args.at(1));

  if (src.size() != dst.size())
    throw std::runtime_error("src and dst size mismatch");

  auto src_data = src.map();
  auto dst_data = dst.map();

  // convert
  std::memcpy(dst_data, src_data, src.size());
}

static void
convert_ofm(std::vector<std::any>& args)
{
  auto src = std::any_cast<xrt::bo>(args.at(0));
  auto dst = std::any_cast<xrt::bo>(args.at(1));

  if (src.size() != dst.size())
    throw std::runtime_error("src and dst size mismatch");

  auto src_data = src.map();
  auto dst_data = dst.map();

  // convert
  std::memcpy(dst_data, src_data, src.size());
}

static void
hello(const std::vector<std::any>& args)
{
  auto value = std::any_cast<int>(args.at(0));
  auto str = std::any_cast<std::string>(args.at(1));
  auto out = std::any_cast<std::string*>(args.at(2));

  if (!out)
    throw std::runtime_error("output argument is null");

  *out = "hello out " + std::to_string(value) + " " + str;
}

static void
lookup(const std::string& fnm, xrt::cpu::lookup_args* args)
{
  using function_info = xrt::cpu::lookup_args;
  static std::map<std::string, function_info> function_map =
  {
    { "convert_ifm", {2, convert_ifm} },
    { "convert_ofm", {2, convert_ofm} },
    { "hello", {3, hello} },
  };

  if (auto it = function_map.find(fnm); it != function_map.end()) {
    const auto& [num_args, fn] = it->second;
    args->num_args = num_args;
    args->callable = fn;
    return;
  }

  throw std::runtime_error("function '" + fnm + "' not found");
}

} // cpux

extern "C" {

#ifdef _WIN32
__declspec(dllexport)
#endif
void
library_init(xrt::cpu::library_init_args* args)
{
  args->lookup_fn = &cpux::lookup;
}

} // extern "C"

diff --git a/src/runtime_src/core/common/runner/test/recipe.cpp b/src/runtime_src/core/common/runner/test/recipe.cpp
new file mode 100644
index 0000000000..073df26e6c
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/recipe.cpp
@@ -0,0 +1,38 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
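//
// Minimal test that constructs an xrt_core::runner from a run-recipe
// json.  It validates that the recipe can be parsed and that its
// resources can be created; it does not execute the recipe.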
#include "core/common/runner/runner.h"
#include "xrt/xrt_device.h"

#include <iostream>
#include <string>

#ifdef _WIN32
# pragma warning (disable: 4100)
#endif

static void
run(int argc, char* argv[])
{
  std::string recipe { argv[1] };
  xrt::device device{0};

  xrt_core::runner runner{device, recipe};
}

int
main(int argc, char* argv[])
{
  try {
    if (argc < 2) {
      std::cout << "Usage: " << argv[0] << " <recipe.json>" << '\n';
      return 1;
    }

    run(argc, argv);
    return 0;
  }
  catch (const std::exception& e) {
    std::cerr << "Error: " << e.what() << '\n';
    return 1;
  }
}

diff --git a/src/runtime_src/core/common/runner/test/recipe.json b/src/runtime_src/core/common/runner/test/recipe.json
new file mode 100644
index 0000000000..d2f3f80cd8
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/recipe.json
@@ -0,0 +1,83 @@
{
  "header": {
    "xclbin_path": "design.xclbin"
  },
  "resources": {
    "buffers": [
      {
        "name": "wts",
        "type": "input"
      },
      {
        "name": "ifm",
        "type": "input"
      },
      {
        "name": "ifm_int",
        "type": "internal",
        "size": "1536"
      },
      {
        "name": "ofm_int",
        "type": "internal",
        "size": "320"
      },
      {
        "name": "ofm",
        "type": "output"
      }
    ],
    "cpus": [
      {
        "name": "convert_ifm",
        "library_path": "cpulib"
      },
      {
        "name": "convert_ofm",
        "library_path": "cpulib"
      }
    ],
    "kernels": [
      {
        "name": "k1",
        "xclbin_kernel_name": "DPU",
        "ctrlcode": "no-ctrl-packet.elf"
      }
    ]
  },
  "execution": {
    "runs": [
      {
        "name": "convert_ifm",
        "where": "cpu",
        "arguments" : [
          { "name": "ifm", "argidx": 0 },
          { "name": "ifm_int", "argidx": 1 }
        ]
      },
      {
        "name": "k1",
        "arguments" : [
          { "name": "wts", "argidx": 4 },
          { "name": "ifm_int", "argidx": 3 },
          { "name": "ofm_int", "argidx": 5 }
        ],
        "constants": [
          { "value": "3", "type": "int", "argidx": 0 },
          { "value": "0", "type": "int", "argidx": 1 },
          { "value": "0", "type": "int", "argidx": 2 },
          { "value": "0", "type": "int", "argidx": 6 },
          { "value": "0", "type": "int", "argidx": 7 }
        ]
      },
      {
        "name": "convert_ofm",
        "where": "cpu",
        "arguments" : [
          { "name": "ofm_int", "argidx": 0 },
          { "name": "ofm", "argidx": 1 }
        ]
      }
    ]
  }
}

diff --git a/src/runtime_src/core/common/runner/test/runner.cpp b/src/runtime_src/core/common/runner/test/runner.cpp
new file mode 100644
index 0000000000..93009da5de
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/runner.cpp
@@ -0,0 +1,208 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.

// This test configures and runs a recipe one time
// g++ -g -std=c++17
//   -I/home/stsoe/git/stsoe/XRT/build/Debug/opt/xilinx/xrt/include
//   -I/home/stsoe/git/stsoe/XRT/src/runtime_src
//   -L/home/stsoe/git/stsoe/XRT/build/Debug/opt/xilinx/xrt/lib
//   -o runner.exe runner.cpp -lxrt_coreutil -pthread
//
// or
//
// mkdir build
// cd build
// cmake -DXILINX_XRT=/home/stsoe/git/stsoe/XRT/build/Debug/opt/xilinx/xrt
//   -DXRT_ROOT=/home/stsoe/git/stsoe/XRT ..
// cmake --build . --config Debug
//
// ./runner.exe --resource ... --buffer ... --golden ... --recipe recipe.json
#include "xrt/xrt_device.h"
#include "experimental/xrt_ext.h"
#include "core/common/runner/runner.h"

#include <algorithm>
#include <cstdint>
#include <fstream>
#include <iostream>
#include <map>
#include <stdexcept>
#include <string>
#include <vector>

static xrt_core::runner::artifacts_repository g_repo;
static std::map<std::string, std::string> g_buffer2data;
static std::map<std::string, xrt::bo> g_buffer2bo;
static std::map<std::string, std::string> g_buffer2golden;
static std::string g_recipe;

static void
usage()
{
  std::cout << "usage: runner.exe [options]\n";
  std::cout << " --resource <key:path>   artifact key/data pair; the key is referenced by the recipe\n";
  std::cout << " --buffer <name:path>    external buffer data; the name is referenced by the recipe\n";
  std::cout << " --golden <name:path>    golden data for an external buffer; the name matches a --buffer pair\n";
  std::cout << " --recipe <recipe.json>  recipe file to run\n";
  std::cout << "\n\n";
  std::cout << "runner.exe --resource elf:foo.elf \n"
            << " --buffer ifm:ifm.bin --buffer ofm:ofm.bin --buffer wts:wts.bin\n"
            << " --golden ofm:gold.bin\n"
            << " --recipe recipe.json\n";
}

static std::vector<char>
read_file(const std::string& fnm)
{
  std::ifstream ifs{fnm, std::ios::binary};
  if (!ifs)
    throw std::runtime_error("Failed to open file '" + fnm + "' for reading");

  ifs.seekg(0, std::ios::end);
  std::vector<char> data(ifs.tellg());
  ifs.seekg(0, std::ios::beg);
  ifs.read(data.data(), data.size());
  return data;
}

static void
add_repo_file(const std::string& key, const std::string& path)
{
  auto data = read_file(path);
  g_repo.emplace(key, std::move(data));
}

static void
run(const xrt::device& device, const std::string& recipe)
{
  // 1. Add artifacts to the repository (done during cmdline parsing)

  // 2. Create the runner from the recipe
  xrt_core::runner runner {device, recipe, g_repo};

  // 3. Create buffers for external input and output
  // 4. Bind to runner
  for (auto& [buffer, path] : g_buffer2data) {
    auto data = read_file(path);
    std::cout << buffer << " size = " << data.size() << "\n";
    xrt::bo bo = xrt::ext::bo{device, data.size()};
    auto bo_data = bo.map<char*>();
    std::copy(data.data(), data.data() + data.size(), bo_data);
    bo.sync(XCL_BO_SYNC_BO_TO_DEVICE);
    runner.bind(buffer, bo);

    // Save if referenced for golden comparison
    g_buffer2bo.emplace(buffer, bo);
  }

  // 5. Execute the runner
  runner.execute();

  // 6. Wait for the runner to finish
  runner.wait();

  // 7. Compare the output with golden data, if any
  for (auto& [buffer, golden] : g_buffer2golden) {
    auto bo = g_buffer2bo.at(buffer);
    bo.sync(XCL_BO_SYNC_BO_FROM_DEVICE);

    auto bo_data = bo.map<char*>();
    auto golden_data = read_file(golden);
    if (bo.size() != golden_data.size())
      throw std::runtime_error("Golden and output size mismatch");

    std::cout << "Comparing golden and output data\n";
    if (!std::equal(golden_data.data(), golden_data.data() + golden_data.size(), bo_data)) {
      for (uint64_t i = 0; i < golden_data.size(); ++i) {
        if (golden_data[i] != bo_data[i])
          throw std::runtime_error("Golden and output mismatch at index " + std::to_string(i));
      }
    }
  }
}

static void
run(const std::string& recipe)
{
  // Create device
  xrt::device device{0};
  run(device, recipe);
}

static void
run(int argc, char* argv[])
{
  std::vector<std::string> args(argv + 1, argv + argc);
  std::string cur;
  std::string recipe;
  for (auto& arg : args) {
    if (arg == "-h") {
      usage();
      return;
    }

    if (arg[0] == '-') {
      cur = arg;
      continue;
    }

    if (cur == "--resource" || cur == "-r") {
      auto pos = arg.find(":");
      if (pos == std::string::npos)
        throw std::runtime_error("resource option must take the form of '--resource key:path'");

      auto key = arg.substr(0, pos);
      auto path = arg.substr(pos + 1);

      std::cout << "Adding repo (key, path): (" << key << ", " << path << ")\n";
      add_repo_file(key, path);
    }
    else if (cur == "--buffer" || cur == "-b") {
      auto pos = arg.find(":");
      if (pos == std::string::npos)
        throw std::runtime_error("buffer data option must take the form of '--buffer buffer:path'");

      auto buffer = arg.substr(0, pos);
      auto datapath = arg.substr(pos + 1);

      std::cout << "Using (buffer, path): (" << buffer << ", " << datapath << ")\n";
      g_buffer2data.emplace(buffer, datapath);
    }
    else if (cur == "--golden" || cur == "-g") {
      auto pos = arg.find(":");
      if (pos == std::string::npos)
        throw std::runtime_error("golden data option must take the form of '--golden buffer:path'");

      auto buffer = arg.substr(0, pos);
      auto datapath = arg.substr(pos + 1);

      std::cout << "Using golden (buffer, path): (" << buffer << ", " << datapath << ")\n";
      g_buffer2golden.emplace(buffer, datapath);
    }
    else if (cur == "--recipe") {
      std::cout << "Using recipe: " << arg << '\n';
      recipe = arg;
    }
    else
      throw std::runtime_error("Unknown option value " + cur + " " + arg);
  }

  run(recipe);
}

int
main(int argc, char **argv)
{
  try {
    run(argc, argv);
    return 0;
  }
  catch (const std::exception& ex) {
    std::cerr << "Error: " << ex.what() << '\n';
  }
  catch (...) {
    std::cerr << "Unknown error\n";
  }
  return 1;
}

diff --git a/src/runtime_src/core/common/runner/test/tcpu.cpp b/src/runtime_src/core/common/runner/test/tcpu.cpp
new file mode 100644
index 0000000000..4b05f1411e
--- /dev/null
+++ b/src/runtime_src/core/common/runner/test/tcpu.cpp
@@ -0,0 +1,42 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.
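//
// Standalone test of the CPU function infrastructure.  Loads the CPU
// library passed on the command line, looks up the "hello" function,
// and invokes it through xrt_core::cpu::run with type-erased
// (std::any) arguments.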
#include <iostream>
#include <string>

#include "../xrt_runner.h"
#include "../cpu.h"

static void
run(int argc, char **argv)
{
  if (argc != 2) {
    std::cerr << "Usage: " << argv[0] << " <library>\n";
    return;
  }

  auto dll = argv[1];
  xrt_core::cpu::function hello{"hello", dll};
  xrt_core::cpu::run run{hello};
  run.set_arg(0, 10);
  run.set_arg(1, std::string("world"));
  std::string out;
  run.set_arg(2, &out);
  run.execute();
  std::cout << out << "\n";
}

int
main(int argc, char **argv)
{
  try {
    run(argc, argv);
    return 0;
  }
  catch (const std::exception& ex) {
    std::cerr << "Error: " << ex.what() << "\n";
  }
  catch (...) {
    std::cerr << "Unknown error" << "\n";
  }
  return 1;
}