Skip to content

Commit

Permalink
VITIS-11401 : Shim DMA based array reconfiguration overhead (#8308)
Browse files Browse the repository at this point in the history
* VITIS-11401 : Shim DMA based array reconfiguration overhead

Signed-off-by: Akshay Tondak <[email protected]>

* VITIS-11401 : Shim DMA based array reconfiguration overhead v2

Signed-off-by: Akshay Tondak <[email protected]>

* VITIS-11401 : Shim DMA based array reconfiguration overhead v3 : Name changes

Signed-off-by: Akshay Tondak <[email protected]>

* VITIS-11401: Fix for review comments
Signed-off-by: Akshay Tondak <[email protected]>

* Algorithm update

Signed-off-by: Akshay Tondak <[email protected]>

* Unit change to ms for better visualization

Signed-off-by: Akshay Tondak <[email protected]>

---------

Signed-off-by: Akshay Tondak <[email protected]>
Co-authored-by: Akshay Tondak <[email protected]>
  • Loading branch information
aktondak and Akshay Tondak authored Aug 21, 2024
1 parent 64dae6c commit 276e24a
Show file tree
Hide file tree
Showing 11 changed files with 249 additions and 179 deletions.
5 changes: 4 additions & 1 deletion src/runtime_src/core/common/query_requests.h
Original file line number Diff line number Diff line change
Expand Up @@ -566,7 +566,8 @@ struct sequence_name : request
df_bandwidth,
tct_one_column,
tct_all_column,
gemm_int8
gemm_int8,
aie_reconfig_overhead
};

static std::string
Expand All @@ -581,6 +582,8 @@ struct sequence_name : request
return "tct_all_column";
case type::gemm_int8:
return "gemm_int8";
case type::aie_reconfig_overhead:
return "aie_reconfig_overhead";
}
return "unknown";
}
Expand Down
40 changes: 40 additions & 0 deletions src/runtime_src/core/tools/common/TestRunner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ namespace XBU = XBUtilities;
#include <boost/property_tree/json_parser.hpp>

// System - Include Files
#include <fstream>
#include <iostream>
#include <regex>
#include <thread>
Expand Down Expand Up @@ -359,6 +360,45 @@ TestRunner::validate_binary_file(const std::string& binaryfile)
return EXIT_SUCCESS;
}

// Copy values from text files into buff, expecting values are ascii encoded hex
//
// Every line of dpu_file that is neither empty nor starts with '#' is parsed
// as one hexadecimal word and stored, in file order, into the mapping of
// bo_instr. Lines that fail to parse are stored as 0.
//
// Throws std::runtime_error if the file cannot be opened or if it holds more
// instruction words than bo_instr can store.
void
TestRunner::init_instr_buf(xrt::bo &bo_instr, const std::string& dpu_file) {
  std::ifstream dpu_stream(dpu_file);
  if (!dpu_stream.is_open()) {
    throw std::runtime_error(boost::str(boost::format("Failed to open %s for reading") % dpu_file));
  }

  auto instr = bo_instr.map<int*>();
  // Number of int-sized words the buffer can hold; guards against writing
  // past the end of the mapping when the file and the buffer disagree.
  const size_t capacity = bo_instr.size() / sizeof(int);
  size_t count = 0;
  std::string line;
  while (std::getline(dpu_stream, line)) {
    // Blank lines carry no instruction; '#' introduces a comment line.
    // (line.at(0) on a blank line would throw std::out_of_range.)
    if (line.empty() || line.front() == '#') {
      continue;
    }
    if (count >= capacity) {
      throw std::runtime_error(boost::str(boost::format("Instruction buffer is too small for %s") % dpu_file));
    }
    std::stringstream ss(line);
    unsigned int word = 0;
    ss >> std::hex >> word;
    instr[count++] = word;
  }
}

// Count the instruction words contained in dpu_file.
//
// A line counts as one word when it is neither empty nor starts with '#',
// which is the same rule init_instr_buf uses when filling the buffer.
//
// Returns the number of instruction words (always > 0).
// Throws std::runtime_error if the file cannot be opened or holds no words.
size_t
TestRunner::get_instr_size(const std::string& dpu_file) {
  std::ifstream file(dpu_file);
  if (!file.is_open()) {
    throw std::runtime_error(boost::str(boost::format("Failed to open %s for reading") % dpu_file));
  }
  size_t size = 0;
  std::string line;
  while (std::getline(file, line)) {
    // Skip blank lines instead of throwing from line.at(0)
    if (!line.empty() && line.front() != '#') {
      size++;
    }
  }
  if (size == 0) {
    throw std::runtime_error("Invalid DPU instruction length");
  }
  return size;
}
bool
TestRunner::search_and_program_xclbin(const std::shared_ptr<xrt_core::device>& dev, boost::property_tree::ptree& ptTest)
{
Expand Down
3 changes: 3 additions & 0 deletions src/runtime_src/core/tools/common/TestRunner.h
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
#include "core/common/query_requests.h"
#include "JSONConfigurable.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_bo.h"

// 3rd Party Library - Include Files
#include <boost/property_tree/ptree.hpp>
Expand Down Expand Up @@ -45,6 +46,8 @@ class TestRunner : public JSONConfigurable {
std::vector<std::string> findDependencies( const std::string& test_path,
const std::string& ps_kernel_name);
int validate_binary_file(const std::string& binaryfile);
void init_instr_buf(xrt::bo &bo_instr, const std::string& dpu_file);
size_t get_instr_size(const std::string& dpu_file);

const std::string test_token_skipped = "SKIPPED";
const std::string test_token_failed = "FAILED";
Expand Down
182 changes: 182 additions & 0 deletions src/runtime_src/core/tools/common/tests/TestAIEReconfigOverhead.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
// ------ I N C L U D E F I L E S -------------------------------------------
// Local - Include Files

#include "TestAIEReconfigOverhead.h"
#include "tools/common/XBUtilities.h"
#include "xrt/xrt_bo.h"
#include "xrt/xrt_device.h"
#include "xrt/xrt_hw_context.h"
#include "xrt/xrt_kernel.h"

// System - Include Files
#include <fstream>
#include <filesystem>
#include <thread>
#include <iostream>
// Size of the input and output buffers, in MB and in bytes
static constexpr size_t buffer_size_mb{128};
static constexpr size_t buffer_size{buffer_size_mb * 1024 * 1024}; //128 MB
// Number of ints written to the input buffer; also used as the byte count
// of the read-back comparison
static constexpr size_t word_count{buffer_size / 16};
// Number of kernel runs averaged for each timing measurement
static constexpr int itr_count{1000};
// Size in bytes of the intermediate buffer
static constexpr size_t inter_size{1024 * 1024};
// Byte offset into the output buffer where the read-back comparison starts
static constexpr unsigned int StartAddr{32 * 1024 * 1024};

// Registers the test with TestRunner under the name "aie-reconfig-overhead"
// together with its one-line description.
TestAIEReconfigOverhead::TestAIEReconfigOverhead()
  : TestRunner(
      "aie-reconfig-overhead",
      "Run end-to-end array reconfiguration overhead through shim DMA")
{
}

// Measure the average per-run overhead of array reconfiguration.
//
// The DPU kernel is run itr_count times with an all-zero (no-op) instruction
// buffer and itr_count times with the instruction sequence loaded from the
// platform's aie_reconfig_overhead file. The difference between the two
// average run times is logged in milliseconds.
//
// dev : device to run the test on
//
// Returns the test property tree. "status" is set to failed on any error
// detected here and to passed on success; when the xclbin or the sequence
// file does not exist the tree is returned as findPlatformFile left it.
boost::property_tree::ptree
TestAIEReconfigOverhead::run(std::shared_ptr<xrt_core::device> dev)
{
  boost::property_tree::ptree ptree = get_test_header();
  ptree.erase("xclbin");

  const auto xclbin_name = xrt_core::device_query<xrt_core::query::xclbin_name>(dev, xrt_core::query::xclbin_name::type::validate);
  auto xclbin_path = findPlatformFile(xclbin_name, ptree);
  if (!std::filesystem::exists(xclbin_path))
    return ptree;

  logger(ptree, "Xclbin", xclbin_path);

  xrt::xclbin xclbin;
  try {
    xclbin = xrt::xclbin(xclbin_path);
  }
  catch (const std::runtime_error& ex) {
    logger(ptree, "Error", ex.what());
    ptree.put("status", test_token_failed);
    return ptree;
  }

  // Determine The DPU Kernel Name
  auto xkernels = xclbin.get_kernels();

  auto itr = std::find_if(xkernels.begin(), xkernels.end(), [](xrt::xclbin::kernel& k) {
    auto name = k.get_name();
    return name.rfind("DPU",0) == 0; // Starts with "DPU"
  });

  xrt::xclbin::kernel xkernel;
  if (itr!=xkernels.end())
    xkernel = *itr;
  else {
    logger(ptree, "Error", "No kernel with `DPU` found in the xclbin");
    ptree.put("status", test_token_failed);
    return ptree;
  }
  auto kernelName = xkernel.get_name();
  if(XBUtilities::getVerbose())
    logger(ptree, "Details", boost::str(boost::format("Kernel name is '%s'") % kernelName));

  auto working_dev = xrt::device(dev);
  working_dev.register_xclbin(xclbin);
  xrt::hw_context hwctx;
  xrt::kernel kernel;
  try {
    hwctx = xrt::hw_context(working_dev, xclbin.get_uuid());
    kernel = xrt::kernel(hwctx, kernelName);
  }
  catch (const std::exception& ex)
  {
    logger(ptree, "Error", ex.what());
    ptree.put("status", test_token_failed);
    return ptree;
  }

  const auto seq_name = xrt_core::device_query<xrt_core::query::sequence_name>(dev, xrt_core::query::sequence_name::type::aie_reconfig_overhead);
  auto dpu_instr = findPlatformFile(seq_name, ptree);
  if (!std::filesystem::exists(dpu_instr))
    return ptree;

  logger(ptree, "DPU-Sequence", dpu_instr);

  // Number of instruction words in the sequence file
  size_t instr_size = 0;
  try {
    instr_size = get_instr_size(dpu_instr);
  }
  catch(const std::exception& ex) {
    logger(ptree, "Error", ex.what());
    ptree.put("status", test_token_failed);
    return ptree;
  }

  //Create BOs
  // argno walks the kernel argument indices; the bare argno++ statements
  // step over arguments that get no buffer object of their own. Both
  // instruction buffers deliberately use the same group id.
  int argno = 1;
  xrt::bo bo_ifm(working_dev, buffer_size, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(argno++));
  argno++;
  xrt::bo bo_ofm(working_dev, buffer_size, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(argno++));
  xrt::bo bo_inter(working_dev, inter_size, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(argno++));
  xrt::bo bo_instr(working_dev, instr_size*sizeof(int), XCL_BO_FLAGS_CACHEABLE, kernel.group_id(argno));
  xrt::bo bo_instr_no_op(working_dev, instr_size*sizeof(int), XCL_BO_FLAGS_CACHEABLE, kernel.group_id(argno++));
  argno++;
  xrt::bo bo_mc(working_dev, 16, XRT_BO_FLAGS_HOST_ONLY, kernel.group_id(argno++));

  // Loading the sequence can throw (unreadable file); report it as a test
  // failure rather than letting the exception escape the test.
  try {
    init_instr_buf(bo_instr, dpu_instr);
  }
  catch (const std::exception& ex) {
    logger(ptree, "Error", ex.what());
    ptree.put("status", test_token_failed);
    return ptree;
  }

  //Create ctrlcode with NOPs
  // instr_size counts words, so clear the whole buffer, not just the first
  // instr_size bytes of it.
  std::memset(bo_instr_no_op.map<char*>(), 0, instr_size * sizeof(int));

  // map input buffer
  // Incremental pattern: ifm[i] = i for the first word_count ints
  auto ifm_mapped = bo_ifm.map<int*>();
  for (size_t i = 0; i < word_count; i++)
    ifm_mapped[i] = static_cast<int>(i);

  //Sync BOs
  bo_instr.sync(XCL_BO_SYNC_BO_TO_DEVICE);
  // The no-op buffer is host-initialised as well and must reach the device
  bo_instr_no_op.sync(XCL_BO_SYNC_BO_TO_DEVICE);
  bo_mc.sync(XCL_BO_SYNC_BO_TO_DEVICE);
  bo_ifm.sync(XCL_BO_SYNC_BO_TO_DEVICE);

  //Log
  if(XBUtilities::getVerbose()) {
    logger(ptree, "Details", boost::str(boost::format("Buffer size: '%f'MB") % buffer_size_mb));
    logger(ptree, "Details", boost::str(boost::format("No. of iterations: '%f'") % itr_count));
  }

  // Baseline: average time per run with the no-op instruction buffer
  auto start = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < itr_count; i++) {
    try {
      auto run = kernel(1, bo_ifm, NULL, bo_ofm, bo_inter, bo_instr_no_op, instr_size, bo_mc);
      run.wait2();
    }
    catch (const std::exception& ex)
    {
      logger(ptree, "Error", ex.what());
      ptree.put("status", test_token_failed);
      return ptree;
    }
    bo_ofm.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
  }
  auto end = std::chrono::high_resolution_clock::now();
  float elapsedSecsNoOpAverage = std::chrono::duration_cast<std::chrono::duration<float>>(end-start).count();
  elapsedSecsNoOpAverage /= itr_count;

  // The output mapping is loop invariant, so map it once up front
  const auto* ofm_mapped = bo_ofm.map<int8_t*>();

  // Measurement: average time per run with the reconfiguration sequence.
  // NOTE(review): this timed window also contains the read-back comparison,
  // which the baseline loop does not perform - confirm this is intended.
  start = std::chrono::high_resolution_clock::now();
  for (int i = 0; i < itr_count; i++) {
    try {
      auto run = kernel(1, bo_ifm, NULL, bo_ofm, bo_inter, bo_instr, instr_size, bo_mc);
      run.wait2();
    }
    catch (const std::exception& ex)
    {
      logger(ptree, "Error", ex.what());
      ptree.put("status", test_token_failed);
      return ptree;
    }
    bo_ofm.sync(XCL_BO_SYNC_BO_FROM_DEVICE);
    // NOTE(review): compares word_count bytes, not word_count ints - confirm
    // the intended comparison length.
    if (std::memcmp(ifm_mapped, ofm_mapped + StartAddr, word_count)) {
      logger(ptree, "Error", "Value read back does not match reference for array reconfiguration instruction buffer");
      ptree.put("status", test_token_failed);
      return ptree;
    }
  }

  end = std::chrono::high_resolution_clock::now();
  float elapsedSecsAverage = std::chrono::duration_cast<std::chrono::duration<float>>(end-start).count();
  elapsedSecsAverage /= itr_count;
  // Overhead is the extra time per run relative to the no-op baseline
  float overhead = elapsedSecsAverage - elapsedSecsNoOpAverage;
  logger(ptree, "Details", boost::str(boost::format("Array reconfiguration overhead: '%.1f'ms") % (overhead * 1000)));

  ptree.put("status", test_token_passed);
  return ptree;
}
16 changes: 16 additions & 0 deletions src/runtime_src/core/tools/common/tests/TestAIEReconfigOverhead.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
// SPDX-License-Identifier: Apache-2.0
// Copyright (C) 2024 Advanced Micro Devices, Inc. All rights reserved.

#ifndef __TestArrayReconfOverhead_h_
#define __TestArrayReconfOverhead_h_

#include "tools/common/TestRunner.h"
#include "xrt/xrt_device.h"

// Test runner for the array reconfiguration overhead test.
class TestAIEReconfigOverhead : public TestRunner {
  public:
    TestAIEReconfigOverhead();

    // Runs the test on dev and returns the resulting property tree
    boost::property_tree::ptree run(std::shared_ptr<xrt_core::device> dev);
};
#endif
44 changes: 0 additions & 44 deletions src/runtime_src/core/tools/common/tests/TestDF_bandwidth.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,50 +28,6 @@ TestDF_bandwidth::TestDF_bandwidth()
: TestRunner("df-bw", "Run bandwidth test on data fabric")
{}

namespace {

// Copy values from text files into buff, expecting values are ascii encoded hex
//
// Every line of dpu_file that is neither empty nor starts with '#' is parsed
// as one hexadecimal word and stored, in file order, into the mapping of
// bo_instr. Lines that fail to parse are stored as 0.
//
// Throws std::runtime_error if the file cannot be opened or if it holds more
// instruction words than bo_instr can store.
static void
init_instr_buf(xrt::bo &bo_instr, const std::string& dpu_file) {
  std::ifstream dpu_stream(dpu_file);
  if (!dpu_stream.is_open()) {
    throw std::runtime_error(boost::str(boost::format("Failed to open %s for reading") % dpu_file));
  }

  auto instr = bo_instr.map<int*>();
  // Number of int-sized words the buffer can hold; guards against writing
  // past the end of the mapping when the file and the buffer disagree.
  const size_t capacity = bo_instr.size() / sizeof(int);
  size_t count = 0;
  std::string line;
  while (std::getline(dpu_stream, line)) {
    // Blank lines carry no instruction; '#' introduces a comment line.
    // (line.at(0) on a blank line would throw std::out_of_range.)
    if (line.empty() || line.front() == '#') {
      continue;
    }
    if (count >= capacity) {
      throw std::runtime_error(boost::str(boost::format("Instruction buffer is too small for %s") % dpu_file));
    }
    std::stringstream ss(line);
    unsigned int word = 0;
    ss >> std::hex >> word;
    instr[count++] = word;
  }
}

// Count the instruction words contained in dpu_file.
//
// A line counts as one word when it is neither empty nor starts with '#',
// which is the same rule init_instr_buf uses when filling the buffer.
//
// Returns the number of instruction words (always > 0).
// Throws std::runtime_error if the file cannot be opened or holds no words.
static size_t
get_instr_size(const std::string& dpu_file) {
  std::ifstream file(dpu_file);
  if (!file.is_open()) {
    throw std::runtime_error(boost::str(boost::format("Failed to open %s for reading") % dpu_file));
  }
  size_t size = 0;
  std::string line;
  while (std::getline(file, line)) {
    // Skip blank lines instead of throwing from line.at(0)
    if (!line.empty() && line.front() != '#') {
      size++;
    }
  }
  if (size == 0) {
    throw std::runtime_error("Invalid DPU instruction length");
  }
  return size;
}

} //anonymous namespace

boost::property_tree::ptree
TestDF_bandwidth::run(std::shared_ptr<xrt_core::device> dev)
{
Expand Down
44 changes: 0 additions & 44 deletions src/runtime_src/core/tools/common/tests/TestGemm.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,50 +35,6 @@ TestGemm::TestGemm()
: TestRunner("gemm", "Measure the TOPS value of GEMM operations")
{}

namespace {

// Copy values from text files into buff, expecting values are ascii encoded hex
//
// Every line of dpu_file that is neither empty nor starts with '#' is parsed
// as one hexadecimal word and stored, in file order, into the mapping of
// bo_instr. Lines that fail to parse are stored as 0.
//
// Throws std::runtime_error if the file cannot be opened or if it holds more
// instruction words than bo_instr can store.
static void
init_instr_buf(xrt::bo &bo_instr, const std::string& dpu_file) {
  std::ifstream dpu_stream(dpu_file);
  if (!dpu_stream.is_open()) {
    throw std::runtime_error(boost::str(boost::format("Failed to open %s for reading") % dpu_file));
  }

  auto instr = bo_instr.map<int*>();
  // Number of int-sized words the buffer can hold; guards against writing
  // past the end of the mapping when the file and the buffer disagree.
  const size_t capacity = bo_instr.size() / sizeof(int);
  size_t count = 0;
  std::string line;
  while (std::getline(dpu_stream, line)) {
    // Blank lines carry no instruction; '#' introduces a comment line.
    // (line.at(0) on a blank line would throw std::out_of_range.)
    if (line.empty() || line.front() == '#') {
      continue;
    }
    if (count >= capacity) {
      throw std::runtime_error(boost::str(boost::format("Instruction buffer is too small for %s") % dpu_file));
    }
    std::stringstream ss(line);
    unsigned int word = 0;
    ss >> std::hex >> word;
    instr[count++] = word;
  }
}

// Count the instruction words contained in dpu_file.
//
// A line counts as one word when it is neither empty nor starts with '#',
// which is the same rule init_instr_buf uses when filling the buffer.
//
// Returns the number of instruction words (always > 0).
// Throws std::runtime_error if the file cannot be opened or holds no words.
static size_t
get_instr_size(const std::string& dpu_file) {
  std::ifstream file(dpu_file);
  if (!file.is_open()) {
    throw std::runtime_error(boost::str(boost::format("Failed to open %s for reading") % dpu_file));
  }
  size_t size = 0;
  std::string line;
  while (std::getline(file, line)) {
    // Skip blank lines instead of throwing from line.at(0)
    if (!line.empty() && line.front() != '#') {
      size++;
    }
  }
  if (size == 0) {
    throw std::runtime_error("Invalid DPU instruction length");
  }
  return size;
}

} //anonymous namespace

boost::property_tree::ptree
TestGemm::run(std::shared_ptr<xrt_core::device> dev)
{
Expand Down
Loading

0 comments on commit 276e24a

Please sign in to comment.