From 12984b3f889001fa57f4b4e9d4b9956f3d14919c Mon Sep 17 00:00:00 2001 From: nullptr Date: Fri, 22 Nov 2024 07:47:14 +0000 Subject: [PATCH] feat: halio engine and halio nms for yolo detection --- sscma/core/engine/ma_engine_halio.cpp | 139 +++++++++++------- sscma/core/engine/ma_engine_halio.h | 6 +- sscma/core/ma_types.h | 3 +- sscma/core/model/ma_model_yolov5.cpp | 204 +++++++++++++++++++++++++- sscma/core/model/ma_model_yolov5.h | 2 + 5 files changed, 297 insertions(+), 57 deletions(-) diff --git a/sscma/core/engine/ma_engine_halio.cpp b/sscma/core/engine/ma_engine_halio.cpp index 2f1c9df0..52f6a920 100644 --- a/sscma/core/engine/ma_engine_halio.cpp +++ b/sscma/core/engine/ma_engine_halio.cpp @@ -121,11 +121,16 @@ ma_err_t EngineHalio::load(const string& model_path) { { - auto create_internal_bindings = [&](const string& name, const InferModel::InferStream& tsr, shared_ptr& tensor) { + auto create_internal_bindings = + [&](const string& name, const InferModel::InferStream& tsr, shared_ptr& tensor, hailort::ConfiguredInferModel::Bindings::InferStream* cis, bool is_input) -> ma_err_t { auto shape = tsr.shape(); auto size = tsr.get_frame_size(); auto format = tsr.format(); + if (!cis) { + return MA_FAILED; + } + void* buffer = aligned_alloc(4096, size); if (!buffer) { return MA_ENOMEM; @@ -145,27 +150,30 @@ ma_err_t EngineHalio::load(const string& model_path) { return MA_ENOMEM; } + cis->set_buffer(MemoryView(buffer, size)); + tensor->data.data = buffer; tensor->size = size; - tensor->shape.size = 3; + tensor->shape.size = 4; + tensor->shape.dims[0] = 1; switch (format.order) { case HAILO_FORMAT_ORDER_NCHW: - tensor->shape.dims[0] = shape.features; - tensor->shape.dims[1] = shape.height; - tensor->shape.dims[2] = shape.width; + tensor->shape.dims[1] = shape.features; + tensor->shape.dims[2] = shape.height; + tensor->shape.dims[3] = shape.width; break; case HAILO_FORMAT_ORDER_NHWC: case HAILO_FORMAT_ORDER_FCR: case HAILO_FORMAT_ORDER_HAILO_NMS: - 
tensor->shape.dims[0] = shape.height; - tensor->shape.dims[1] = shape.width; - tensor->shape.dims[2] = shape.features; + tensor->shape.dims[1] = shape.height; + tensor->shape.dims[2] = shape.width; + tensor->shape.dims[3] = shape.features; break; case HAILO_FORMAT_ORDER_NHCW: - tensor->shape.dims[0] = shape.height; - tensor->shape.dims[1] = shape.features; - tensor->shape.dims[2] = shape.width; + tensor->shape.dims[1] = shape.height; + tensor->shape.dims[2] = shape.features; + tensor->shape.dims[3] = shape.width; break; default: break; @@ -192,44 +200,76 @@ ma_err_t EngineHalio::load(const string& model_path) { break; case HAILO_FORMAT_TYPE_FLOAT32: tensor->type = MA_TENSOR_TYPE_F32; - if (format.order == HAILO_FORMAT_ORDER_HAILO_NMS) { + break; + default: + tensor->type = MA_TENSOR_TYPE_NONE; + break; + } + + if (format.order == HAILO_FORMAT_ORDER_HAILO_NMS) { + switch (format.type) { + case HAILO_FORMAT_TYPE_UINT16: + tensor->type = MA_TENSOR_TYPE_NMS_BBOX_U16; + break; + case HAILO_FORMAT_TYPE_FLOAT32: tensor->type = MA_TENSOR_TYPE_NMS_BBOX_F32; + break; + default: + tensor->type = MA_TENSOR_TYPE_NONE; + break; + } - function f = [this_ptr = this, name](int flag, void* data, size_t size) -> ma_err_t { - if (!data || sizeof(float) != size) { + auto fp = make_shared([this_ptr = this, name, is_input](int flag, void* data, size_t size) -> ma_err_t { + if (!data) { + return MA_EINVAL; + } + auto tsr = is_input ? 
this_ptr->_model->input(name) : this_ptr->_model->output(name); + if (!tsr) { + return MA_FAILED; + } + switch (flag) { + case 0: // get score threshold + return MA_ENOTSUP; + case 1: // set score threshold + { + if (sizeof(float) != size) { return MA_EINVAL; } float threshold = *static_cast(data); - auto tsr = this_ptr->_model->output(name); - if (!tsr) { + tsr->set_nms_score_threshold(threshold); + return MA_OK; + } + case 2: // get iou threshold + return MA_ENOTSUP; + case 3: // set iou threshold + { + if (sizeof(float) != size) { + return MA_EINVAL; + } + float threshold = *static_cast(data); + tsr->set_nms_iou_threshold(threshold); + return MA_OK; + } + case 4: // get nms shape + { + auto nms_shape = tsr->get_nms_shape(); + if (!nms_shape) { return MA_FAILED; } - switch (flag) { - case 0: // get score threshold - return MA_ENOTSUP; - case 1: // set score threshold - tsr->set_nms_score_threshold(threshold); - return MA_OK; - case 2: // get iou threshold - return MA_ENOTSUP; - case 3: // set iou threshold - tsr->set_nms_iou_threshold(threshold); - return MA_OK; - default: - return MA_ENOTSUP; + auto shape = nms_shape.value(); + if (sizeof(hailo_nms_shape_t) != size) { + return MA_EINVAL; } - }; - - _external_handlers[name] = f; - if (!_external_handlers[name]) { - break; + *static_cast(data) = shape; + return MA_OK; } - tensor->external_handler = reinterpret_cast(&_external_handlers[name]); + default: + return MA_ENOTSUP; } - break; - default: - tensor->type = MA_TENSOR_TYPE_NONE; - break; + }); + + _external_handlers[name] = fp; + tensor->external_handler = reinterpret_cast(fp.get()); } _io_buffers[name] = tensor; @@ -243,14 +283,15 @@ ma_err_t EngineHalio::load(const string& model_path) { if (_io_buffers.find(name) != _io_buffers.end()) { continue; } - shared_ptr tensor = nullptr; - - auto ret = create_internal_bindings(name, tsr, tensor); + auto bindings_input = _bindings->input(name); + if (!bindings_input) { + return MA_FAILED; + } + auto ret = 
create_internal_bindings(name, tsr, tensor, &bindings_input.value(), true); if (ret != MA_OK) { return ret; } - _input_tensors.push_back(tensor); } @@ -260,19 +301,19 @@ ma_err_t EngineHalio::load(const string& model_path) { if (_io_buffers.find(name) != _io_buffers.end()) { continue; } - shared_ptr tensor = nullptr; - - auto ret = create_internal_bindings(name, tsr, tensor); + auto bindings_output = _bindings->output(name); + if (!bindings_output) { + return MA_FAILED; + } + auto ret = create_internal_bindings(name, tsr, tensor, &bindings_output.value(), false); if (ret != MA_OK) { return ret; } - _output_tensors.push_back(tensor); } } - return MA_OK; } @@ -348,7 +389,7 @@ ma_quant_param_t EngineHalio::getOutputQuantParam(int32_t index) { ma_err_t EngineHalio::setInput(int32_t index, const ma_tensor_t& tensor) { - return MA_ENOTSUP; + return MA_ENOTSUP; } } // namespace ma::engine diff --git a/sscma/core/engine/ma_engine_halio.h b/sscma/core/engine/ma_engine_halio.h index ad7e16ee..21757674 100644 --- a/sscma/core/engine/ma_engine_halio.h +++ b/sscma/core/engine/ma_engine_halio.h @@ -23,6 +23,8 @@ using namespace hailort; class EngineHalio final : public Engine { public: + using ExternalHandler = function; + EngineHalio(); ~EngineHalio() override; @@ -56,7 +58,7 @@ class EngineHalio final : public Engine { shared_ptr _bindings; unordered_map> _io_buffers; - unordered_map> _external_handlers; + unordered_map> _external_handlers; vector> _input_tensors; vector> _output_tensors; @@ -66,4 +68,4 @@ class EngineHalio final : public Engine { #endif -#endif +#endif \ No newline at end of file diff --git a/sscma/core/ma_types.h b/sscma/core/ma_types.h index 5ba0ffcf..984c65e5 100644 --- a/sscma/core/ma_types.h +++ b/sscma/core/ma_types.h @@ -67,7 +67,8 @@ typedef enum { MA_TENSOR_TYPE_STR = 12, MA_TENSOR_TYPE_BOOL = 13, MA_TENSOR_TYPE_BF16 = 14, - MA_TENSOR_TYPE_NMS_BBOX_F32 = 15, + MA_TENSOR_TYPE_NMS_BBOX_U16 = 15, + MA_TENSOR_TYPE_NMS_BBOX_F32 = 16, } ma_tensor_type_t; 
typedef struct { diff --git a/sscma/core/model/ma_model_yolov5.cpp b/sscma/core/model/ma_model_yolov5.cpp index 7472726d..a4169f06 100644 --- a/sscma/core/model/ma_model_yolov5.cpp +++ b/sscma/core/model/ma_model_yolov5.cpp @@ -1,6 +1,7 @@ #include #include -#include +#include +#include #include "../utils/ma_nms.h" @@ -22,8 +23,7 @@ YoloV5::YoloV5(Engine* p_engine_) : Detector(p_engine_, "yolov5", MA_MODEL_TYPE_ YoloV5::~YoloV5() {} -bool YoloV5::isValid(Engine* engine) { - +static bool generalValid(Engine* engine) { const auto inputs_count = engine->getInputSize(); const auto outputs_count = engine->getOutputSize(); @@ -62,9 +62,54 @@ bool YoloV5::isValid(Engine* engine) { return true; } -ma_err_t YoloV5::postprocess() { - results_.clear(); +static bool nmsValid(Engine* engine) { +#if MA_USE_ENGINE_HALIO + if (engine->getInputSize() != 1 || engine->getOutputSize() != 1) + return false; + + auto input = engine->getInput(0); + auto output = engine->getOutput(0); + + if (input.shape.size != 4 || output.shape.size != 4) + return false; + + auto n = input.shape.dims[0]; + auto h = input.shape.dims[1]; + auto w = input.shape.dims[2]; + auto c = input.shape.dims[3]; + + if (n != 1 || h < 32 || h % 32 != 0 || (c != 3 && c != 1)) + return false; + + auto b = output.shape.dims[0]; + auto cs = output.shape.dims[1]; + auto mb = output.shape.dims[2]; + auto f = output.shape.dims[3]; + + if (b != 1 || cs <= 0 || mb <= 1 || f != 0) + return false; + return true; +#else + return false; +#endif +} + +bool YoloV5::isValid(Engine* engine) { + if (!engine || engine->getOutputSize() != 1) + return false; + auto output = engine->getOutput(0); + + switch (output.type) { + case MA_TENSOR_TYPE_NMS_BBOX_U16: + case MA_TENSOR_TYPE_NMS_BBOX_F32: + return nmsValid(engine); + default: + return generalValid(engine); + } +} + +ma_err_t YoloV5::generalPostProcess() { if (output_.type == MA_TENSOR_TYPE_S8) { auto* data = output_.data.s8; auto scale = output_.quant_param.scale; @@ -161,4 +206,153 
@@ ma_err_t YoloV5::postprocess() { return MA_OK; } + +ma_err_t YoloV5::nmsPostProcess() { +#if MA_USE_ENGINE_HALIO + + auto& output = output_; + + if (output.shape.size < 4) { + return MA_FAILED; + } + + size_t w = output.shape.dims[1]; + size_t h = output.shape.dims[2]; + size_t c = output.shape.dims[3]; + + hailo_nms_shape_t nms_shape; + if (output.external_handler) { + auto rc = (*reinterpret_cast(output.external_handler))(4, &nms_shape, sizeof(hailo_nms_shape_t)); + if (rc == MA_OK) { + w = nms_shape.number_of_classes; + h = nms_shape.max_bboxes_per_class; + c = nms_shape.max_accumulated_mask_size; + } + } + + switch (output.type) { + case MA_TENSOR_TYPE_NMS_BBOX_U16: { + using T = uint16_t; + using P = hailo_bbox_t; + + const auto zp = output.quant_param.zero_point; + const auto scale = output.quant_param.scale; + + auto ptr = output.data.u8; + for (size_t i = 0; i < w; ++i) { + auto bc = *reinterpret_cast(ptr); + ptr += sizeof(T); + + if (bc <= 0) { + continue; + } else if (bc > h) { + break; + } + + for (size_t j = 0; j < static_cast(bc); ++j) { + auto bbox = *reinterpret_cast(ptr); + ptr += sizeof(P); + + ma_bbox_t res; + + auto x_min = static_cast(bbox.x_min - zp) * scale; + auto y_min = static_cast(bbox.y_min - zp) * scale; + auto x_max = static_cast(bbox.x_max - zp) * scale; + auto y_max = static_cast(bbox.y_max - zp) * scale; + res.w = x_max - x_min; + res.h = y_max - y_min; + res.x = x_min + res.w * 0.5; + res.y = y_min + res.h * 0.5; + res.score = static_cast(bbox.score - zp) * scale; + + res.target = static_cast(i); + + res.x = MA_CLIP(res.x, 0, 1.0f); + res.y = MA_CLIP(res.y, 0, 1.0f); + res.w = MA_CLIP(res.w, 0, 1.0f); + res.h = MA_CLIP(res.h, 0, 1.0f); + + results_.emplace_front(res); + } + } + } break; + + case MA_TENSOR_TYPE_NMS_BBOX_F32: { + using T = float32_t; + using P = hailo_bbox_float32_t; + + auto ptr = output.data.u8; + for (size_t i = 0; i < w; ++i) { + auto bc = *reinterpret_cast(ptr); + ptr += sizeof(T); + + if (bc <= 0) { + 
continue; + } else if (bc > h) { + break; + } + + for (size_t j = 0; j < static_cast(bc); ++j) { + auto bbox = *reinterpret_cast(ptr); + ptr += sizeof(P); + + ma_bbox_t res; + + res.w = bbox.x_max - bbox.x_min; + res.h = bbox.y_max - bbox.y_min; + res.x = bbox.x_min + res.w * 0.5; + res.y = bbox.y_min + res.h * 0.5; + res.score = bbox.score; + + res.target = static_cast(i); + + res.x = MA_CLIP(res.x, 0, 1.0f); + res.y = MA_CLIP(res.y, 0, 1.0f); + res.w = MA_CLIP(res.w, 0, 1.0f); + res.h = MA_CLIP(res.h, 0, 1.0f); + + results_.emplace_front(res); + } + } + } break; + + default: + return MA_ENOTSUP; + } + + return MA_OK; +#else + return MA_FAILED; +#endif +} + +ma_err_t YoloV5::postprocess() { + results_.clear(); + + switch (output_.type) { + case MA_TENSOR_TYPE_NMS_BBOX_U16: + case MA_TENSOR_TYPE_NMS_BBOX_F32: { + // TODO: can be optimized by not calling this handler for each frame + if (output_.external_handler) { + auto ph = reinterpret_cast(output_.external_handler); + float thr = threshold_score_; + auto rc = (*ph)(1, &thr, sizeof(float)); + if (rc == MA_OK) { + threshold_score_ = thr; + } + thr = threshold_nms_; + rc = (*ph)(3, &thr, sizeof(float)); + if (rc == MA_OK) { + threshold_nms_ = thr; + } + } + return nmsPostProcess(); + } + + default: + return generalPostProcess(); + } + + return MA_ENOTSUP; +} } // namespace ma::model diff --git a/sscma/core/model/ma_model_yolov5.h b/sscma/core/model/ma_model_yolov5.h index 002855b1..6941d791 100644 --- a/sscma/core/model/ma_model_yolov5.h +++ b/sscma/core/model/ma_model_yolov5.h @@ -24,6 +24,8 @@ class YoloV5 : public Detector { protected: ma_err_t postprocess() override; + ma_err_t generalPostProcess(); + ma_err_t nmsPostProcess(); public: YoloV5(Engine* engine);