From 509970ab15eab91aad7be8b4ef628f8d73d0b5f3 Mon Sep 17 00:00:00 2001
From: LynnL4
Date: Fri, 22 Nov 2024 06:15:24 +0000
Subject: [PATCH] feat: add support for YOLO11 segmentation model

---
 sscma/core/ma_types.h                    |  12 ++
 sscma/core/model/ma_model_factory.cpp    |   4 +
 sscma/core/model/ma_model_factory.h      |   3 +-
 sscma/core/model/ma_model_segmenter.cpp  | 123 +++++++++++++
 sscma/core/model/ma_model_segmenter.h    |  45 +++++
 sscma/core/model/ma_model_yolo11_seg.cpp | 219 +++++++++++++++++++++++
 sscma/core/model/ma_model_yolo11_seg.h   |  36 ++++
 7 files changed, 441 insertions(+), 1 deletion(-)
 create mode 100644 sscma/core/model/ma_model_segmenter.cpp
 create mode 100644 sscma/core/model/ma_model_segmenter.h
 create mode 100644 sscma/core/model/ma_model_yolo11_seg.cpp
 create mode 100644 sscma/core/model/ma_model_yolo11_seg.h

diff --git a/sscma/core/ma_types.h b/sscma/core/ma_types.h
index d9f981c9..5ba0ffcf 100644
--- a/sscma/core/ma_types.h
+++ b/sscma/core/ma_types.h
@@ -212,6 +212,17 @@ struct ma_keypoint4f_t {
     ma_bbox_t box;
     std::vector<ma_pt4f_t> pts;
 };
+
+struct ma_segm2f_t {
+    ma_bbox_t box;
+    struct {
+        uint16_t width;
+        uint16_t height;
+        std::vector<uint8_t> data;
+    } mask;
+};
+
+
 #endif
 
 typedef enum {
@@ -271,6 +282,7 @@ typedef enum {
     MA_MODEL_TYPE_YOLO_WORLD = 8u,
     MA_MODEL_TYPE_YOLO11 = 9u,
     MA_MODEL_TYPE_YOLO11_POSE = 10u,
+    MA_MODEL_TYPE_YOLO11_SEG = 11u,
 } ma_model_type_t;
 
 typedef struct {
diff --git a/sscma/core/model/ma_model_factory.cpp b/sscma/core/model/ma_model_factory.cpp
index 11c0c734..551d9f37 100644
--- a/sscma/core/model/ma_model_factory.cpp
+++ b/sscma/core/model/ma_model_factory.cpp
@@ -59,6 +59,10 @@ Model* ModelFactory::create(Engine* engine, size_t algorithm_id) {
             if (Yolo11Pose::isValid(engine)) {
                 return new Yolo11Pose(engine);
             }
+        case MA_MODEL_TYPE_YOLO11_SEG:
+            if (Yolo11Seg::isValid(engine)) {
+                return new Yolo11Seg(engine);
+            }
     }
 
     return nullptr;
diff --git a/sscma/core/model/ma_model_factory.h b/sscma/core/model/ma_model_factory.h
index 7c80059d..c462610f 100644
--- a/sscma/core/model/ma_model_factory.h
+++ b/sscma/core/model/ma_model_factory.h
@@ -14,11 +14,12 @@
 #include "ma_model_nvidia_det.h"
 #include "ma_model_pfld.h"
 #include "ma_model_yolo11.h"
+#include "ma_model_yolo11_pose.h"
+#include "ma_model_yolo11_seg.h"
 #include "ma_model_yolo_world.h"
 #include "ma_model_yolov5.h"
 #include "ma_model_yolov8.h"
 #include "ma_model_yolov8_pose.h"
-#include "ma_model_yolo11_pose.h"
 
 namespace ma {
 
diff --git a/sscma/core/model/ma_model_segmenter.cpp b/sscma/core/model/ma_model_segmenter.cpp
new file mode 100644
--- /dev/null
+++ b/sscma/core/model/ma_model_segmenter.cpp
@@ -0,0 +1,123 @@
+#include "ma_model_segmenter.h"
+
+#include <cstdarg>
+
+#include "core/cv/ma_cv.h"
+
+namespace ma::model {
+
+constexpr char TAG[] = "ma::model::segmenter";
+
+// Binds the engine's first input tensor and sets up img_ as the image that
+// preprocess() converts every frame into.
+Segmenter::Segmenter(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_SEGMENTATION | type) {
+    input_           = p_engine_->getInput(0);
+    input_img_       = nullptr;
+    threshold_nms_   = 0.45;
+    threshold_score_ = 0.25;
+
+    // A last dimension of 3 or 1 is read as the channel count (NHWC);
+    // any other value is treated as NCHW.
+    is_nhwc_ = input_.shape.dims[3] == 3 || input_.shape.dims[3] == 1;
+
+    if (is_nhwc_) {
+        img_.height = input_.shape.dims[1];
+        img_.width  = input_.shape.dims[2];
+        img_.size   = input_.shape.dims[1] * input_.shape.dims[2] * input_.shape.dims[3];
+        img_.format = input_.shape.dims[3] == 3 ? MA_PIXEL_FORMAT_RGB888 : MA_PIXEL_FORMAT_GRAYSCALE;
+    } else {
+        img_.height = input_.shape.dims[2];
+        img_.width  = input_.shape.dims[3];
+        img_.size   = input_.shape.dims[3] * input_.shape.dims[2] * input_.shape.dims[1];
+        img_.format = input_.shape.dims[1] == 3 ? MA_PIXEL_FORMAT_RGB888 : MA_PIXEL_FORMAT_GRAYSCALE;
+    }
+
+    // img_ shares the input tensor's buffer, so the conversion in
+    // preprocess() writes straight into the model input.
+    img_.data = input_.data.u8;
+}
+
+Segmenter::~Segmenter() {}
+
+// Converts the frame stored by run() into img_; for S8 input tensors every
+// byte is then reduced by 128.
+ma_err_t Segmenter::preprocess() {
+    ma_err_t ret = ma::cv::convert(input_img_, &img_);
+    if (ret != MA_OK) {
+        return ret;
+    }
+
+    if (input_.type == MA_TENSOR_TYPE_S8) {
+        for (decltype(input_.size) i = 0; i < input_.size; ++i) {
+            input_.data.u8[i] -= 128;
+        }
+    }
+
+    return ret;
+}
+
+const void* Segmenter::getInput() {
+    return static_cast<const void*>(&img_);
+}
+
+const std::forward_list<ma_segm2f_t>& Segmenter::getResults() const {
+    return results_;
+}
+
+// Stores img as the frame for preprocess() and calls underlyingRun().
+ma_err_t Segmenter::run(const ma_img_t* img) {
+    MA_ASSERT(img != nullptr);
+
+    input_img_ = img;
+
+    return underlyingRun();
+}
+
+// Sets the score or NMS threshold; the variadic argument is read as a double.
+// Any other option yields MA_EINVAL.
+ma_err_t Segmenter::setConfig(ma_model_cfg_opt_t opt, ...) {
+    ma_err_t ret = MA_OK;
+    va_list args;
+    va_start(args, opt);
+    switch (opt) {
+        case MA_MODEL_CFG_OPT_THRESHOLD:
+            threshold_score_ = va_arg(args, double);
+            break;
+        case MA_MODEL_CFG_OPT_NMS:
+            threshold_nms_ = va_arg(args, double);
+            break;
+        default:
+            ret = MA_EINVAL;
+            break;
+    }
+    va_end(args);
+    return ret;
+}
+
+// Writes the requested threshold through the pointer passed as the variadic
+// argument. Any other option yields MA_EINVAL.
+// NOTE(review): the value is stored as a double, mirroring the double read in
+// setConfig() - confirm that callers pass a double*.
+ma_err_t Segmenter::getConfig(ma_model_cfg_opt_t opt, ...) {
+    ma_err_t ret = MA_OK;
+    va_list args;
+    void* p_arg = nullptr;
+    va_start(args, opt);
+    switch (opt) {
+        case MA_MODEL_CFG_OPT_THRESHOLD:
+            p_arg                          = va_arg(args, void*);
+            *(static_cast<double*>(p_arg)) = threshold_score_;
+            break;
+        case MA_MODEL_CFG_OPT_NMS:
+            p_arg                          = va_arg(args, void*);
+            *(static_cast<double*>(p_arg)) = threshold_nms_;
+            break;
+        default:
+            ret = MA_EINVAL;
+            break;
+    }
+    va_end(args);
+    return ret;
+}
+
+}  // namespace ma::model
diff --git a/sscma/core/model/ma_model_segmenter.h b/sscma/core/model/ma_model_segmenter.h
new file mode 100644
--- /dev/null
+++ b/sscma/core/model/ma_model_segmenter.h
@@ -0,0 +1,45 @@
+#ifndef _MA_MODEL_SEGMENTER_H_
+#define _MA_MODEL_SEGMENTER_H_
+
+#include <forward_list>
+
+#include "ma_model_base.h"
+
+namespace ma::model {
+
+// Base class for image segmentation models: holds the input image view, the
+// score/NMS thresholds and the list of results filled by postprocess().
+class Segmenter : public Model {
+protected:
+    ma_tensor_t input_;          // first input tensor of the engine
+    ma_img_t img_;               // image view over input_'s buffer
+    const ma_img_t* input_img_;  // frame given to run(); not owned
+
+    float threshold_nms_;
+    float threshold_score_;
+
+    bool is_nhwc_;
+
+    std::forward_list<ma_segm2f_t> results_;
+
+protected:
+    ma_err_t preprocess() override;
+
+public:
+    Segmenter(Engine* engine, const char* name, ma_model_type_t type);
+    virtual ~Segmenter();
+
+    const std::forward_list<ma_segm2f_t>& getResults() const;
+
+    ma_err_t run(const ma_img_t* img);
+
+    const void* getInput() override;
+
+    ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override;
+
+    ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) override;
+};
+
+}  // namespace ma::model
+
+#endif  // _MA_MODEL_SEGMENTER_H_
diff --git a/sscma/core/model/ma_model_yolo11_seg.cpp b/sscma/core/model/ma_model_yolo11_seg.cpp
new file mode 100644
--- /dev/null
+++ b/sscma/core/model/ma_model_yolo11_seg.cpp
@@ -0,0 +1,219 @@
+#include "ma_model_yolo11_seg.h"
+
+#include <algorithm>
+#include <cstddef>
+#include <cstdint>
+#include <forward_list>
+#include <utility>
+#include <vector>
+
+#include "core/math/ma_math.h"
+#include "core/utils/ma_nms.h"
+
+constexpr char TAG[] = "ma::model::yolo11_seg";
+
+namespace ma::model {
+
+namespace {
+
+// Rows at the start of every box record: x, y, w, h.
+constexpr int kBoxCoords = 4;
+// Rows at the end of every box record: one coefficient per prototype mask.
+constexpr int kMaskCoeffs = 32;
+
+}  // namespace
+
+// Caches both output tensors and derives the record layout from the box
+// tensor, whose rows are 4 box coordinates, the class scores and 32 mask
+// coefficients.
+Yolo11Seg::Yolo11Seg(Engine* engine) : Segmenter(engine, "yolo11_seg", MA_MODEL_TYPE_YOLO11_SEG) {
+    MA_ASSERT(engine != nullptr);
+
+    bboxes_ = engine->getOutput(0);
+    protos_ = engine->getOutput(1);
+
+    // There is no objectness row, so every row that is neither a box
+    // coordinate nor a mask coefficient is a class score.
+    num_class_  = bboxes_.shape.dims[1] - (kBoxCoords + kMaskCoeffs);
+    num_record_ = bboxes_.shape.dims[2];
+}
+
+Yolo11Seg::~Yolo11Seg() {}
+
+// Returns true when the engine has one 4-D image input and the two outputs
+// decoded by this class: boxes shaped [1, >= 37, records] and prototypes
+// shaped [1, 32, w / 4, w / 4].
+bool Yolo11Seg::isValid(Engine* engine) {
+    const auto inputs_count  = engine->getInputSize();
+    const auto outputs_count = engine->getOutputSize();
+
+    if (inputs_count != 1 || outputs_count != 2) {
+        return false;
+    }
+
+    const auto& input_shape  = engine->getInputShape(0);
+    const auto& output_shape = engine->getOutputShape(0);
+    const auto& mask_shape   = engine->getOutputShape(1);
+
+    // Validate input shape
+    if (input_shape.size != 4) {
+        return false;
+    }
+
+    int n = input_shape.dims[0], h = input_shape.dims[1], w = input_shape.dims[2], c = input_shape.dims[3];
+    bool is_nhwc = c == 3 || c == 1;
+
+    // For NCHW the channel count sits in dims[1]; move it into c.
+    if (!is_nhwc)
+        std::swap(h, c);
+
+    if (n != 1 || h < 32 || h % 32 != 0 || (c != 3 && c != 1)) {
+        return false;
+    }
+
+    // Expected record count: one per cell of the stride 32, 16 and 8 grids
+    int s = w >> 5, m = w >> 4, l = w >> 3;
+    int ibox_len = (s * s + m * m + l * l);
+
+    // Validate output shape
+    if ((output_shape.size != 3 && output_shape.size != 4) || mask_shape.size != 4) {
+        return false;
+    }
+
+    // At least one class score is required between the box and mask rows.
+    if (output_shape.dims[0] != 1 || output_shape.dims[2] != ibox_len || output_shape.dims[1] < kBoxCoords + 1 + kMaskCoeffs) {
+        return false;
+    }
+
+    if (mask_shape.dims[0] != 1 || mask_shape.dims[1] != kMaskCoeffs || mask_shape.dims[2] != w >> 2 || mask_shape.dims[3] != w >> 2) {
+        return false;
+    }
+
+    return true;
+}
+
+// Clears the previous results and decodes the outputs; returns MA_ENOTSUP
+// unless both output tensors are f32.
+ma_err_t Yolo11Seg::postprocess() {
+    results_.clear();
+    // postProcessF32() reads both tensors through their f32 views.
+    if (bboxes_.type == MA_TENSOR_TYPE_F32 && protos_.type == MA_TENSOR_TYPE_F32) {
+        return postProcessF32();
+    }
+    return MA_ENOTSUP;
+}
+
+// Picks the best class of every record, runs NMS and builds one bit-packed
+// mask per surviving box.
+//
+// A mask has the prototype resolution and is stored row by row, eight pixels
+// per byte with the least significant bit first. Only pixels inside the box
+// can be set.
+ma_err_t Yolo11Seg::postProcessF32() {
+    std::forward_list<ma_bbox_ext_t> multi_level_bboxes;
+    auto* data = bboxes_.data.f32;
+
+    for (decltype(num_record_) i = 0; i < num_record_; ++i) {
+        float max  = threshold_score_;
+        int target = -1;
+
+        for (int c = 0; c < num_class_; c++) {
+            float score = data[i + num_record_ * (kBoxCoords + c)];
+            if (score < max) [[likely]] {
+                continue;
+            }
+            max    = score;
+            target = c;
+        }
+
+        if (target < 0)
+            continue;
+
+        float x = data[i];
+        float y = data[i + num_record_];
+        float w = data[i + num_record_ * 2];
+        float h = data[i + num_record_ * 3];
+
+        ma_bbox_ext_t bbox;
+        bbox.level  = 0;
+        bbox.index  = i;
+        bbox.x      = x / img_.width;
+        bbox.y      = y / img_.height;
+        bbox.w      = w / img_.width;
+        bbox.h      = h / img_.height;
+        bbox.score  = max;
+        bbox.target = target;
+
+        multi_level_bboxes.emplace_front(std::move(bbox));
+    }
+
+    ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true);
+
+    if (multi_level_bboxes.empty())
+        return MA_OK;
+
+    // dims[2] is used as the row count and dims[3] as the column count;
+    // isValid() only accepts square prototypes, so the two are equal.
+    const int num_protos = protos_.shape.dims[1];
+    const int mask_h     = protos_.shape.dims[2];
+    const int mask_w     = protos_.shape.dims[3];
+    const int mask_size  = mask_h * mask_w;
+    // Whole bytes per row, so a row never shares a byte with the next one.
+    const int row_bytes = (mask_w + 7) / 8;
+
+    // Box-sized scratch buffer, reused across detections.
+    std::vector<float> logits;
+
+    for (auto& bbox : multi_level_bboxes) {
+        ma_segm2f_t seg;
+        seg.box         = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target};
+        seg.mask.width  = mask_w;
+        seg.mask.height = mask_h;
+        seg.mask.data.resize(static_cast<size_t>(row_bytes) * mask_h, 0);  // bitwise
+
+        // Box corners in mask pixels, clamped to the mask.
+        const int x1 = std::max(static_cast<int>((bbox.x - bbox.w / 2) * mask_w), 0);
+        const int y1 = std::max(static_cast<int>((bbox.y - bbox.h / 2) * mask_h), 0);
+        const int x2 = std::min(static_cast<int>((bbox.x + bbox.w / 2) * mask_w), mask_w);
+        const int y2 = std::min(static_cast<int>((bbox.y + bbox.h / 2) * mask_h), mask_h);
+
+        if (x1 < x2 && y1 < y2) {
+            const int box_w = x2 - x1;
+            const int box_h = y2 - y1;
+            logits.assign(static_cast<size_t>(box_w) * box_h, 0.0f);
+
+            // logits = sum over k of coeff[k] * proto[k]. Pixels outside the
+            // box can never be set, so they are not accumulated.
+            // TODO: parallel for
+            for (int k = 0; k < num_protos; ++k) {
+                const float coeff  = data[bbox.index + num_record_ * (kBoxCoords + num_class_ + k)];
+                const float* proto = protos_.data.f32 + static_cast<size_t>(k) * mask_size;
+                for (int r = 0; r < box_h; ++r) {
+                    const float* src = proto + (y1 + r) * mask_w + x1;
+                    float* dst       = logits.data() + static_cast<size_t>(r) * box_w;
+                    for (int c = 0; c < box_w; ++c) {
+                        dst[c] += coeff * src[c];
+                    }
+                }
+            }
+
+            // sigmoid(v) > 0.5 exactly when v > 0, so the raw sum is compared
+            // with zero instead of applying the sigmoid.
+            for (int r = 0; r < box_h; ++r) {
+                uint8_t* row = seg.mask.data.data() + static_cast<size_t>(y1 + r) * row_bytes;
+                for (int c = 0; c < box_w; ++c) {
+                    if (logits[static_cast<size_t>(r) * box_w + c] > 0.0f) {
+                        const int px = x1 + c;
+                        row[px / 8] |= static_cast<uint8_t>(1u << (px % 8));
+                    }
+                }
+            }
+        }
+
+        results_.emplace_front(std::move(seg));
+    }
+
+    return MA_OK;
+}
+
+}  // namespace ma::model
diff --git a/sscma/core/model/ma_model_yolo11_seg.h b/sscma/core/model/ma_model_yolo11_seg.h
new file mode 100644
--- /dev/null
+++ b/sscma/core/model/ma_model_yolo11_seg.h
@@ -0,0 +1,36 @@
+#ifndef _MA_MODEL_YOLO11_SEG_H_
+#define _MA_MODEL_YOLO11_SEG_H_
+
+#include <cstddef>
+#include <cstdint>
+#include <utility>
+#include <vector>
+
+#include "ma_model_segmenter.h"
+
+namespace ma::model {
+
+// YOLO11 instance segmentation: decodes the box tensor (output 0) and the
+// prototype masks (output 1) into ma_segm2f_t results.
+class Yolo11Seg : public Segmenter {
+private:
+    ma_tensor_t bboxes_;
+    ma_tensor_t protos_;
+    int32_t num_record_;
+    int32_t num_class_;
+
+protected:
+    ma_err_t postprocess() override;
+
+    ma_err_t postProcessF32();
+
+public:
+    Yolo11Seg(Engine* engine);
+    ~Yolo11Seg();
+
+    static bool isValid(Engine* engine);
+};
+
+}  // namespace ma::model
+
+#endif  // _MA_MODEL_YOLO11_SEG_H_