Skip to content

Commit

Permalink
feat: add support for YOLO11 segmentation model
Browse files Browse the repository at this point in the history
  • Loading branch information
LynnL4 committed Nov 22, 2024
1 parent d737124 commit 509970a
Show file tree
Hide file tree
Showing 7 changed files with 377 additions and 1 deletion.
12 changes: 12 additions & 0 deletions sscma/core/ma_types.h
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,17 @@ struct ma_keypoint4f_t {
ma_bbox_t box;
std::vector<ma_pt4f_t> pts;
};

// One instance-segmentation result: a bounding box plus the mask that belongs
// to it.
struct ma_segm2f_t {
// Bounding box of the segmented instance.
ma_bbox_t box;
// Mask attached to the box.
struct {
// Mask grid dimensions, in mask cells (not input-image pixels).
uint16_t width;
uint16_t height;
// Mask payload. The YOLO11 segmentation post-processor in this commit fills
// it bit-packed: width * height / 8 bytes, one bit per cell, with the bit for
// column j stored at position (j % 8) of its byte.
std::vector<uint8_t> data;
} mask;
};


#endif

typedef enum {
Expand Down Expand Up @@ -271,6 +282,7 @@ typedef enum {
MA_MODEL_TYPE_YOLO_WORLD = 8u,
MA_MODEL_TYPE_YOLO11 = 9u,
MA_MODEL_TYPE_YOLO11_POSE = 10u,
MA_MODEL_TYPE_YOLO11_SEG = 11u,
} ma_model_type_t;

typedef struct {
Expand Down
4 changes: 4 additions & 0 deletions sscma/core/model/ma_model_factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,10 @@ Model* ModelFactory::create(Engine* engine, size_t algorithm_id) {
if (Yolo11Pose::isValid(engine)) {
return new Yolo11Pose(engine);
}
case MA_MODEL_TYPE_YOLO11_SEG:
if (Yolo11Seg::isValid(engine)) {
return new Yolo11Seg(engine);
}
}

return nullptr;
Expand Down
3 changes: 2 additions & 1 deletion sscma/core/model/ma_model_factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,12 @@
#include "ma_model_nvidia_det.h"
#include "ma_model_pfld.h"
#include "ma_model_yolo11.h"
#include "ma_model_yolo11_pose.h"
#include "ma_model_yolo11_seg.h"
#include "ma_model_yolo_world.h"
#include "ma_model_yolov5.h"
#include "ma_model_yolov8.h"
#include "ma_model_yolov8_pose.h"
#include "ma_model_yolo11_pose.h"

namespace ma {

Expand Down
108 changes: 108 additions & 0 deletions sscma/core/model/ma_model_segmenter.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
#include "ma_model_segmenter.h"

#include "core/cv/ma_cv.h"

namespace ma::model {

constexpr char TAG[] = "ma::model::segmenter";

// Builds the common state shared by all segmentation models.
//
// @param p_engine  Inference engine; its input tensor 0 is used as the image
//                  buffer the pre-processor writes into.
// @param name      Model name forwarded to the Model base class.
// @param type      Concrete model type, OR-ed into the base-class type mask.
//
// NOTE(review): the layout detection below reads input_.shape.dims[3] without
// checking the tensor rank first; it presumably relies on the concrete model's
// isValid() having been called beforehand - confirm against callers.
Segmenter::Segmenter(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_SEGMENTATION | type) {
    input_ = p_engine_->getInput(0);

    // No frame has been submitted yet; run() installs the real pointer.
    // Without this the member would hold an indeterminate value.
    input_img_ = nullptr;

    // Default thresholds; both can be changed later through setConfig().
    threshold_nms_   = 0.45f;
    threshold_score_ = 0.25f;

    // A trailing dimension of 3 or 1 is taken to be the channel axis (NHWC);
    // anything else is treated as NCHW.
    is_nhwc_ = input_.shape.dims[3] == 3 || input_.shape.dims[3] == 1;

    // Start from a value-initialized descriptor so that any field not assigned
    // below does not carry an indeterminate value into the image converter.
    img_ = {};

    if (is_nhwc_) {
        img_.height = input_.shape.dims[1];
        img_.width  = input_.shape.dims[2];
        img_.size   = input_.shape.dims[1] * input_.shape.dims[2] * input_.shape.dims[3];
        img_.format = input_.shape.dims[3] == 3 ? MA_PIXEL_FORMAT_RGB888 : MA_PIXEL_FORMAT_GRAYSCALE;
    } else {
        img_.height = input_.shape.dims[2];
        img_.width  = input_.shape.dims[3];
        img_.size   = input_.shape.dims[3] * input_.shape.dims[2] * input_.shape.dims[1];
        img_.format = input_.shape.dims[1] == 3 ? MA_PIXEL_FORMAT_RGB888 : MA_PIXEL_FORMAT_GRAYSCALE;
    }

    // The model image aliases the engine's input tensor storage, so converting
    // a frame into img_ fills the tensor directly.
    img_.data = input_.data.u8;
}

// Nothing to release explicitly; members clean up after themselves.
Segmenter::~Segmenter() = default;
// Converts the frame handed to run() into the model's input image (img_,
// which aliases the input tensor) and, for signed 8-bit inputs, re-centres the
// bytes.
//
// @return MA_EINVAL if no source frame is set, the converter's error code if
//         the conversion fails, MA_OK otherwise.
ma_err_t Segmenter::preprocess() {
    // run() only guards the frame pointer with MA_ASSERT; if that macro is
    // compiled out (TODO confirm) a null frame would otherwise reach the
    // converter unchecked.
    if (input_img_ == nullptr) {
        return MA_EINVAL;
    }

    const ma_err_t ret = ma::cv::convert(input_img_, &img_);
    if (ret != MA_OK) {
        return ret;
    }

    if (input_.type == MA_TENSOR_TYPE_S8) {
        // Subtracting 128 from each unsigned byte (wrapping modulo 256) maps
        // 0..255 onto the bit patterns of -128..127.
        // The index uses the tensor's own size type to avoid a signed/unsigned
        // comparison.
        for (decltype(input_.size) i = 0; i < input_.size; ++i) {
            input_.data.u8[i] -= 128;
        }
    }

    return MA_OK;
}

const void* Segmenter::getInput() {
return static_cast<const void*>(&img_);
}

const std::forward_list<ma_segm2f_t>& Segmenter::getResults() const {
return results_;
}

// Runs the model on one frame.
//
// @param img  Source frame; must not be null. The pointer is stored (not
//             copied) so the pre-processing step can read it.
// @return     Whatever underlyingRun() reports.
ma_err_t Segmenter::run(const ma_img_t* img) {
    MA_ASSERT(img != nullptr);

    // Remember the frame for preprocess(), then hand over to the base-class
    // pipeline.
    this->input_img_ = img;
    const ma_err_t status = underlyingRun();
    return status;
}

// Updates one tunable of the segmenter.
//
// @param opt  MA_MODEL_CFG_OPT_THRESHOLD (score threshold) or
//             MA_MODEL_CFG_OPT_NMS (NMS threshold).
// @param ...  A single `double` carrying the new value.
// @return     MA_OK on success, MA_EINVAL for any other option.
ma_err_t Segmenter::setConfig(ma_model_cfg_opt_t opt, ...) {
    va_list ap;
    va_start(ap, opt);

    // Assume the option is unknown until one of the branches accepts it.
    ma_err_t status = MA_EINVAL;

    if (opt == MA_MODEL_CFG_OPT_THRESHOLD) {
        threshold_score_ = va_arg(ap, double);
        status           = MA_OK;
    } else if (opt == MA_MODEL_CFG_OPT_NMS) {
        threshold_nms_ = va_arg(ap, double);
        status         = MA_OK;
    }

    va_end(ap);
    return status;
}

// Reads back one tunable of the segmenter.
//
// @param opt  MA_MODEL_CFG_OPT_THRESHOLD (score threshold) or
//             MA_MODEL_CFG_OPT_NMS (NMS threshold).
// @param ...  A single pointer; the value is stored through it as a `double`,
//             so the caller must pass the address of a double.
// @return     MA_OK on success, MA_EINVAL for any other option (in which case
//             no argument is consumed or written).
ma_err_t Segmenter::getConfig(ma_model_cfg_opt_t opt, ...) {
    va_list ap;
    va_start(ap, opt);

    ma_err_t status = MA_OK;

    if (opt == MA_MODEL_CFG_OPT_THRESHOLD) {
        double* out = static_cast<double*>(va_arg(ap, void*));
        *out        = threshold_score_;
    } else if (opt == MA_MODEL_CFG_OPT_NMS) {
        double* out = static_cast<double*>(va_arg(ap, void*));
        *out        = threshold_nms_;
    } else {
        status = MA_EINVAL;
    }

    va_end(ap);
    return status;
}

} // namespace ma::model
43 changes: 43 additions & 0 deletions sscma/core/model/ma_model_segmenter.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
#ifndef _MA_MODEL_SEGMENTER_H_
#define _MA_MODEL_SEGMENTER_H_

#include <forward_list>
#include <vector>

#include "ma_model_base.h"

namespace ma::model {

// Common base for image segmentation models. It owns the input-image
// plumbing, the score/NMS thresholds and the result list; concrete models
// fill results_ from their own output tensors.
class Segmenter : public Model {
protected:
// Engine input tensor 0, fetched in the constructor.
ma_tensor_t input_;
// Descriptor of the image the model expects; its data pointer is set to the
// input tensor's buffer in the constructor.
ma_img_t img_;
// Frame most recently passed to run(); not owned.
const ma_img_t* input_img_;

// NMS threshold (constructor default 0.45) and score threshold (constructor
// default 0.25). Both are adjustable through setConfig().
float threshold_nms_;
float threshold_score_;

// True when the last input dimension is 3 or 1, i.e. the tensor is treated
// as channels-last.
bool is_nhwc_;

// Segmentation results exposed through getResults().
std::forward_list<ma_segm2f_t> results_;

protected:
// Converts input_img_ into img_ and shifts the bytes by 128 for signed 8-bit
// input tensors.
ma_err_t preprocess() override;

public:
Segmenter(Engine* engine, const char* name, ma_model_type_t type);
virtual ~Segmenter();

// Read-only view of the current results.
const std::forward_list<ma_segm2f_t>& getResults() const;

// Stores `img` as the current frame and runs the model pipeline. `img` must
// not be null.
ma_err_t run(const ma_img_t* img);

// Returns a pointer to the internal ma_img_t descriptor (img_).
const void* getInput() override;

// Variadic setter: expects one `double` for MA_MODEL_CFG_OPT_THRESHOLD or
// MA_MODEL_CFG_OPT_NMS; returns MA_EINVAL for other options.
ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override;

// Variadic getter: expects one pointer to `double` that receives the value;
// returns MA_EINVAL for other options.
ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) override;
};

} // namespace ma::model

#endif // _MA_MODEL_SEGMENTER_H_
174 changes: 174 additions & 0 deletions sscma/core/model/ma_model_yolo11_seg.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,174 @@
#include "ma_model_yolo11_seg.h"

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <forward_list>
#include <numeric>
#include <utility>
#include <vector>

#include "core/math/ma_math.h"
#include "core/utils/ma_nms.h"

constexpr char TAG[] = "ma::model::yolo11_seg";

namespace ma::model {

// Binds the two engine outputs and derives the tensor geometry.
//
// Output 0 holds the detections; each record carries 4 box values, one score
// per class and 32 mask coefficients. Output 1 holds the mask prototypes.
Yolo11Seg::Yolo11Seg(Engine* p_engine_) : Segmenter(p_engine_, "yolo11_seg", MA_MODEL_TYPE_YOLO11_SEG) {
    MA_ASSERT(p_engine_ != nullptr);

    constexpr int kBoxValues        = 4;
    constexpr int kMaskCoefficients = 32;

    protos_ = p_engine_->getOutput(1);
    bboxes_ = p_engine_->getOutput(0);

    // Whatever remains of a record after the box values and the mask
    // coefficients is the per-class score block.
    num_record_ = bboxes_.shape.dims[2];
    num_class_  = bboxes_.shape.dims[1] - (kBoxValues + kMaskCoefficients);
}

// No resources beyond what the members and base class already manage.
Yolo11Seg::~Yolo11Seg() = default;

// Checks whether the model loaded in `engine` has the tensor layout of a
// YOLO11 segmentation network: one 4-D image input and two outputs
// (detections and mask prototypes).
//
// @param engine  Engine to inspect.
// @return        true when every shape check passes.
//
// NOTE(review): all expected output sizes are derived from input dims[2]
// alone, so only square inputs can pass - confirm this is intended.
bool Yolo11Seg::isValid(Engine* engine) {
    const auto num_inputs  = engine->getInputSize();
    const auto num_outputs = engine->getOutputSize();
    if (!(num_inputs == 1 && num_outputs == 2)) {
        return false;
    }

    const auto& in_shape    = engine->getInputShape(0);
    const auto& det_shape   = engine->getOutputShape(0);
    const auto& proto_shape = engine->getOutputShape(1);

    // The input must be a 4-D tensor.
    if (in_shape.size != 4) {
        return false;
    }

    const int batch = in_shape.dims[0];
    const int dim1  = in_shape.dims[1];
    const int dim2  = in_shape.dims[2];
    const int dim3  = in_shape.dims[3];

    // A trailing dimension of 3 or 1 marks a channels-last tensor; otherwise
    // dims[1] is taken as the channel count and dims[3] as the checked side.
    const bool channels_last = (dim3 == 3 || dim3 == 1);
    const int side           = channels_last ? dim1 : dim3;
    const int channels       = channels_last ? dim3 : dim1;

    if (batch != 1) {
        return false;
    }
    if (side < 32 || side % 32 != 0) {
        return false;
    }
    if (channels != 3 && channels != 1) {
        return false;
    }

    // Expected number of detection records: one per cell of the stride-32,
    // stride-16 and stride-8 grids.
    const int grid_s32     = dim2 >> 5;
    const int grid_s16     = dim2 >> 4;
    const int grid_s8      = dim2 >> 3;
    const int record_count = grid_s32 * grid_s32 + grid_s16 * grid_s16 + grid_s8 * grid_s8;

    // Detections may be 3-D or 4-D; prototypes must be 4-D.
    const bool det_rank_ok = (det_shape.size == 3 || det_shape.size == 4);
    if (!det_rank_ok || proto_shape.size != 4) {
        return false;
    }

    // Each record needs at least 4 box values + 1 class score + 32 mask
    // coefficients = 37 entries.
    if (det_shape.dims[0] != 1 || det_shape.dims[2] != record_count || det_shape.dims[1] < 37) {
        return false;
    }

    // Prototypes: 32 planes at a quarter of the input resolution.
    const int proto_side = dim2 >> 2;
    if (proto_shape.dims[0] != 1 || proto_shape.dims[1] != 32 || proto_shape.dims[2] != proto_side || proto_shape.dims[3] != proto_side) {
        return false;
    }

    return true;
}

// Clears the previous results and decodes the current engine outputs.
//
// @return The result of postProcessF32() when both output tensors are
//         float32, MA_ENOTSUP otherwise.
ma_err_t Yolo11Seg::postprocess() {
    results_.clear();

    // The float decoder reads both the detection tensor and the prototype
    // tensor through their f32 views, so both must actually be float32;
    // checking only the detections would let a differently-typed prototype
    // tensor be misread.
    if (bboxes_.type != MA_TENSOR_TYPE_F32 || protos_.type != MA_TENSOR_TYPE_F32) {
        return MA_ENOTSUP;
    }

    return postProcessF32();
}

// Decodes float32 outputs into segmentation results.
//
// Stage 1: for every detection record, pick the best class whose score reaches
//          threshold_score_ and build a normalized centre/size box.
// Stage 2: run NMS over the candidates.
// Stage 3: for every surviving box, combine the mask prototypes with the
//          record's mask coefficients, threshold the result inside the box and
//          pack it into a bit mask.
//
// The detection tensor is indexed as data[record + num_record_ * field], and
// the prototype tensor as [plane][row][col] with dims[2] rows and dims[3]
// columns.
//
// @return MA_OK (an empty result list is not an error).
ma_err_t Yolo11Seg::postProcessF32() {
    std::forward_list<ma_bbox_ext_t> multi_level_bboxes;
    const auto* data = bboxes_.data.f32;

    for (decltype(num_record_) i = 0; i < num_record_; ++i) {
        // Seeding with the threshold filters out low scores in the same pass.
        float best_score = threshold_score_;
        int target       = -1;

        for (int c = 0; c < num_class_; c++) {
            const float score = data[i + num_record_ * (4 + c)];
            if (score < best_score) [[likely]] {
                continue;
            }
            best_score = score;
            target     = c;
        }

        if (target < 0) {
            continue;
        }

        ma_bbox_ext_t bbox;
        bbox.level  = 0;
        bbox.index  = i;  // kept so stage 3 can find this record's coefficients
        bbox.x      = data[i] / img_.width;
        bbox.y      = data[i + num_record_] / img_.height;
        bbox.w      = data[i + num_record_ * 2] / img_.width;
        bbox.h      = data[i + num_record_ * 3] / img_.height;
        bbox.score  = best_score;
        bbox.target = target;

        multi_level_bboxes.push_front(bbox);
    }

    ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true);

    const int num_coeffs = protos_.shape.dims[1];
    const int mask_rows  = protos_.shape.dims[2];
    const int mask_cols  = protos_.shape.dims[3];
    const int mask_size  = mask_rows * mask_cols;
    const auto* protos   = protos_.data.f32;

    // Scratch plane shared by all detections instead of one allocation each.
    std::vector<float> masks(mask_size);

    for (const auto& bbox : multi_level_bboxes) {
        ma_segm2f_t seg;
        seg.box = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target};
        // Width is the column count and height the row count, matching the
        // row-major indexing used below. (Identical to the previous
        // assignment whenever the prototype plane is square.)
        seg.mask.width  = static_cast<uint16_t>(mask_cols);
        seg.mask.height = static_cast<uint16_t>(mask_rows);
        seg.mask.data.resize(mask_size / 8, 0);  // bit-packed, 8 cells per byte

        // Box corners in mask-cell coordinates: x scales with columns, y with
        // rows.
        const int x1 = static_cast<int>((bbox.x - bbox.w / 2) * mask_cols);
        const int y1 = static_cast<int>((bbox.y - bbox.h / 2) * mask_rows);
        const int x2 = static_cast<int>((bbox.x + bbox.w / 2) * mask_cols);
        const int y2 = static_cast<int>((bbox.y + bbox.h / 2) * mask_rows);

        // Clip to the mask plane. Only cells inside the box are ever emitted,
        // so only those need to be computed.
        const int row_begin = std::max(y1, 0);
        const int row_end   = std::min(y2, mask_rows);
        const int col_begin = std::max(x1, 0);
        const int col_end   = std::min(x2, mask_cols);

        if (row_begin < row_end && col_begin < col_end) {
            std::fill(masks.begin(), masks.end(), 0.0f);

            // masks = sum_k coefficient[k] * prototype[k], accumulated plane
            // by plane in ascending k.
            for (int k = 0; k < num_coeffs; ++k) {
                const float coeff  = data[bbox.index + num_record_ * (4 + num_class_ + k)];
                const float* plane = protos + static_cast<std::size_t>(k) * mask_size;
                for (int r = row_begin; r < row_end; ++r) {
                    const int row_offset = r * mask_cols;
                    for (int c = col_begin; c < col_end; ++c) {
                        masks[row_offset + c] += coeff * plane[row_offset + c];
                    }
                }
            }

            // NOTE(review): the threshold is applied to the raw linear
            // combination (no sigmoid). Reference YOLO segmentation decoders
            // threshold sigmoid(x) at 0.5, i.e. x at 0 - confirm that 0.5 on
            // the raw value is intended.
            for (int r = row_begin; r < row_end; ++r) {
                for (int c = col_begin; c < col_end; ++c) {
                    if (masks[r * mask_cols + c] > 0.5f) {
                        seg.mask.data[r * mask_cols / 8 + c / 8] |= (1 << (c % 8));
                    }
                }
            }
        }

        results_.emplace_front(std::move(seg));
    }

    return MA_OK;
}

} // namespace ma::model
Loading

0 comments on commit 509970a

Please sign in to comment.