-
Notifications
You must be signed in to change notification settings - Fork 13
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
feat: add support for YOLO11 segmentation model
- Loading branch information
Showing
7 changed files
with
377 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,108 @@ | ||
#include "ma_model_segmenter.h" | ||
|
||
#include "core/cv/ma_cv.h" | ||
|
||
namespace ma::model { | ||
|
||
constexpr char TAG[] = "ma::model::segmenter"; | ||
|
||
/**
 * Wraps the engine's first input tensor as a segmentation model input.
 *
 * Sets the default thresholds (NMS 0.45, score 0.25), detects whether the
 * input tensor is laid out channels-last, and describes the input buffer as
 * an image (height, width, byte size, pixel format, data pointer).
 *
 * @param p_engine Engine forwarded to the Model base class.
 * @param name     Model name forwarded to the Model base class.
 * @param type     Model type flag OR-ed with the image-input / segmentation-output flags.
 */
Segmenter::Segmenter(Engine* p_engine, const char* name, ma_model_type_t type) : Model(p_engine, name, MA_INPUT_TYPE_IMAGE | MA_OUTPUT_TYPE_SEGMENTATION | type) {
    input_           = p_engine_->getInput(0);
    threshold_nms_   = 0.45;
    threshold_score_ = 0.25;

    const auto& dims = input_.shape.dims;

    // A trailing dimension of 3 or 1 is treated as the channel axis (NHWC).
    is_nhwc_ = (dims[3] == 3) || (dims[3] == 1);

    // Pick the axes that hold rows, columns and channels for the detected layout.
    const int row_axis     = is_nhwc_ ? 1 : 2;
    const int col_axis     = is_nhwc_ ? 2 : 3;
    const int channel_axis = is_nhwc_ ? 3 : 1;

    img_.height = dims[row_axis];
    img_.width  = dims[col_axis];
    img_.size   = dims[1] * dims[2] * dims[3];
    img_.format = (dims[channel_axis] == 3) ? MA_PIXEL_FORMAT_RGB888 : MA_PIXEL_FORMAT_GRAYSCALE;

    // The image view aliases the input tensor's byte buffer.
    img_.data = input_.data.u8;
}
|
||
// Nothing to release explicitly; members clean up after themselves.
Segmenter::~Segmenter() = default;
/**
 * Converts the image passed to run() into the model's input buffer.
 *
 * After ma::cv::convert succeeds, an input tensor of type S8 additionally has
 * 128 subtracted from every byte of its buffer.
 *
 * @return The error from ma::cv::convert if it fails, otherwise MA_OK.
 */
ma_err_t Segmenter::preprocess() {
    const ma_err_t status = ma::cv::convert(input_img_, &img_);
    if (status != MA_OK) {
        return status;
    }

    // Only signed 8-bit tensors need the offset applied.
    if (input_.type != MA_TENSOR_TYPE_S8) {
        return status;
    }

    auto* bytes = input_.data.u8;
    for (int idx = 0; idx < input_.size; ++idx) {
        bytes[idx] -= 128;
    }

    return status;
}
|
||
const void* Segmenter::getInput() { | ||
return static_cast<const void*>(&img_); | ||
} | ||
|
||
const std::forward_list<ma_segm2f_t>& Segmenter::getResults() const { | ||
return results_; | ||
} | ||
|
||
/**
 * Runs the model on one image.
 *
 * @param img Source image; must not be null (asserted). The pointer is
 *            remembered so preprocess() can read it.
 * @return Whatever underlyingRun() reports.
 */
ma_err_t Segmenter::run(const ma_img_t* img) {
    MA_ASSERT(img != nullptr);

    input_img_ = img;

    const ma_err_t status = underlyingRun();
    return status;
}
|
||
/**
 * Updates one configuration option from a variadic argument.
 *
 * @param opt MA_MODEL_CFG_OPT_THRESHOLD (score threshold) or
 *            MA_MODEL_CFG_OPT_NMS (NMS threshold); the value is read as a double.
 * @return MA_OK on a recognised option, MA_EINVAL otherwise.
 */
ma_err_t Segmenter::setConfig(ma_model_cfg_opt_t opt, ...) {
    va_list ap;
    va_start(ap, opt);

    ma_err_t status = MA_EINVAL;
    if (opt == MA_MODEL_CFG_OPT_THRESHOLD) {
        threshold_score_ = va_arg(ap, double);
        status           = MA_OK;
    } else if (opt == MA_MODEL_CFG_OPT_NMS) {
        threshold_nms_ = va_arg(ap, double);
        status         = MA_OK;
    }

    va_end(ap);
    return status;
}
|
||
/**
 * Reads one configuration option into a caller-supplied location.
 *
 * @param opt MA_MODEL_CFG_OPT_THRESHOLD or MA_MODEL_CFG_OPT_NMS; the variadic
 *            argument is a pointer that is written through as a double.
 * @return MA_OK on a recognised option, MA_EINVAL otherwise.
 */
ma_err_t Segmenter::getConfig(ma_model_cfg_opt_t opt, ...) {
    va_list ap;
    va_start(ap, opt);

    ma_err_t status = MA_OK;
    if (opt == MA_MODEL_CFG_OPT_THRESHOLD) {
        double* out = static_cast<double*>(va_arg(ap, void*));
        *out        = threshold_score_;
    } else if (opt == MA_MODEL_CFG_OPT_NMS) {
        double* out = static_cast<double*>(va_arg(ap, void*));
        *out        = threshold_nms_;
    } else {
        status = MA_EINVAL;
    }

    va_end(ap);
    return status;
}
|
||
} // namespace ma::model |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,43 @@ | ||
#ifndef _MA_MODEL_SEGMENTER_H_ | ||
#define _MA_MODEL_SEGMENTER_H_ | ||
|
||
#include <vector> | ||
|
||
#include "ma_model_base.h" | ||
|
||
namespace ma::model { | ||
|
||
class Segmenter : public Model { | ||
protected: | ||
ma_tensor_t input_; | ||
ma_img_t img_; | ||
const ma_img_t* input_img_; | ||
|
||
float threshold_nms_; | ||
float threshold_score_; | ||
|
||
bool is_nhwc_; | ||
|
||
std::forward_list<ma_segm2f_t> results_; | ||
|
||
protected: | ||
ma_err_t preprocess() override; | ||
|
||
public: | ||
Segmenter(Engine* engine, const char* name, ma_model_type_t type); | ||
virtual ~Segmenter(); | ||
|
||
const std::forward_list<ma_segm2f_t>& getResults() const; | ||
|
||
ma_err_t run(const ma_img_t* img); | ||
|
||
const void* getInput() override; | ||
|
||
ma_err_t setConfig(ma_model_cfg_opt_t opt, ...) override; | ||
|
||
ma_err_t getConfig(ma_model_cfg_opt_t opt, ...) override; | ||
}; | ||
|
||
} // namespace ma::model | ||
|
||
#endif // _MA_MODEL_SEGMENTER_H_ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,174 @@ | ||
#include "ma_model_yolo11_seg.h" | ||
|
||
#include <algorithm> | ||
#include <cstddef> | ||
#include <cstdint> | ||
#include <forward_list> | ||
#include <numeric> | ||
#include <utility> | ||
#include <vector> | ||
|
||
#include "core/math/ma_math.h" | ||
#include "core/utils/ma_nms.h" | ||
|
||
constexpr char TAG[] = "ma::model::yolo11_seg"; | ||
|
||
namespace ma::model { | ||
|
||
/**
 * Binds the two engine outputs used by the YOLO11 segmentation head.
 *
 * Output 0 holds the per-record predictions and output 1 the mask prototypes.
 *
 * @param p_engine_ Engine providing the tensors; asserted non-null.
 */
Yolo11Seg::Yolo11Seg(Engine* p_engine_) : Segmenter(p_engine_, "yolo11_seg", MA_MODEL_TYPE_YOLO11_SEG) {
    MA_ASSERT(p_engine_ != nullptr);

    bboxes_ = p_engine_->getOutput(0);
    protos_ = p_engine_->getOutput(1);

    const auto& pred_dims = bboxes_.shape.dims;

    num_record_ = pred_dims[2];
    // Rows left for class scores after the 4 box values and the 32 mask
    // coefficients are removed (4 + 32 = 36).
    num_class_ = pred_dims[1] - 36;
}
|
||
// Nothing to release explicitly; members clean up after themselves.
Yolo11Seg::~Yolo11Seg() = default;
|
||
/**
 * Checks whether the engine's tensors match the shapes this model expects.
 *
 * Requires one 4-D input with batch size 1, an extent that is a multiple of
 * 32 (at least 32) and 1 or 3 channels; a prediction output of rank 3 or 4
 * whose record count matches the three strides (1/32, 1/16, 1/8) and that has
 * at least 37 rows; and a 4-D prototype output of shape [1, 32, side/4, side/4].
 *
 * @param engine Engine to inspect.
 * @return true when every check passes.
 */
bool Yolo11Seg::isValid(Engine* engine) {
    const auto num_inputs  = engine->getInputSize();
    const auto num_outputs = engine->getOutputSize();
    if (num_inputs != 1 || num_outputs != 2) {
        return false;
    }

    const auto& in_shape    = engine->getInputShape(0);
    const auto& pred_shape  = engine->getOutputShape(0);
    const auto& proto_shape = engine->getOutputShape(1);

    // The input tensor must be 4-dimensional.
    if (in_shape.size != 4) {
        return false;
    }

    const int batch = in_shape.dims[0];
    const int d1    = in_shape.dims[1];
    const int d2    = in_shape.dims[2];
    const int d3    = in_shape.dims[3];

    // A trailing dimension of 3 or 1 means channels-last; otherwise the
    // channel count is read from dims[1] and the checked extent from dims[3].
    const bool channels_last = (d3 == 3) || (d3 == 1);
    const int  checked_side  = channels_last ? d1 : d3;
    const int  channels      = channels_last ? d3 : d1;
    const int  grid_side     = d2;

    const bool input_ok = batch == 1 && checked_side >= 32 && checked_side % 32 == 0 && (channels == 3 || channels == 1);
    if (!input_ok) {
        return false;
    }

    // Expected number of prediction records across the three strides.
    const int cells_32     = grid_side >> 5;
    const int cells_16     = grid_side >> 4;
    const int cells_8      = grid_side >> 3;
    const int record_count = cells_32 * cells_32 + cells_16 * cells_16 + cells_8 * cells_8;

    // Rank checks for both outputs.
    const bool pred_rank_ok = pred_shape.size == 3 || pred_shape.size == 4;
    if (!pred_rank_ok || proto_shape.size != 4) {
        return false;
    }

    const bool pred_ok = pred_shape.dims[0] == 1 && pred_shape.dims[2] == record_count && pred_shape.dims[1] >= 37;
    if (!pred_ok) {
        return false;
    }

    const int  proto_side = grid_side >> 2;
    const bool proto_ok   = proto_shape.dims[0] == 1 && proto_shape.dims[1] == 32 && proto_shape.dims[2] == proto_side && proto_shape.dims[3] == proto_side;
    return proto_ok;
}
|
||
/**
 * Clears the previous results and decodes the current outputs.
 *
 * @return The result of postProcessF32() for F32 prediction tensors,
 *         MA_ENOTSUP for any other tensor type.
 */
ma_err_t Yolo11Seg::postprocess() {
    results_.clear();

    // Only float32 outputs have a decoder.
    if (bboxes_.type != MA_TENSOR_TYPE_F32) {
        return MA_ENOTSUP;
    }

    return postProcessF32();
}
|
||
/**
 * Decodes float32 outputs into segmentation results.
 *
 * Steps:
 *  1. For every prediction record, pick the class with the highest score that
 *     reaches threshold_score_ and build a box normalized by the input image size.
 *  2. Run NMS over the candidate boxes.
 *  3. For each surviving box, combine the prototype planes with the record's
 *     mask coefficients, then set one bit per pixel inside the box whose
 *     combined value exceeds 0.5.
 *
 * The prototype tensor is indexed as [_, plane, row, col]: dims[2] counts rows
 * and dims[3] counts columns, which is how the mask buffer is addressed below.
 * Mask width/height and the box-to-mask scaling follow that same convention.
 *
 * @return MA_OK.
 */
ma_err_t Yolo11Seg::postProcessF32() {
    std::forward_list<ma_bbox_ext_t> multi_level_bboxes;
    const auto* data = bboxes_.data.f32;

    for (decltype(num_record_) i = 0; i < num_record_; ++i) {
        // Best class for this record; stays -1 when no score reaches the threshold.
        float best_score = threshold_score_;
        int   target     = -1;

        for (int c = 0; c < num_class_; c++) {
            const float score = data[i + num_record_ * (4 + c)];
            if (score < best_score) [[likely]] {
                continue;
            }
            best_score = score;
            target     = c;
        }

        if (target < 0) {
            continue;
        }

        // Rows 0..3 of the prediction tensor hold x, y, w, h for record i.
        ma_bbox_ext_t bbox;
        bbox.level  = 0;
        bbox.index  = i;
        bbox.x      = data[i] / img_.width;
        bbox.y      = data[i + num_record_] / img_.height;
        bbox.w      = data[i + num_record_ * 2] / img_.width;
        bbox.h      = data[i + num_record_ * 3] / img_.height;
        bbox.score  = best_score;
        bbox.target = target;

        multi_level_bboxes.emplace_front(std::move(bbox));
    }

    ma::utils::nms(multi_level_bboxes, threshold_nms_, threshold_score_, false, true);

    if (multi_level_bboxes.empty()) {
        return MA_OK;
    }

    const int   proto_count = protos_.shape.dims[1];
    const int   mask_h      = protos_.shape.dims[2];  // rows
    const int   mask_w      = protos_.shape.dims[3];  // columns
    const int   mask_size   = mask_h * mask_w;
    const auto* protos      = protos_.data.f32;

    // Scratch buffer for the combined mask; reused across boxes.
    std::vector<float> masks(mask_size, 0.0f);

    for (auto& bbox : multi_level_bboxes) {
        ma_segm2f_t seg;
        seg.box         = {.x = bbox.x, .y = bbox.y, .w = bbox.w, .h = bbox.h, .score = bbox.score, .target = bbox.target};
        seg.mask.width  = mask_w;
        seg.mask.height = mask_h;
        seg.mask.data.resize(mask_size / 8, 0);  // one bit per mask pixel

        std::fill(masks.begin(), masks.end(), 0.0f);

        // Weighted sum of the prototype planes using this record's coefficients.
        // TODO: parallel for
        for (int j = 0; j < proto_count; ++j) {
            const float  coeff = data[bbox.index + num_record_ * (4 + num_class_ + j)];
            const float* plane = protos + j * mask_size;
            for (int k = 0; k < mask_size; ++k) {
                masks[k] += coeff * plane[k];
            }
        }

        // Box corners in mask pixels: x scales with columns, y with rows.
        const int x1 = (bbox.x - bbox.w / 2) * mask_w;
        const int y1 = (bbox.y - bbox.h / 2) * mask_h;
        const int x2 = (bbox.x + bbox.w / 2) * mask_w;
        const int y2 = (bbox.y + bbox.h / 2) * mask_h;

        // Visit only the part of the box that lies inside the mask.
        const int row_begin = std::max(y1, 0);
        const int row_end   = std::min(y2, mask_h);
        const int col_begin = std::max(x1, 0);
        const int col_end   = std::min(x2, mask_w);

        for (int row = row_begin; row < row_end; ++row) {
            for (int col = col_begin; col < col_end; ++col) {
                // NOTE(review): the combined value is compared to 0.5 without any
                // squashing function applied here — confirm this is the intended cut-off.
                if (masks[row * mask_w + col] > 0.5) {
                    seg.mask.data[row * mask_w / 8 + col / 8] |= (1 << (col % 8));
                }
            }
        }

        results_.emplace_front(std::move(seg));
    }

    return MA_OK;
}
|
||
} // namespace ma::model |
Oops, something went wrong.