Skip to content

Commit

Permalink
修复 OCR内存泄露
Browse files Browse the repository at this point in the history
  • Loading branch information
MakesYT committed Jan 15, 2025
1 parent 819d171 commit aa5ebe4
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 78 deletions.
2 changes: 2 additions & 0 deletions KitopiaEx/Ocr/Ocr.cs
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ public IEnumerable<OcrResult> OcrImg(ScreenCaptureResult dResult, CancellationTo
{
(Mat, Rect) textimg = _textDetector.GetRotateCropImage(textDetectorDstImg, point2Fse);
var predictText = _textRecognizer.PredictText(textimg.Item1);
textimg.Item1.Dispose();
if (string.IsNullOrWhiteSpace(predictText))
{
continue;
Expand All @@ -58,6 +59,7 @@ public IEnumerable<OcrResult> OcrImg(ScreenCaptureResult dResult, CancellationTo
EPoint = new Point(rect.Left + rect.Width, rect.Top + rect.Height),
Text = predictText
});

//Console.WriteLine(predictText+" "+rect.Left + " " + rect.Top + " " + rect.Width + " " + rect.Height);
}

Expand Down
2 changes: 1 addition & 1 deletion KitopiaEx/Ocr/OcrResult.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace KitopiaEx.Ocr;

public class OcrResult
public struct OcrResult
{
public Point SPoint { get; set; }
public Point EPoint { get; set; }
Expand Down
70 changes: 15 additions & 55 deletions KitopiaEx/Ocr/TextDetector.cs
Original file line number Diff line number Diff line change
Expand Up @@ -14,11 +14,9 @@ internal class TextDetector : IDisposable
private string modelPath;
private SessionOptions sessionOptions;
private InferenceSession _session;
private List<string> inputNames;

private int shortSize = 736;
private float shortSideThresh = 3.0f;

public Mat dstImg;
public TextDetector(string modelpath, SessionOptions opts = null)
{
this.unclipRatio = 1.6f;
Expand All @@ -29,20 +27,9 @@ public TextDetector(string modelpath, SessionOptions opts = null)
this.sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC;

this._session = new InferenceSession(this.modelPath, this.sessionOptions);
this.inputNames = new List<string>();



//智能
foreach (var name in this._session.InputMetadata.Keys)
{
this.inputNames.Add(name);
}


}

public Mat dstImg { set; get; }

public List<Point2f[]> Detect(Mat srcImg)
{

Expand All @@ -58,7 +45,7 @@ public List<Point2f[]> Detect(Mat srcImg)
var inputTensor = new DenseTensor<float>(normalize, inputShape);
var inputs = new List<NamedOnnxValue>
{
NamedOnnxValue.CreateFromTensor(this.inputNames[0], inputTensor)
NamedOnnxValue.CreateFromTensor(_session.InputMetadata.Keys.First(), inputTensor)
};

//2. 推理
Expand All @@ -77,8 +64,7 @@ public List<Point2f[]> Detect(Mat srcImg)
}
}
OpenCvSharp.Point[][] contours;
HierarchyIndex[] hierarchy;
Cv2.FindContours(binary, out contours, out hierarchy, RetrievalModes.List, ContourApproximationModes.ApproxTC89L1);
Cv2.FindContours(binary, out contours, out _, RetrievalModes.List, ContourApproximationModes.ApproxTC89L1);

var results = new List<Point2f[]>();

Expand Down Expand Up @@ -110,7 +96,8 @@ public List<Point2f[]> Detect(Mat srcImg)
continue;
results.Add(box.Points());
}


binary.Dispose();
return results;
}

Expand All @@ -124,12 +111,12 @@ private Mat Preprocess(Mat srcMat)
int tarW = w;

// 计算目标高度和宽度,确保是32的倍数
tarH = (int)(h / 32) * 32;
tarH = h / 32 * 32;
if (tarH < h)
{
tarH += 32; // 如果刚好是32的倍数,则需要再加32
}
tarW = (int)(w / 32) * 32;
tarW = w / 32 * 32;
if (tarW < w)
{
tarW += 32; // 如果刚好是32的倍数,则需要再加32
Expand All @@ -145,8 +132,9 @@ private Mat Preprocess(Mat srcMat)

// 将原图复制到新图像的中心位置
Cv2.CopyMakeBorder(dstImg, resizedImgWithPadding, 0, tarH - dstImg.Rows, 0, tarW - dstImg.Cols, BorderTypes.Isolated, new Scalar(255, 255, 255));
dstImg.Dispose();
return resizedImgWithPadding;
//
//


}
Expand All @@ -171,34 +159,6 @@ private float[] Normalize(Mat img)
return inputImage;
}


/// <summary>
/// Scores a contour as the mean of <paramref name="binary"/> over the polygon outlined by
/// <paramref name="contour"/>, divided by 255.
/// </summary>
/// <param name="binary">Map the contour was found in; only the first component of the mean (Val0) is used.</param>
/// <param name="contour">Contour points, in the coordinate system of <paramref name="binary"/>.</param>
/// <returns>The masked mean divided by 255.</returns>
/// <remarks>
/// TODO (carried over from the original note): double-check the normalization of the return value —
/// the division by 255 presumably assumes an 8-bit map; confirm against the caller.
/// </remarks>
private float ContourScore(Mat binary, OpenCvSharp.Point[] contour)
{
    // 1. Bounding rectangle of the contour, clamped to the image bounds.
    Rect rect = Cv2.BoundingRect(contour);
    int xmin = Math.Max(rect.X, 0);
    int xmax = Math.Min(rect.X + rect.Width, binary.Cols - 1);
    int ymin = Math.Max(rect.Y, 0);
    int ymax = Math.Min(rect.Y + rect.Height, binary.Rows - 1);
    int roiWidth = xmax - xmin + 1;
    int roiHeight = ymax - ymin + 1;

    // 2. Build a mask of the polygon enclosed by the contour, in ROI-local coordinates.
    //    Both Mats wrap native memory and were previously never released; dispose them
    //    deterministically when the method exits.
    using Mat binROI = new Mat(binary, new Rect(xmin, ymin, roiWidth, roiHeight));
    using Mat mask = Mat.Zeros(new OpenCvSharp.Size(roiWidth, roiHeight), MatType.CV_8U);
    var roiContour = contour.Select(p => new OpenCvSharp.Point(p.X - xmin, p.Y - ymin)).ToList();
    Cv2.FillPoly(mask, new List<List<OpenCvSharp.Point>> { roiContour }, (Scalar)1);

    // 3. Mean of the ROI restricted to the filled polygon.
    Scalar mean = Cv2.Mean(binROI, mask);

    return (float)mean.Val0 / 255.0f;
}

// 未理解该函数的意义 20240416
// 或许可参考 DBNet后处理unclip()函数转C++ https://www.jianshu.com/p/0227c40b0736
private Point2f[] Unclip(Point2f[] inPoly)
{
var outPoly = new Point2f[4];
Expand Down Expand Up @@ -243,9 +203,9 @@ private Point2f[] Unclip(Point2f[] inPoly)
}
else
{
float denom = a.X * (float)(d.Y - c.Y) + b.X * (float)(c.Y - d.Y) +
d.X * (float)(b.Y - a.Y) + c.X * (float)(a.Y - b.Y);
float num = a.X * (float)(d.Y - c.Y) + c.X * (float)(a.Y - d.Y) + d.X * (float)(c.Y - a.Y);
float denom = a.X * (d.Y - c.Y) + b.X * (c.Y - d.Y) +
d.X * (b.Y - a.Y) + c.X * (a.Y - b.Y);
float num = a.X * (d.Y - c.Y) + c.X * (a.Y - d.Y) + d.X * (c.Y - a.Y);
float s = num / denom;

pt.X = a.X + s * (b.X - a.X);
Expand All @@ -257,8 +217,7 @@ private Point2f[] Unclip(Point2f[] inPoly)

return outPoly;
}

//基于vertices围成的外接矩形,似乎没有作用

public (Mat,Rect) GetRotateCropImage(Mat frame, Point2f[] vertices)
{
for (int i = 0; i < vertices.Length; i++)
Expand All @@ -275,6 +234,7 @@ public void Dispose()
{
sessionOptions.Dispose();
_session.Dispose();

}
}

Expand Down
26 changes: 4 additions & 22 deletions KitopiaEx/Ocr/TextRecognizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -13,39 +13,20 @@ internal class TextRecognizer : IDisposable
{
private InferenceSession _session;
private List<string> input_names;
private List<string> output_names;
private List<int[]> output_node_dims;
private List<string> alphabet;
private int inpHeight = 48;
private int inpWidth = 320;
private List<float> input_image_;
private List<int> preb_label;

SessionOptions sessionOptions;
public TextRecognizer(string modelpath,string recWorldDictPath)
{
var sessionOptions = new SessionOptions();
sessionOptions = new SessionOptions();
sessionOptions.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC;

_session = new InferenceSession(modelpath, sessionOptions);

input_names = new List<string>();
output_names = new List<string>();

this.input_image_ = new List<float>();

output_node_dims = new List<int[]>();


foreach (var name in this._session.InputMetadata.Keys)
{
this.input_names.Add(name);
}

foreach (var name in this._session.OutputMetadata.Keys)
{
this.output_names.Add(name);
}

foreach (var value in this._session.OutputMetadata.Values)
{
this.output_node_dims.Add(value.Dimensions);
Expand All @@ -69,7 +50,7 @@ public string PredictText(Mat cv_image)
var normalize = Normalize(dstimg);

int[] input_shape_ = new int[] { 1, 3, dstimg.Rows, dstimg.Width };

dstimg.Dispose();
var input_tensor_ = new DenseTensor<float>(normalize, input_shape_);

var ort_inputs = new List<NamedOnnxValue>
Expand Down Expand Up @@ -176,6 +157,7 @@ private float[] Normalize(Mat img)

/// <summary>
/// Releases the session options and the inference session held by this recognizer.
/// </summary>
public void Dispose()
{
    // Same order as before: options first, then the session.
    this.sessionOptions.Dispose();
    this._session.Dispose();
}
}
Expand Down

0 comments on commit aa5ebe4

Please sign in to comment.