dev(narugo): try create plot

deepghs · Oct 16, 2023 · 6058425 · 6058425
1 parent c894c6a
commit 6058425
Show file tree

Hide file tree

Showing 16 changed files with 145 additions and 31 deletions.
diff --git a/docs/source/_libs/SimHei.ttf b/docs/source/_libs/SimHei.ttf
diff --git a/docs/source/_libs/font.py b/docs/source/_libs/font.py
@@ -0,0 +1,8 @@
+import os.path
+
+import matplotlib.font_manager as fm
+
+
+def get_cn_fp() -> fm.FontProperties:
+    """
+    Build matplotlib font properties for the ``SimHei.ttf`` file located next to this module.
+
+    :return: A ``FontProperties`` object created from the path of that TTF file.
+    """
+    ttf_file = os.path.join(os.path.dirname(__file__), 'SimHei.ttf')
+    return fm.FontProperties(fname=ttf_file)
diff --git a/docs/source/_static/SimHei.ttf b/docs/source/_static/SimHei.ttf
diff --git a/docs/source/api_doc/ocr/SimHei.ttf b/docs/source/api_doc/ocr/SimHei.ttf
diff --git a/docs/source/api_doc/ocr/anime_subtitle.jpg b/docs/source/api_doc/ocr/anime_subtitle.jpg
diff --git a/docs/source/api_doc/ocr/comic.jpg b/docs/source/api_doc/ocr/comic.jpg
diff --git a/docs/source/api_doc/ocr/index.rst b/docs/source/api_doc/ocr/index.rst
@@ -0,0 +1,36 @@
+imgutils.ocr
+========================
+
+.. currentmodule:: imgutils.ocr
+
+.. automodule:: imgutils.ocr
+
+
+detect_text_with_ocr
+------------------------------------------------------
+
+.. autofunction:: detect_text_with_ocr
+
+
+
+ocr
+------------------------------------------------------
+
+.. autofunction:: ocr
+
+
+
+list_det_models
+------------------------------------------------------
+
+.. autofunction:: list_det_models
+
+
+
+list_rec_models
+------------------------------------------------------
+
+.. autofunction:: list_rec_models
+
+
+
diff --git a/docs/source/api_doc/ocr/ocr_demo.plot.py b/docs/source/api_doc/ocr/ocr_demo.plot.py
@@ -0,0 +1,30 @@
+import font
+from imgutils.data import load_image
+from imgutils.detect.visual import detection_visualize
+from imgutils.ocr import ocr
+from plot import image_plot
+
+_max_size = 480
+
+
+def _detect(img, **kwargs):
+    """
+    Run OCR on one image and return the visualization of the result.
+
+    The image is loaded as RGB on a white background. When its shorter side is larger than
+    ``_max_size``, it is scaled down so that the shorter side becomes ``_max_size``.
+
+    :param img: Image to process; passed to ``load_image``.
+    :param kwargs: Extra keyword arguments forwarded to ``ocr``.
+    :return: The result of ``detection_visualize``, drawn with the font from ``font.get_cn_fp()``.
+    """
+    img = load_image(img, mode='RGB', force_background='white')
+    if min(img.height, img.width) > _max_size:
+        # Same ratio for both sides, derived from the shorter one.
+        r = _max_size / min(img.height, img.width)
+        img = img.resize((
+            int(round(img.width * r)),
+            int(round(img.height * r)),
+        ))
+
+    return detection_visualize(img, ocr(img, **kwargs), fp=font.get_cn_fp())
+
+
+if __name__ == '__main__':
+    # Each entry pairs a visualized OCR result with its caption string; only the first image
+    # overrides the recognition model, the others use the defaults of ``ocr``.
+    image_plot(
+        (_detect('post_text.jpg', recognize_model='japan_PP-OCRv3_rec'), 'Text of Post'),
+        (_detect('anime_subtitle.jpg'), 'Subtitle of Anime'),
+        (_detect('comic.jpg'), 'Comic'),
+        (_detect('plot.png'), 'Complex'),
+        columns=2,
+        figsize=(12, 9),
+    )
diff --git a/docs/source/api_doc/ocr/plot.png b/docs/source/api_doc/ocr/plot.png
diff --git a/docs/source/api_doc/ocr/post_text.jpg b/docs/source/api_doc/ocr/post_text.jpg
diff --git a/docs/source/index.rst b/docs/source/index.rst
@@ -34,6 +34,7 @@ configuration file's structure and their versions.
     api_doc/detect/index
     api_doc/edge/index
     api_doc/metrics/index
+    api_doc/ocr/index
     api_doc/operate/index
     api_doc/restore/index
     api_doc/sd/index

diff --git a/imgutils/detect/visual.py b/imgutils/detect/visual.py
@@ -12,7 +12,7 @@
 from imgutils.data import ImageTyping, load_image
 
 
-def _try_get_font_from_matplotlib(fontsize: int = 12):
+def _try_get_font_from_matplotlib(fp=None, fontsize: int = 12):
     try:
         # noinspection PyPackageRequirements
         import matplotlib
@@ -21,13 +21,13 @@ def _try_get_font_from_matplotlib(fontsize: int = 12):
     else:
         # noinspection PyPackageRequirements
         from matplotlib.font_manager import findfont, FontProperties
-        font = findfont(FontProperties(family=['sans-serif']))
+        font = findfont(fp or FontProperties(family=['sans-serif']))
         return ImageFont.truetype(font, fontsize)
 
 
 def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, float, float, float], str, float]],
                         labels: Optional[List[str]] = None, text_padding: int = 6, fontsize: int = 12,
-                        no_label: bool = False):
+                        fp=None, no_label: bool = False):
     """
     Overview:
         Visualize the results of the object detection.
@@ -51,7 +51,7 @@ def detection_visualize(image: ImageTyping, detection: List[Tuple[Tuple[float, f
     image = load_image(image, force_background=None, mode='RGBA')
     visual_image = image.copy()
     draw = ImageDraw.Draw(visual_image, mode='RGBA')
-    font = _try_get_font_from_matplotlib(fontsize) or ImageFont.load_default()
+    font = _try_get_font_from_matplotlib(fp, fontsize) or ImageFont.load_default()
 
     labels = sorted(labels or {label for _, label, _ in detection})
     _colors = list(map(str, rnd_colors(len(labels))))

diff --git a/imgutils/ocr/__init__.py b/imgutils/ocr/__init__.py
@@ -1 +1 @@
-from .entry import detect_text_with_ocr, ocr
+from .entry import detect_text_with_ocr, ocr, list_det_models, list_rec_models
diff --git a/imgutils/ocr/detect.py b/imgutils/ocr/detect.py
@@ -1,24 +1,31 @@
+import os.path
 from functools import lru_cache
+from typing import List
 
 import cv2
 import numpy as np
 import pyclipper
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download, HfFileSystem
 from shapely import Polygon
 
-from ..data import ImageTyping
+from ..data import ImageTyping, load_image
 from ..utils import open_onnx_model
 
 _MIN_SIZE = 3
+_HF_CLIENT = HfFileSystem()
+_REPOSITORY = 'deepghs/paddleocr'
 
 
 @lru_cache()
 def _open_ocr_detection_model(model):
+    """
+    Open the ONNX detection model ``model`` from ``_REPOSITORY``.
+
+    The file ``det/<model>/model.onnx`` is fetched with ``hf_hub_download`` and handed to
+    ``open_onnx_model``; ``lru_cache`` keeps one result per model name.
+
+    :param model: Name of the detection model directory under ``det/``.
+    :return: Whatever ``open_onnx_model`` returns for the downloaded file.
+    """
     return open_onnx_model(hf_hub_download(
-        'deepghs/paddleocr',
-        f'{model}/detection.onnx',
+        _REPOSITORY,
+        f'det/{model}/model.onnx',
     ))
 
 
 def _box_score_fast(bitmap, _box):
     h, w = bitmap.shape[:2]
@@ -150,10 +157,19 @@ def _get_text_points(image: ImageTyping, model: str = 'ch_PP-OCRv4_det_infer',
 def _detect_text(image: ImageTyping, model: str = 'ch_PP-OCRv4_det_infer',
                  heat_threshold: float = 0.3, box_threshold: float = 0.7,
                  max_candidates: int = 1000, unclip_ratio: float = 2.0):
+    # Detect text regions and return them as axis-aligned boxes.
+    #
+    # The image is loaded as RGB on a white background, then every (points, score) pair from
+    # ``_get_text_points`` is reduced to the min/max of its x and y coordinates.
+    # Returns a list of ``((x0, y0, x1, y1), 'text', score)`` tuples in the order produced
+    # by ``_get_text_points``.
+    #
+    # NOTE(review): the default ``model`` still ends in ``_infer`` while the public entry
+    # point now defaults to 'ch_PP-OCRv4_det' -- confirm this name exists under ``det/``.
+    image = load_image(image, force_background='white', mode='RGB')
     retval = []
     for points, score in _get_text_points(image, model, heat_threshold, box_threshold, max_candidates, unclip_ratio):
         x0, y0 = points[:, 0].min(), points[:, 1].min()
         x1, y1 = points[:, 0].max(), points[:, 1].max()
         retval.append(((x0.item(), y0.item(), x1.item(), y1.item()), 'text', score))
 
     return retval
+
+
+@lru_cache()
+def _list_det_models() -> List[str]:
+    """
+    List the names of the detection models found in ``_REPOSITORY``.
+
+    Globs ``det/*/model.onnx`` in the repository and returns the directory name of each match.
+    The result is cached by ``lru_cache``, so every caller receives the same list object.
+
+    :return: Model names, in the order returned by the glob.
+    """
+    # The model name is the directory that directly contains model.onnx. Reading it from the
+    # '/'-separated glob path avoids os.path.relpath, which depends on the working directory
+    # and emits '\\' separators on Windows, where the following split('/') then fails.
+    return [
+        item.split('/')[-2]
+        for item in _HF_CLIENT.glob(f'{_REPOSITORY}/det/*/model.onnx')
+    ]
diff --git a/imgutils/ocr/entry.py b/imgutils/ocr/entry.py
@@ -1,13 +1,23 @@
 from typing import List, Tuple
 
-from .detect import _detect_text
-from .recognize import _text_recognize
+from .detect import _detect_text, _list_det_models
+from .recognize import _text_recognize, _list_rec_models
 from ..data import ImageTyping, load_image
+from ..utils import tqdm
 
-_DEFAULT_MODEL = 'ch_PP-OCRv4_det_infer'
+_DEFAULT_DET_MODEL = 'ch_PP-OCRv4_det'
+_DEFAULT_REC_MODEL = 'ch_PP-OCRv4_rec'
 
 
-def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_MODEL,
+def list_det_models() -> List[str]:
+    """
+    List the names of the available text detection models.
+
+    :return: The list produced by ``_list_det_models``.
+    """
+    return _list_det_models()
+
+
+def list_rec_models() -> List[str]:
+    """
+    List the names of the available text recognition models.
+
+    :return: The list produced by ``_list_rec_models``.
+    """
+    return _list_rec_models()
+
+
+def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_DET_MODEL,
                          heat_threshold: float = 0.3, box_threshold: float = 0.7,
                          max_candidates: int = 1000, unclip_ratio: float = 2.0) \
         -> List[Tuple[Tuple[int, int, int, int], str, float]]:
@@ -18,22 +28,22 @@ def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_MODEL,
     return retval
 
 
-def ocr(image: ImageTyping, model: str = _DEFAULT_MODEL,
-        heat_threshold: float = 0.3, box_threshold: float = 0.7,
-        max_candidates: int = 1000, unclip_ratio: float = 2.0,
-        is_remove_duplicate: bool = False):
+def ocr(image: ImageTyping, detect_model: str = _DEFAULT_DET_MODEL,
+        recognize_model: str = _DEFAULT_REC_MODEL, heat_threshold: float = 0.3, box_threshold: float = 0.7,
+        max_candidates: int = 1000, unclip_ratio: float = 2.0, rotation_threshold: float = 1.5,
+        is_remove_duplicate: bool = False, silent: bool = False):
+    """
+    Detect text regions in an image and recognize the text inside each of them.
+
+    :param image: Image to process; passed to ``load_image``.
+    :param detect_model: Model name forwarded to ``_detect_text``.
+    :param recognize_model: Model name forwarded to ``_text_recognize``.
+    :param heat_threshold: Forwarded to ``_detect_text``.
+    :param box_threshold: Forwarded to ``_detect_text``.
+    :param max_candidates: Forwarded to ``_detect_text``.
+    :param unclip_ratio: Forwarded to ``_detect_text``.
+    :param rotation_threshold: A region is rotated by 90 degrees before recognition when its
+        height is at least ``width * rotation_threshold``.
+    :param is_remove_duplicate: Forwarded to ``_text_recognize``.
+    :param silent: Forwarded to ``tqdm`` as ``silent``.
+    :return: List of ``((x0, y0, x1, y1), text, score)`` tuples sorted by score in descending
+        order, where ``score`` is the detection score multiplied by the recognition score.
+    """
     image = load_image(image)
     retval = []
-    for (x0, y0, x1, y1), _, score in _detect_text(image, model, heat_threshold,
-                                                   box_threshold, max_candidates, unclip_ratio):
+    for (x0, y0, x1, y1), _, score in \
+            tqdm(_detect_text(image, detect_model, heat_threshold,
+                              box_threshold, max_candidates, unclip_ratio), silent=silent):
         width, height = x1 - x0, y1 - y0
         area = image.crop((x0, y0, x1, y1))
-        if height >= width * 1.5:
+        if height >= width * rotation_threshold:
+            # NOTE(review): if ``area`` is a PIL image, rotate() without expand=True keeps the
+            # original canvas size and would clip a tall region -- confirm this is intended.
             area = area.rotate(90)
 
-        text, _ = _text_recognize(area, model, is_remove_duplicate)
-        print(text, score)
-        retval.append(((x0, y0, x1, y1), text, score))
+        text, rec_score = _text_recognize(area, recognize_model, is_remove_duplicate)
+        retval.append(((x0, y0, x1, y1), text, score * rec_score))
 
     retval = sorted(retval, key=lambda x: x[2], reverse=True)
     return retval
diff --git a/imgutils/ocr/recognize.py b/imgutils/ocr/recognize.py
@@ -1,26 +1,30 @@
+import os
 from functools import lru_cache
 from typing import List, Tuple
 
 import numpy as np
-from huggingface_hub import hf_hub_download
+from huggingface_hub import hf_hub_download, HfFileSystem
 
-from ..data import ImageTyping
+from ..data import ImageTyping, load_image
 from ..utils import open_onnx_model
 
+_HF_CLIENT = HfFileSystem()
+_REPOSITORY = 'deepghs/paddleocr'
+
 
 @lru_cache()
 def _open_ocr_recognition_model(model):
+    # Fetch ``rec/<model>/model.onnx`` from ``_REPOSITORY`` with ``hf_hub_download`` and open it
+    # with ``open_onnx_model``; ``lru_cache`` keeps one result per model name.
     return open_onnx_model(hf_hub_download(
-        'deepghs/paddleocr',
-        f'{model}/recognition.onnx',
+        _REPOSITORY,
+        f'rec/{model}/model.onnx',
     ))
 
 
 @lru_cache()
 def _open_ocr_recognition_dictionary(model) -> List[str]:
     with open(hf_hub_download(
-            'deepghs/paddleocr',
-            f'{model}/dict.txt',
+            _REPOSITORY,
+            f'rec/{model}/dict.txt',
     ), 'r') as f:
         dict_ = [line.strip() for line in f]
 
@@ -57,9 +61,10 @@ def decode(text_index, model: str, text_prob=None, is_remove_duplicate=False):
 
 def _text_recognize(image: ImageTyping, model: str = 'ch_PP-OCRv4_det_infer',
                     is_remove_duplicate: bool = False) -> Tuple[str, float]:
+    image = load_image(image, force_background='white', mode='RGB')
     r = 48 / image.height
-    new_height = int(image.height * r)
-    new_width = int(image.width * r)
+    new_height = int(round(image.height * r))
+    new_width = int(round(image.width * r))
     image = image.resize((new_width, new_height))
 
     input_ = np.array(image).transpose((2, 0, 1)).astype(np.float32) / 255.0
@@ -73,3 +78,11 @@ def _text_recognize(image: ImageTyping, model: str = 'ch_PP-OCRv4_det_infer',
     indices = output.argmax(axis=2)
     confs = output.max(axis=2)
     return decode(indices, model, confs, is_remove_duplicate)[0]
+
+
+@lru_cache()
+def _list_rec_models() -> List[str]:
+    """
+    List the names of the recognition models found in ``_REPOSITORY``.
+
+    Globs ``rec/*/model.onnx`` in the repository and returns the directory name of each match.
+    The result is cached by ``lru_cache``, so every caller receives the same list object.
+
+    :return: Model names, in the order returned by the glob.
+    """
+    # The model name is the directory that directly contains model.onnx. Reading it from the
+    # '/'-separated glob path avoids os.path.relpath, which depends on the working directory
+    # and emits '\\' separators on Windows, where the following split('/') then fails.
+    return [
+        item.split('/')[-2]
+        for item in _HF_CLIENT.glob(f'{_REPOSITORY}/rec/*/model.onnx')
+    ]
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		from .entry import detect_text_with_ocr, ocr
		from .entry import detect_text_with_ocr, ocr, list_det_models, list_rec_models
Check warning on line 1 in imgutils/ocr/__init__.py View check run for this annotation Codecov / codecov/patch imgutils/ocr/__init__.py#L1 `Added line #L1 was not covered by tests`