Merge pull request #52 from deepghs/dev/ocr

dev(narugo): deprecate imgutils.detect.detect_text
deepghs · Oct 17, 2023 · 4e86e05 · 4e86e05
2 parents e5cba8a + e107366
commit 4e86e05
Show file tree

Hide file tree

Showing 7 changed files with 382 additions and 12 deletions.
diff --git a/docs/source/_libs/plot.py b/docs/source/_libs/plot.py
@@ -57,12 +57,7 @@ def image_plot(*images, save_as: str, columns=2, keep_axis: bool = False, figsiz
     for i, img in enumerate(images, start=0):
         xi, yi = i // columns, i % columns
         image, label = _image_input_process(img, autocensor)
-        if rows == 1 and columns == 1:
-            ax = axs
-        elif rows == 1:
-            ax = axs[yi]
-        else:
-            ax = axs[xi, yi]
+        ax = axs[xi, yi]
         ax.imshow(image)
         ax.set_title(label)
         if not keep_axis:

diff --git a/docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py b/docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py
@@ -0,0 +1,31 @@
+import font
+from imgutils.data import load_image
+from imgutils.detect import detect_text
+from imgutils.detect.visual import detection_visualize
+from imgutils.ocr import ocr
+from plot import image_plot
+
+
+def _detect_with_ocr(img, *, max_size=None, **kwargs):  # draw ocr() results onto img; extra kwargs are forwarded to ocr()
+    img = load_image(img, mode='RGB', force_background='white')
+    if max_size is not None and min(img.height, img.width) > max_size:  # shrink only when the shorter side exceeds max_size
+        r = max_size / min(img.height, img.width)  # ratio that brings the shorter side down to max_size
+        img = img.resize((  # width and height use the same ratio, so the aspect ratio is kept up to rounding
+            int(round(img.width * r)),
+            int(round(img.height * r)),
+        ))
+
+    return detection_visualize(img, ocr(img, **kwargs), fp=font.get_cn_fp())  # fp: presumably a CJK-capable font for the text labels — confirm
+
+
+def _detect_with_deprecated(img, **kwargs):  # draw results of the deprecated detect_text(); extra kwargs are forwarded to it
+    return detection_visualize(img, detect_text(img, **kwargs))
+
+
+if __name__ == '__main__':
+    image_plot(  # two panels built from the same input image: deprecated detector vs. the OCR pipeline
+        (_detect_with_deprecated('text/ml2.jpg'), 'detect_text'),
+        (_detect_with_ocr('text/ml2.jpg'), 'detect_text_with_ocr'),  # NOTE(review): this panel is produced via ocr(), not detect_text_with_ocr() — confirm the label
+        columns=2,
+        figsize=(13, 3.8),
+    )
diff --git a/docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py.svg b/docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py.svg
diff --git a/imgutils/detect/text.py b/imgutils/detect/text.py
@@ -12,14 +12,26 @@
     .. image:: text_detect_benchmark.plot.py.svg
         :align: center
 
+    .. warning::
+        This module has been deprecated and will be removed in the future.
+
+        It is recommended to migrate to the :func:`imgutils.ocr.detect_text_with_ocr` function as soon as possible.
+        This function uses a higher-quality text detection model provided by PaddleOCR,
+        resulting in improved performance and higher efficiency.
+
+        .. image:: text_detect_deprecate_demo.plot.py.svg
+            :align: center
+
 """
 from functools import lru_cache
 from typing import List, Tuple, Optional
 
 import cv2
 import numpy as np
+from deprecation import deprecated
 from huggingface_hub import hf_hub_download
 
+from ..config.meta import __VERSION__
 from ..data import ImageTyping, load_image
 from ..utils import open_onnx_model
 
@@ -106,6 +118,8 @@ def _get_bounding_box_of_text(image: ImageTyping, model: str, threshold: float)
     return bboxes
 
 
+@deprecated(deprecated_in="0.2.10", removed_in="0.4", current_version=__VERSION__,
+            details="Use the new function :func:`imgutils.ocr.detect_text_with_ocr` instead")
 def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: float = 0.05,
                 max_area_size: Optional[int] = 640):
     """
@@ -123,6 +137,10 @@ def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: floa
     :type max_area_size: Optional[int]
     :return: List of detected text bounding boxes, labels, and scores.
     :rtype: List[Tuple[Tuple[int, int, int, int], str, float]]
+
+    .. warning::
+        This function is deprecated, and it will be removed from imgutils in the future.
+        Please migrate to :func:`imgutils.ocr.detect_text_with_ocr` as soon as possible.
     """
     image = load_image(image)
     if max_area_size is not None and image.width * image.height >= max_area_size ** 2:

diff --git a/imgutils/ocr/entry.py b/imgutils/ocr/entry.py
@@ -87,14 +87,21 @@ def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_DET_MODEL,
     :return: A list of detected text boxes, label (always ``text``), and their confidence scores.
     :rtype: List[Tuple[Tuple[int, int, int, int], str, float]]
 
-    .. note::
-        If you need to extract the actual text content, use the :func:`ocr` function.
-
     Examples::
         >>> from imgutils.ocr import detect_text_with_ocr
         >>>
         >>> detect_text_with_ocr('comic.jpg')
-        [((742, 485, 809, 511), 'text', 0.9543377610144915), ((682, 98, 734, 124), 'text', 0.9309689495575223), ((716, 136, 836, 164), 'text', 0.9042856988923695), ((144, 455, 196, 485), 'text', 0.874083638387722), ((719, 455, 835, 488), 'text', 0.8628696346175078), ((124, 478, 214, 508), 'text', 0.848871771901487), ((1030, 557, 1184, 578), 'text', 0.8352495440618789), ((427, 129, 553, 154), 'text', 0.8249209443996619)]
+        [((742, 485, 809, 511), 'text', 0.9543377610144915),
+         ((682, 98, 734, 124), 'text', 0.9309689495575223),
+         ((716, 136, 836, 164), 'text', 0.9042856988923695),
+         ((144, 455, 196, 485), 'text', 0.874083638387722),
+         ((719, 455, 835, 488), 'text', 0.8628696346175078),
+         ((124, 478, 214, 508), 'text', 0.848871771901487),
+         ((1030, 557, 1184, 578), 'text', 0.8352495440618789),
+         ((427, 129, 553, 154), 'text', 0.8249209443996619)]
+
+    .. note::
+        If you need to extract the actual text content, use the :func:`ocr` function.
     """
     retval = []
     for box, _, score in _detect_text(image, model, heat_threshold, box_threshold, max_candidates, unclip_ratio):
@@ -135,7 +142,14 @@ def ocr(image: ImageTyping, detect_model: str = _DEFAULT_DET_MODEL,
         >>> from imgutils.ocr import ocr
         >>>
         >>> ocr('comic.jpg')
-        [((742, 485, 809, 511), 'MOB.', 0.9356705927336156), ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466), ((682, 98, 734, 124), 'BUT', 0.8730931912907247), ((144, 455, 196, 485), 'OH,', 0.8417627579351514), ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503), ((1030, 557, 1184, 578), '(EL)  GATO IBERICO', 0.7271127306351021), ((719, 455, 835, 488), "THAt'S △", 0.701928390168364), ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)]
+        [((742, 485, 809, 511), 'MOB.', 0.9356705927336156),
+         ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466),
+         ((682, 98, 734, 124), 'BUT', 0.8730931912907247),
+         ((144, 455, 196, 485), 'OH,', 0.8417627579351514),
+         ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503),
+         ((1030, 557, 1184, 578), '(EL)  GATO IBERICO', 0.7271127306351021),
+         ((719, 455, 835, 488), "THAt'S △", 0.701928390168364),
+         ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)]
 
         By default, the text recognition model used is `ch_PP-OCRv4_rec`.
         This recognition model has good recognition capabilities for both Chinese and English.

diff --git a/requirements.txt b/requirements.txt
@@ -10,4 +10,5 @@ scipy
 emoji>=2.5.0
 pilmoji>=1.3.0
 shapely
-pyclipper
+pyclipper
+deprecation>=2.0.0
diff --git a/test/ocr/test_ocr.py b/test/ocr/test_ocr.py
@@ -2,9 +2,21 @@
 from PIL import Image
 
 from imgutils.ocr import detect_text_with_ocr, list_det_models, list_rec_models, ocr
+from imgutils.ocr.detect import _open_ocr_detection_model
+from imgutils.ocr.recognize import _open_ocr_recognition_dictionary, _open_ocr_recognition_model
 from test.testings import get_testfile
 
 
+@pytest.fixture(autouse=True, scope='module')
+def _clear_cache():  # autouse + module scope: applied without being requested, once for this test module
+    try:
+        yield
+    finally:  # teardown phase: runs after the code following the yield point has finished
+        _open_ocr_detection_model.cache_clear()  # presumably cached loaders (cache_clear suggests lru_cache) — confirm
+        _open_ocr_recognition_model.cache_clear()
+        _open_ocr_recognition_dictionary.cache_clear()
+
+
 @pytest.fixture()
 def ocr_img_plot():
     yield get_testfile('ocr', 'plot.png')