Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

dev(narugo): deprecate imgutils.detect.detect_text #52

Merged
merged 6 commits into from
Oct 17, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 1 addition & 6 deletions docs/source/_libs/plot.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,12 +57,7 @@ def image_plot(*images, save_as: str, columns=2, keep_axis: bool = False, figsiz
for i, img in enumerate(images, start=0):
xi, yi = i // columns, i % columns
image, label = _image_input_process(img, autocensor)
if rows == 1 and columns == 1:
ax = axs
elif rows == 1:
ax = axs[yi]
else:
ax = axs[xi, yi]
ax = axs[xi, yi]
ax.imshow(image)
ax.set_title(label)
if not keep_axis:
Expand Down
31 changes: 31 additions & 0 deletions docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
import font
from imgutils.data import load_image
from imgutils.detect import detect_text
from imgutils.detect.visual import detection_visualize
from imgutils.ocr import ocr
from plot import image_plot


def _detect_with_ocr(img, *, max_size=None, **kwargs):
    """
    Run :func:`ocr` on an image and draw the results with :func:`detection_visualize`.

    The image is loaded in RGB mode with ``force_background='white'``. When ``max_size``
    is given and the shorter side of the image exceeds it, the image is scaled down
    (keeping the aspect ratio) so that the shorter side equals ``max_size``.

    :param img: Image to process, in any form accepted by :func:`load_image`.
    :param max_size: Optional upper bound for the shorter side of the image.
    :param kwargs: Extra keyword arguments forwarded to :func:`ocr`.
    :return: The result of :func:`detection_visualize`, rendered with the font from ``font.get_cn_fp()``.
    """
    canvas = load_image(img, mode='RGB', force_background='white')

    if max_size is not None:
        short_side = min(canvas.height, canvas.width)
        if short_side > max_size:
            scale = max_size / short_side
            target_size = (
                int(round(canvas.width * scale)),
                int(round(canvas.height * scale)),
            )
            canvas = canvas.resize(target_size)

    detections = ocr(canvas, **kwargs)
    return detection_visualize(canvas, detections, fp=font.get_cn_fp())


def _detect_with_deprecated(img, **kwargs):
    """
    Run the deprecated :func:`detect_text` on an image and draw the results.

    :param img: Image to process; passed unchanged to both :func:`detect_text`
        and :func:`detection_visualize`.
    :param kwargs: Extra keyword arguments forwarded to :func:`detect_text`.
    :return: The result of :func:`detection_visualize`.
    """
    detections = detect_text(img, **kwargs)
    return detection_visualize(img, detections)


if __name__ == '__main__':
    # Show the deprecated detector and the OCR-based one side by side
    # on the same sample image.
    sample = 'text/ml2.jpg'
    panels = [
        (_detect_with_deprecated(sample), 'detect_text'),
        (_detect_with_ocr(sample), 'detect_text_with_ocr'),
    ]
    image_plot(*panels, columns=2, figsize=(13, 3.8))
299 changes: 299 additions & 0 deletions docs/source/api_doc/detect/text_detect_deprecate_demo.plot.py.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
18 changes: 18 additions & 0 deletions imgutils/detect/text.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,26 @@
.. image:: text_detect_benchmark.plot.py.svg
:align: center

.. warning::
This module has been deprecated and will be removed in the future.

It is recommended to migrate to the :func:`imgutils.ocr.detect_text_with_ocr` function as soon as possible.
This function uses a higher-quality text detection model provided by PaddleOCR,
resulting in improved performance and higher efficiency.

.. image:: text_detect_deprecate_demo.plot.py.svg
:align: center

"""
from functools import lru_cache
from typing import List, Tuple, Optional

import cv2
import numpy as np
from deprecation import deprecated
from huggingface_hub import hf_hub_download

from ..config.meta import __VERSION__
from ..data import ImageTyping, load_image
from ..utils import open_onnx_model

Expand Down Expand Up @@ -106,6 +118,8 @@ def _get_bounding_box_of_text(image: ImageTyping, model: str, threshold: float)
return bboxes


@deprecated(deprecated_in="0.2.10", removed_in="0.4", current_version=__VERSION__,
details="Use the new function :func:`imgutils.ocr.detect_text_with_ocr` instead")
def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: float = 0.05,
max_area_size: Optional[int] = 640):
"""
Expand All @@ -123,6 +137,10 @@ def detect_text(image: ImageTyping, model: str = _DEFAULT_MODEL, threshold: floa
:type max_area_size: Optional[int]
:return: List of detected text bounding boxes, labels, and scores.
:rtype: List[Tuple[Tuple[int, int, int, int], str, float]]

.. warning::
This function has been deprecated since version 0.2.10 and is scheduled for removal from imgutils in version 0.4.
Please migrate to :func:`imgutils.ocr.detect_text_with_ocr` as soon as possible.
"""
image = load_image(image)
if max_area_size is not None and image.width * image.height >= max_area_size ** 2:
Expand Down
24 changes: 19 additions & 5 deletions imgutils/ocr/entry.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,14 +87,21 @@ def detect_text_with_ocr(image: ImageTyping, model: str = _DEFAULT_DET_MODEL,
:return: A list of detected text boxes, label (always ``text``), and their confidence scores.
:rtype: List[Tuple[Tuple[int, int, int, int], str, float]]

.. note::
If you need to extract the actual text content, use the :func:`ocr` function.

Examples::
>>> from imgutils.ocr import detect_text_with_ocr
>>>
>>> detect_text_with_ocr('comic.jpg')
[((742, 485, 809, 511), 'text', 0.9543377610144915), ((682, 98, 734, 124), 'text', 0.9309689495575223), ((716, 136, 836, 164), 'text', 0.9042856988923695), ((144, 455, 196, 485), 'text', 0.874083638387722), ((719, 455, 835, 488), 'text', 0.8628696346175078), ((124, 478, 214, 508), 'text', 0.848871771901487), ((1030, 557, 1184, 578), 'text', 0.8352495440618789), ((427, 129, 553, 154), 'text', 0.8249209443996619)]
[((742, 485, 809, 511), 'text', 0.9543377610144915),
((682, 98, 734, 124), 'text', 0.9309689495575223),
((716, 136, 836, 164), 'text', 0.9042856988923695),
((144, 455, 196, 485), 'text', 0.874083638387722),
((719, 455, 835, 488), 'text', 0.8628696346175078),
((124, 478, 214, 508), 'text', 0.848871771901487),
((1030, 557, 1184, 578), 'text', 0.8352495440618789),
((427, 129, 553, 154), 'text', 0.8249209443996619)]

.. note::
If you need to extract the actual text content, use the :func:`ocr` function.
"""
retval = []
for box, _, score in _detect_text(image, model, heat_threshold, box_threshold, max_candidates, unclip_ratio):
Expand Down Expand Up @@ -135,7 +142,14 @@ def ocr(image: ImageTyping, detect_model: str = _DEFAULT_DET_MODEL,
>>> from imgutils.ocr import ocr
>>>
>>> ocr('comic.jpg')
[((742, 485, 809, 511), 'MOB.', 0.9356705927336156), ((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466), ((682, 98, 734, 124), 'BUT', 0.8730931912907247), ((144, 455, 196, 485), 'OH,', 0.8417627579351514), ((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503), ((1030, 557, 1184, 578), '(EL) GATO IBERICO', 0.7271127306351021), ((719, 455, 835, 488), "THAt'S △", 0.701928390168364), ((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)]
[((742, 485, 809, 511), 'MOB.', 0.9356705927336156),
((716, 136, 836, 164), 'SHISHOU,', 0.8933000384412466),
((682, 98, 734, 124), 'BUT', 0.8730931912907247),
((144, 455, 196, 485), 'OH,', 0.8417627579351514),
((427, 129, 553, 154), 'A MIRROR.', 0.7366019454049503),
((1030, 557, 1184, 578), '(EL) GATO IBERICO', 0.7271127306351021),
((719, 455, 835, 488), "THAt'S △", 0.701928390168364),
((124, 478, 214, 508), 'LOOK!', 0.6965972578194936)]

By default, the text recognition model used is `ch_PP-OCRv4_rec`.
This recognition model has good recognition capabilities for both Chinese and English.
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@ scipy
emoji>=2.5.0
pilmoji>=1.3.0
shapely
pyclipper
pyclipper
deprecation>=2.0.0
12 changes: 12 additions & 0 deletions test/ocr/test_ocr.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,21 @@
from PIL import Image

from imgutils.ocr import detect_text_with_ocr, list_det_models, list_rec_models, ocr
from imgutils.ocr.detect import _open_ocr_detection_model
from imgutils.ocr.recognize import _open_ocr_recognition_dictionary, _open_ocr_recognition_model
from test.testings import get_testfile


@pytest.fixture(autouse=True, scope='module')
def _clear_cache():
    """
    Module-scoped autouse fixture that clears the OCR loader caches on teardown.

    Nothing is done before the tests; once the module's tests have finished,
    ``cache_clear()`` is called on the detection model, recognition model and
    recognition dictionary loaders, in that order.
    """
    try:
        yield
    finally:
        for cached_loader in (
                _open_ocr_detection_model,
                _open_ocr_recognition_model,
                _open_ocr_recognition_dictionary,
        ):
            cached_loader.cache_clear()


@pytest.fixture()
def ocr_img_plot():
    """Yield the value of ``get_testfile('ocr', 'plot.png')`` for use in OCR tests."""
    plot_file = get_testfile('ocr', 'plot.png')
    yield plot_file
Expand Down
Loading