Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add bbox auto zoom center and vertex moves independently #87

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 28 additions & 3 deletions PPOCRLabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,8 @@
newIcon,
rebuild_html_from_ppstructure_label,
stepsInfo,
polygon_bounding_box_center_and_area,
map_value,
struct,
)
from libs.labelColor import label_colormap
Expand Down Expand Up @@ -130,6 +132,7 @@ def __init__(
lang="ch",
gpu=False,
img_list_natural_sort=True,
bbox_auto_zoom_center=False,
kie_mode=False,
default_filename=None,
default_predefined_class_file=None,
Expand All @@ -151,6 +154,7 @@ def __init__(
self.lang = lang
self.gpu = gpu
self.img_list_natural_sort = img_list_natural_sort
self.bbox_auto_zoom_center = bbox_auto_zoom_center

# Load string bundle for i18n
if lang not in ["ch", "en"]:
Expand Down Expand Up @@ -1943,7 +1947,7 @@ def addZoom(self, increment=10):
int(self.zoomWidget.value() + increment)
) # set zoom slider value

def zoomRequest(self, delta):
def zoomRequest(self, delta, pos: QPoint = None):
# get the current scrollbar positions
# calculate the percentages ~ coordinates
h_bar = self.scrollBars[Qt.Horizontal]
Expand All @@ -1958,8 +1962,10 @@ def zoomRequest(self, delta):
# where 0 = move left
# 1 = move right
# up and down analogous
cursor = QCursor()
pos = cursor.pos()
if pos is None:
cursor = QCursor()
pos = cursor.pos()

relative_pos = QWidget.mapFromGlobal(self, pos)

cursor_x = relative_pos.x()
Expand Down Expand Up @@ -2013,6 +2019,7 @@ def togglePolygons(self, value):

def loadFile(self, filePath=None, isAdjustScale=True):
"""Load the specified file, or the last opened file if None."""
self.canvas.shape_move_index = None
if self.dirty:
self.mayContinue()
self.resetState()
Expand Down Expand Up @@ -2117,6 +2124,20 @@ def loadFile(self, filePath=None, isAdjustScale=True):
)

self.canvas.setFocus(True)

if self.bbox_auto_zoom_center:
if len(self.canvas.shapes) > 0:
(
center_x,
center_y,
shape_area,
) = polygon_bounding_box_center_and_area(
self.canvas.shapes[0].points
)
if shape_area < 30000:
zoom_value = 120 * map_value(shape_area, 100, 30000, 20, 0)
self.zoomRequest(zoom_value, QPoint(center_x, center_y))
# print(" =========> ", shape_area, " ==> ", zoom_value)
return True
return False

Expand Down Expand Up @@ -3548,6 +3569,9 @@ def get_main_app(argv=[]):
arg_parser.add_argument("--rec_model_dir", type=str, default=None, nargs="?")
arg_parser.add_argument("--rec_char_dict_path", type=str, default=None, nargs="?")
arg_parser.add_argument("--cls_model_dir", type=str, default=None, nargs="?")
arg_parser.add_argument(
"--bbox_auto_zoom_center", type=str2bool, default=False, nargs="?"
)

args = arg_parser.parse_args(argv[1:])

Expand All @@ -3561,6 +3585,7 @@ def get_main_app(argv=[]):
rec_model_dir=args.rec_model_dir,
rec_char_dict_path=args.rec_char_dict_path,
cls_model_dir=args.cls_model_dir,
bbox_auto_zoom_center=args.bbox_auto_zoom_center,
)
win.show()
return app, win
Expand Down
11 changes: 9 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ PPOCRLabelv2 is a semi-automatic graphic annotation tool suitable for OCR field,
- `rec_model_dir`: Path to the recognition model directory
- `rec_char_dict_path`: Path to the recognition model dictionary file
- `cls_model_dir`: Path to the classification model directory
- Added the `--bbox_auto_zoom_center` parameter. Enable it when each image contains only one bounding box; the view then automatically zooms in and centers on that bounding box.
- Added 5 shortcut keys `z`, `x`, `c`, `v`, `b` for controlling the 4 vertices of the bounding box. For usage details, see `11. Additional Feature Description` in "2.1 Operating Procedures" below.
- 2022.05: Add table annotations, follow `2.2 Table Annotations` for more information (by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
- 2022.02: (by [PeterH0323](https://github.com/peterh0323))
- Add KIE Mode by using `--kie`, for [detection + identification + keyword extraction] labeling.
Expand Down Expand Up @@ -166,6 +168,12 @@ PPOCRLabel.exe --lang ch
11. Additional Feature Description
- `File` -> `Re-recognition`: After checking, the newly annotated box content will automatically trigger the `Re-recognition` function of the current annotation box, eliminating the need to click the Re-identify button. This is suitable for scenarios where you do not want to use Automatic Annotation but prefer manual annotation, such as license plate recognition. In a single image with only one license plate, using Automatic Annotation would require deleting many additional recognized text boxes, which is less efficient than directly re-annotating.
- `File` -> `Auto Save Unsaved changes`: By default, you need to press the `Check` button to complete the marking confirmation for the current box, which can be cumbersome. After checking, when switching to the next image (by pressing the shortcut key `D`), a prompt box asking to confirm whether to save unconfirmed markings will no longer appear. The current markings will be automatically saved and the next image will be switched, making it convenient for quick marking.
- After selecting the bounding box, there are 5 shortcut keys available to individually control the movement of the four vertices of the bounding box, suitable for scenarios that require precise control over the positions of the bounding box vertices:
- `z`: After pressing, the up, down, left, and right arrow keys will move the 1st vertex individually.
- `x`: After pressing, the up, down, left, and right arrow keys will move the 2nd vertex individually.
- `c`: After pressing, the up, down, left, and right arrow keys will move the 3rd vertex individually.
- `v`: After pressing, the up, down, left, and right arrow keys will move the 4th vertex individually.
- `b`: After pressing, the up, down, left, and right arrow keys will revert to the default action of moving the entire bounding box.

### 2.2 Table Annotation

Expand Down Expand Up @@ -217,8 +225,6 @@ labeling in the Excel file, the recommended steps are:
| Ctrl + Shift + R | Re-recognize all the labels of the current image |
| W | Create a rect box |
| Q or Home | Create a multi-points box |
| X | Rotate the box anti-clockwise |
| C | Rotate the box clockwise |
| Ctrl + E | Edit label of the selected box |
| Ctrl + X | Change key class of the box when enable `--kie` |
| Ctrl + R | Re-recognize the selected box |
Expand All @@ -232,6 +238,7 @@ labeling in the Excel file, the recommended steps are:
| Ctrl++ | Zoom in |
| Ctrl-- | Zoom out |
| ↑→↓← | Move selected box |
| Z, X, C, V, B | Choose what the arrow keys move on the selected box: Z/X/C/V = vertex 1/2/3/4 only, B = the whole box |

### 3.2 Built-in Model

Expand Down
11 changes: 9 additions & 2 deletions README_ch.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ PPOCRLabel是一款适用于OCR领域的半自动化图形标注工具,内置P
- `rec_model_dir` :识别模型目录路径
- `rec_char_dict_path` :识别模型字典文件路径
- `cls_model_dir` :分类模型目录路径
- 新增`--bbox_auto_zoom_center`参数,当图片只有一个标记框的时候,可以开启,会自动将标记框居中放大
- 新增5个控制标记框4个顶点的快捷键`z`、`x`、`c`、`v`、`b`,使用方法详见下方`2.1 操作步骤`的`11. 补充功能说明`。
- 2022.05:**新增表格标注**,使用方法见下方`2.2 表格标注`(by [whjdark](https://github.com/peterh0323); [Evezerest](https://github.com/Evezerest))
- 2022.02:**新增关键信息标注**、优化标注体验(by [PeterH0323](https://github.com/peterh0323) )
- 新增:使用 `--kie` 进入 KIE 功能,用于打【检测+识别+关键字提取】的标签
Expand Down Expand Up @@ -152,6 +154,12 @@ PPOCRLabel.exe --lang ch
11. 补充功能说明
- `文件` -> `自动重新识别` : 勾选后,对于新标注的框内容会自动触发当前标注框的重新识别功能,不需要再去点击`重新识别`按钮,适合各种原因不想使用`自动标注`只想手动标注的场景,例如车牌识别,一张图里只有一个车牌,如果使用`自动标注`,需要删除很多额外识别出来的文字框,不如直接重新标注
- `文件` -> `自动保存未提交变更` : 默认是按`确认`按钮完成当前框的标记确认,有点繁琐,勾选后,切换下一张图(按快捷键`D`)的时候,不再弹出提示框确认是否保存未确认的标记,自动保存当前标记并切换下一张图,方便快速标记
- 选中标记框后,5个可以控制标记框四个顶点单独移动的快捷键,适合需要精确控制标记框四个顶点位置的场景
- `z` :按下后,此时使用键盘的上下左右按键将单独移动第1个顶点
- `x` :按下后,此时使用键盘的上下左右按键将单独移动第2个顶点
- `c` :按下后,此时使用键盘的上下左右按键将单独移动第3个顶点
- `v` :按下后,此时使用键盘的上下左右按键将单独移动第4个顶点
- `b` :按下后,此时使用键盘的上下左右按键将恢复默认的整体移动整个标记框

### 2.2 表格标注([视频演示](https://www.bilibili.com/video/BV1wR4y1v7JE/?share_source=copy_web&vd_source=cf1f9d24648d49636e3d109c9f9a377d&t=1998))

Expand Down Expand Up @@ -196,8 +204,6 @@ PPOCRLabel.exe --lang ch
| Ctrl + shift + R | 对当前图片的所有标记重新识别 |
| W | 新建矩形框 |
| Q 或 Home | 新建多点框 |
| X | 框逆时针旋转 |
| C | 框顺时针旋转 |
| Ctrl + E | 编辑所选框标签 |
| Ctrl + X | `--kie` 模式下,修改 Box 的关键字种类 |
| Ctrl + R | 重新识别所选标记 |
Expand All @@ -211,6 +217,7 @@ PPOCRLabel.exe --lang ch
| Ctrl++ | 放大 |
| Ctrl-- | 缩小 |
| ↑→↓← | 移动标记框 |
| Z、X、C、V、B | 对选中的标记框,单独移动四个顶点 |

### 3.2 内置模型

Expand Down
64 changes: 48 additions & 16 deletions libs/canvas.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ class Canvas(QWidget):

epsilon = 5.0

shape_move_index = None

def __init__(self, *args, **kwargs):
super(Canvas, self).__init__(*args, **kwargs)
# Initialise local state.
Expand Down Expand Up @@ -754,6 +756,39 @@ def keyPressEvent(self, ev):
self.moveOnePixel("Up")
elif key == Qt.Key_Down and self.selectedShapes:
self.moveOnePixel("Down")
elif key == Qt.Key_Z and self.selectedShapes:
self.shape_move_index = 0
select_shape = self.selectedShapes[0]
select_shape.highlightVertex(
self.shape_move_index, select_shape.MOVE_VERTEX
)
self.update()
elif key == Qt.Key_X and self.selectedShapes:
self.shape_move_index = 1
select_shape = self.selectedShapes[0]
select_shape.highlightVertex(
self.shape_move_index, select_shape.MOVE_VERTEX
)
self.update()
elif key == Qt.Key_C and self.selectedShapes:
self.shape_move_index = 2
select_shape = self.selectedShapes[0]
select_shape.highlightVertex(
self.shape_move_index, select_shape.MOVE_VERTEX
)
self.update()
elif key == Qt.Key_V and self.selectedShapes:
self.shape_move_index = 3
select_shape = self.selectedShapes[0]
select_shape.highlightVertex(
self.shape_move_index, select_shape.MOVE_VERTEX
)
self.update()
elif key == Qt.Key_B and self.selectedShapes:
self.shape_move_index = None
select_shape = self.selectedShapes[0]
select_shape.highlightClear()
self.update()
elif key == Qt.Key_X and self.selectedShapes:
for i in range(len(self.selectedShapes)):
self.selectedShape = self.selectedShapes[i]
Expand Down Expand Up @@ -788,34 +823,31 @@ def moveOnePixel(self, direction):
self.selectedShape = self.selectedShapes[i]
if direction == "Left" and not self.moveOutOfBound(QPointF(-1.0, 0)):
# print("move Left one pixel")
self.selectedShape.points[0] += QPointF(-1.0, 0)
self.selectedShape.points[1] += QPointF(-1.0, 0)
self.selectedShape.points[2] += QPointF(-1.0, 0)
self.selectedShape.points[3] += QPointF(-1.0, 0)
self.move_points(QPointF(-1.0, 0))
elif direction == "Right" and not self.moveOutOfBound(QPointF(1.0, 0)):
# print("move Right one pixel")
self.selectedShape.points[0] += QPointF(1.0, 0)
self.selectedShape.points[1] += QPointF(1.0, 0)
self.selectedShape.points[2] += QPointF(1.0, 0)
self.selectedShape.points[3] += QPointF(1.0, 0)
self.move_points(QPointF(1.0, 0))
elif direction == "Up" and not self.moveOutOfBound(QPointF(0, -1.0)):
# print("move Up one pixel")
self.selectedShape.points[0] += QPointF(0, -1.0)
self.selectedShape.points[1] += QPointF(0, -1.0)
self.selectedShape.points[2] += QPointF(0, -1.0)
self.selectedShape.points[3] += QPointF(0, -1.0)
self.move_points(QPointF(0, -1.0))
elif direction == "Down" and not self.moveOutOfBound(QPointF(0, 1.0)):
# print("move Down one pixel")
self.selectedShape.points[0] += QPointF(0, 1.0)
self.selectedShape.points[1] += QPointF(0, 1.0)
self.selectedShape.points[2] += QPointF(0, 1.0)
self.selectedShape.points[3] += QPointF(0, 1.0)
self.move_points(QPointF(0, 1.0))
shapesBackup = []
shapesBackup = copy.deepcopy(self.shapes)
self.shapesBackups.append(shapesBackup)
self.shapeMoved.emit()
self.repaint()

def move_points(self, p: QPointF):
    """Shift vertices of the currently selected shape by the offset ``p``.

    If ``self.shape_move_index`` is ``None``, the first four vertices of
    ``self.selectedShape`` are all shifted by ``p``. Otherwise only the
    vertex stored at that index is shifted. The shape's ``points``
    container is modified in place; nothing is returned.
    """
    vertices = self.selectedShape.points
    target = self.shape_move_index

    # Single-vertex mode: only the chosen corner follows the offset.
    if target is not None:
        vertices[target] += p
        return

    # Whole-box mode: every one of the four corners follows the offset.
    for corner in range(4):
        vertices[corner] += p

def moveOutOfBound(self, step):
    """Report whether applying ``step`` would push a moved vertex off the pixmap.

    Only the vertices that the move would actually displace are tested:

    * ``self.shape_move_index is None`` (whole-box move): the first four
      vertices of ``self.selectedShape`` are each shifted by ``step``.
    * otherwise (single-vertex move): only the vertex at that index is
      shifted, so a different vertex resting on the image border no longer
      blocks a move that keeps the chosen vertex inside the image.

    Args:
        step: Offset added to each tested vertex (same type the points
            support with ``+``).

    Returns:
        ``True`` if ``self.outOfPixmap`` flags any shifted vertex,
        ``False`` otherwise.
    """
    vertices = self.selectedShape.points
    active = self.shape_move_index

    if active is None:
        candidates = [vertex + step for vertex in vertices[:4]]
    else:
        # Mirror the single-vertex branch of move_points: the other
        # corners stay put, so they must not take part in the bounds check.
        candidates = [vertices[active] + step]

    return any(self.outOfPixmap(candidate) for candidate in candidates)
Expand Down
45 changes: 45 additions & 0 deletions libs/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,7 @@ def keysInfo(lang="en"):
"Ctrl++\t\t\t缩小\n"
"Ctrl--\t\t\t放大\n"
"↑→↓←\t\t\t移动标记框\n"
"Z、X、C、V、B\t\t\t对选中的标记框,单独移动四个顶点\n"
"———————————————————————\n"
"注:Mac用户Command键替换上述Ctrl键"
)
Expand Down Expand Up @@ -351,8 +352,52 @@ def keysInfo(lang="en"):
"Ctrl++\t\t\tZoom in\n"
"Ctrl--\t\t\tZoom out\n"
"↑→↓←\t\t\tMove selected box"
"Z, X, C, V, B\t\tMove the four vertices of \n"
and "\t\t\tthe selected bounding box individually"
"———————————————————————\n"
"Notice:For Mac users, use the 'Command' key instead of the 'Ctrl' key"
)

return msg


def polygon_bounding_box_center_and_area(points):
    """Return the bounding-box center and the enclosed area of a polygon.

    Note that the two results describe different things: the center is the
    midpoint of the axis-aligned bounding box of ``points``, while the area
    is the area of the polygon itself (shoelace formula), not of its
    bounding rectangle.

    Args:
        points: Sequence of vertex objects exposing ``x()`` and ``y()``
            accessor methods, listed in polygon order.

    Returns:
        A ``(center_x, center_y, area)`` tuple.

    Raises:
        ValueError: If fewer than three points are supplied.
    """
    count = len(points)
    if count < 3:
        raise ValueError("At least three points are required to form a polygon")

    xs = [vertex.x() for vertex in points]
    ys = [vertex.y() for vertex in points]

    # Shoelace formula: accumulate the cross product of each edge, wrapping
    # from the last vertex back to the first.
    doubled_signed_area = 0
    for idx in range(count):
        nxt = (idx + 1) % count
        doubled_signed_area += xs[idx] * ys[nxt] - xs[nxt] * ys[idx]

    # Vertex order (clockwise or not) only affects the sign, so drop it.
    enclosed_area = abs(doubled_signed_area) / 2.0

    mid_x = (min(xs) + max(xs)) / 2
    mid_y = (min(ys) + max(ys)) / 2
    return mid_x, mid_y, enclosed_area


def map_value(x, in_min, in_max, out_min, out_max):
    """Linearly map ``x`` from [in_min, in_max] onto [out_min, out_max].

    The result is not clamped: an ``x`` outside the input range yields a
    value outside the output range.

    Raises:
        ValueError: If ``in_max`` equals ``in_min`` (the input range has
            zero width, so the mapping is undefined).
    """
    if in_max == in_min:
        raise ValueError("in_max and in_min cannot be equal")

    offset = x - in_min
    out_span = out_max - out_min
    in_span = in_max - in_min
    return offset * out_span / in_span + out_min