-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[Feature] Add tool for converting labelme data to coco format #2041
base: dev-1.x
Are you sure you want to change the base?
Conversation
Codecov Report: Patch and project coverage show no change.
Additional details and impacted files@@ Coverage Diff @@
## dev-1.x #2041 +/- ##
========================================
Coverage 82.15% 82.16%
========================================
Files 224 224
Lines 13362 13362
Branches 2265 2265
========================================
+ Hits 10978 10979 +1
+ Misses 1873 1872 -1
Partials 511 511
Flags with carried forward coverage won't be shown. Click here to find out more. see 1 file with indirect coverage changes Help us with your feedback. Take ten seconds to tell us how you rate us. Have a feature suggestion? Share it here. ☔ View full report in Codecov by Sentry. |
from sklearn.model_selection import train_test_split | ||
from tqdm import tqdm | ||
|
||
# import sys |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please remove the unnecessary code.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sorry — I thought some of the commented-out code might be useful, so I kept it.
category['id'] = id | ||
category['name'] = name | ||
# n个关键点数据 | ||
category['keypoint'] = [ |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should be another input instead of hard-coded.
annotation['image_id'] = self.img_id | ||
annotation['category_id'] = int(self.classname_to_id[label]) | ||
annotation['iscrowd'] = 0 | ||
annotation['area'] = 1.0 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
area
should be calculated, not 1.0
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I noticed this but forgot to modify the code — thank you!
..., | ||
], | ||
""" | ||
labels = ['wrist', 'thumb1', 'thumb2', ...] |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Should not be hard-coded.
- class_name 根类名字 | ||
""" | ||
|
||
self.classname_to_id = {args.class_name: 1} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is it possible to have multiple classes?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Re-formatted the codes and add a corresponding document.
keypoints = {} | ||
keypoints['info'] = { | ||
'description': 'Air Dataset', | ||
'version': 1.0, | ||
'year': 2022 | ||
} | ||
keypoints['license'] = ['BUAA'] | ||
keypoints['images'] = self.images | ||
keypoints['annotations'] = self.annotations | ||
keypoints['categories'] = self.categories | ||
return keypoints | ||
|
||
|
||
def init_dir(base_path): | ||
"""初始化COCO数据集的文件夹结构; | ||
|
||
coco - annotations #标注文件路径 | ||
- train #训练数据集 | ||
- val #验证数据集 | ||
Args: | ||
base_path:数据集放置的根路径 | ||
""" | ||
if not os.path.exists(os.path.join(base_path, 'coco', 'annotations')): | ||
os.makedirs(os.path.join(base_path, 'coco', 'annotations')) | ||
if not os.path.exists(os.path.join(base_path, 'coco', 'train')): | ||
os.makedirs(os.path.join(base_path, 'coco', 'train')) | ||
if not os.path.exists(os.path.join(base_path, 'coco', 'val')): | ||
os.makedirs(os.path.join(base_path, 'coco', 'val')) | ||
|
||
|
||
def convert(path, target): | ||
parser = argparse.ArgumentParser() | ||
parser.add_argument( | ||
'--class_name', '--n', help='class name', type=str, default='airplane') | ||
parser.add_argument( | ||
'--input', | ||
'--i', | ||
help='json file path (labelme)', | ||
type=str, | ||
default=path) | ||
parser.add_argument( | ||
'--output', | ||
'--o', | ||
help='output file path (coco format)', | ||
type=str, | ||
default=path) | ||
parser.add_argument( | ||
'--join_num', '--j', help='number of join', type=int, default=12) | ||
parser.add_argument( | ||
'--ratio', | ||
'--r', | ||
help='train and test split ratio', | ||
type=float, | ||
default=0.25) | ||
args = parser.parse_args() | ||
|
||
labelme_path = args.input | ||
saved_coco_path = args.output | ||
|
||
init_dir(saved_coco_path) # 初始化COCO数据集的文件夹结构 | ||
|
||
json_list_path = glob.glob(labelme_path + '/*.json') | ||
train_path, val_path = train_test_split( | ||
json_list_path, test_size=args.ratio) | ||
print('{} for training'.format(len(train_path)), | ||
'\n{} for testing'.format(len(val_path))) | ||
print('Start transform please wait ...') | ||
|
||
l2c_train = Labelme2coco_keypoints(args) # 构造数据集生成类 | ||
|
||
# 生成训练集 | ||
train_keypoints = l2c_train.to_coco(train_path) | ||
l2c_train.save_coco_json( | ||
train_keypoints, | ||
os.path.join(saved_coco_path, 'coco', 'annotations', | ||
'keypoints_train.json')) | ||
|
||
# 生成验证集 | ||
l2c_val = Labelme2coco_keypoints(args) | ||
val_instance = l2c_val.to_coco(val_path) | ||
l2c_val.save_coco_json( | ||
val_instance, | ||
os.path.join(saved_coco_path, 'coco', 'annotations', | ||
'keypoints_val.json')) | ||
|
||
# 拷贝 labelme 的原始图片到训练集和验证集里面 | ||
for file in train_path: | ||
shutil.copy( | ||
file.replace('json', 'jpg'), | ||
os.path.join(saved_coco_path, 'coco', 'train')) | ||
for file in val_path: | ||
shutil.copy( | ||
file.replace('json', 'jpg'), | ||
os.path.join(saved_coco_path, 'coco', 'val')) | ||
|
||
|
||
if __name__ == '__main__': | ||
source = 'your labelme path' | ||
target = 'your coco path' | ||
convert(source, target) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
# Copyright (c) OpenMMLab. All rights reserved. | |
import argparse | |
import glob | |
import json | |
import os | |
import shutil | |
import numpy as np | |
from sklearn.model_selection import train_test_split | |
from tqdm import tqdm | |
# import sys | |
class Labelme2coco_keypoints():
    """Convert Labelme keypoint annotations into COCO format.

    Args:
        args: Parsed command line arguments. Attributes used:
            - class_name (str): name of the (single) object category.
            - join_num (int): number of keypoints per instance.
            - keypoint_names (list, optional): ordered keypoint names;
              defaults to the generic names '1' .. str(join_num).
    """

    def __init__(self, args):
        self.classname_to_id = {args.class_name: 1}
        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0
        # Number of keypoints per instance (previously hard-coded to 12,
        # with the keypoint buffer hard-coded to 36 = 12 * 3).
        self.join_num = args.join_num
        # Keypoint names come from args when available; the previous
        # version used a hard-coded, incomplete hand-keypoint list.
        self.keypoint_names = list(
            getattr(args, 'keypoint_names', None)
            or [str(i + 1) for i in range(self.join_num)])

    def save_coco_json(self, instance, save_path):
        """Dump ``instance`` to ``save_path`` as UTF-8 JSON."""
        # `with` guarantees the handle is closed (the previous version
        # passed an unclosed open() result to json.dump).
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=1)

    def read_jsonfile(self, path):
        """Load and return the JSON object stored at ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _get_box(self, points):
        """Return the [x, y, w, h] box covering all ``points``."""
        min_x = min_y = np.inf
        max_x = max_y = 0
        for x, y in points:
            min_x = min(min_x, x)
            min_y = min(min_y, y)
            max_x = max(max_x, x)
            max_y = max(max_y, y)
        return [min_x, min_y, max_x - min_x, max_y - min_y]

    def _get_keypoints(self, points, keypoints, num_keypoints, label):
        """Write one Labelme point into the flat COCO keypoint array.

        ``label`` is '<name>' or '<name>_occluded'. COCO visibility:
        v = 1 means labelled but not visible, v = 2 labelled and visible.

        Returns:
            int: The updated number of labelled keypoints.
        """
        parts = label.split('_')
        name, flag = parts[0], parts[-1]
        visible = 1 if flag == 'occluded' else 2
        idx = self.keypoint_names.index(name)
        keypoints[idx * 3] = points[0]
        keypoints[idx * 3 + 1] = points[1]
        keypoints[idx * 3 + 2] = visible
        num_keypoints += 1
        return num_keypoints

    def _image(self, obj, path):
        """Build the COCO image dict from a Labelme annotation object.

        Returns:
            dict: With keys 'height', 'width', 'id' and 'file_name'.
        """
        image = {}
        # Read imageHeight/imageWidth directly instead of decoding the
        # (optional, sometimes missing) imageData field of Labelme files.
        image['height'], image['width'] = obj['imageHeight'], obj[
            'imageWidth']
        self.img_id = self.img_id + 1
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace('.json', '.jpg')
        return image

    def _annotation(self, bboxes_list, keypoints_list, json_path):
        """Generate COCO annotations for one image.

        Args:
            bboxes_list (list): Rectangle shapes of the image.
            keypoints_list (list): Point shapes of the image.
            json_path (str): Path of the source json file (for messages).
        """
        # One annotation per bbox; keypoints are shared per image here.
        for obj_shape in bboxes_list:
            annotation = {}
            # One (x, y, v) triple per keypoint.
            keypoints = [0] * (self.join_num * 3)
            num_keypoints = 0
            label = obj_shape['label']
            bbox = obj_shape['points']
            annotation['id'] = self.ann_id
            annotation['image_id'] = self.img_id
            annotation['category_id'] = int(self.classname_to_id[label])
            annotation['iscrowd'] = 0
            annotation['segmentation'] = [np.asarray(bbox).flatten().tolist()]
            annotation['bbox'] = self._get_box(bbox)
            # Area of the bounding box (was a hard-coded 1.0 before).
            annotation['area'] = float(
                annotation['bbox'][2] * annotation['bbox'][3])
            for keypoint in keypoints_list:
                point = keypoint['points']
                kp_label = keypoint['label']
                num_keypoints = self._get_keypoints(point[0], keypoints,
                                                    num_keypoints, kp_label)
            annotation['keypoints'] = keypoints
            annotation['num_keypoints'] = num_keypoints
            self.ann_id += 1
            self.annotations.append(annotation)

    def _init_categories(self):
        """Initialize the COCO category entries."""
        for name, id in self.classname_to_id.items():
            category = {}
            category['supercategory'] = name
            category['id'] = id
            category['name'] = name
            # COCO uses the plural 'keypoints' key; names come from the
            # configurable list instead of a hard-coded placeholder.
            category['keypoints'] = self.keypoint_names
            category['skeleton'] = []
            self.categories.append(category)

    def to_coco(self, json_path_list):
        """Convert Labelme annotation files into one COCO-format dict.

        Args:
            json_path_list (list): Paths of the Labelme json files.

        Returns:
            dict: The COCO annotation dictionary.
        """
        self._init_categories()
        # Process every json file of the dataset.
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)  # parse one annotation file
            self.images.append(self._image(obj, json_path))
            shapes = obj['shapes']  # the Labelme shape annotations
            bboxes_list, keypoints_list = [], []
            for shape in shapes:
                if shape['shape_type'] == 'rectangle':  # bboxes
                    bboxes_list.append(shape)
                elif shape['shape_type'] == 'point':  # keypoints
                    keypoints_list.append(shape)
            # All keypoints and bboxes of one file, i.e. of one image.
            self._annotation(bboxes_list, keypoints_list, json_path)
        keypoints = {}
        keypoints['info'] = {
            'description': 'Air Dataset',
            'version': 1.0,
            'year': 2022
        }
        keypoints['license'] = ['BUAA']
        keypoints['images'] = self.images
        keypoints['annotations'] = self.annotations
        keypoints['categories'] = self.categories
        return keypoints
def init_dir(base_path):
    """Initialize the COCO dataset folder structure.

    coco - annotations  # annotation files
         - train        # training images
         - val          # validation images

    Args:
        base_path (str): Root path where the dataset is placed.
    """
    # exist_ok avoids the check-then-create race of the previous version
    # and makes repeated calls harmless.
    for sub_dir in ('annotations', 'train', 'val'):
        os.makedirs(os.path.join(base_path, 'coco', sub_dir), exist_ok=True)
def convert(path, target):
    """Convert a folder of Labelme json files into a COCO dataset.

    Args:
        path (str): Default folder of the Labelme json files, also used
            as the default output folder; both can be overridden on the
            command line with --input / --output.
        target (str): Unused; kept for backward compatibility of the
            signature.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--class_name', '--n', help='class name', type=str,
        default='airplane')
    parser.add_argument(
        '--input',
        '--i',
        help='json file path (labelme)',
        type=str,
        default=path)
    parser.add_argument(
        '--output',
        '--o',
        help='output file path (coco format)',
        type=str,
        default=path)
    parser.add_argument(
        '--join_num', '--j', help='number of joints', type=int, default=12)
    parser.add_argument(
        '--ratio',
        '--r',
        help='train and test split ratio',
        type=float,
        default=0.25)
    args = parser.parse_args()

    labelme_path = args.input
    saved_coco_path = args.output

    init_dir(saved_coco_path)  # initialize the COCO folder structure

    json_list_path = glob.glob(labelme_path + '/*.json')
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')

    l2c_train = Labelme2coco_keypoints(args)  # the dataset generator

    # generate the training set
    train_keypoints = l2c_train.to_coco(train_path)
    l2c_train.save_coco_json(
        train_keypoints,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_train.json'))

    # generate the validation set
    l2c_val = Labelme2coco_keypoints(args)
    val_instance = l2c_val.to_coco(val_path)
    l2c_val.save_coco_json(
        val_instance,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_val.json'))

    # Copy the original Labelme images into the train / val folders.
    # os.path.splitext swaps only the extension; the previous
    # str.replace('json', 'jpg') also corrupted any other occurrence of
    # 'json' in the path (e.g. a directory named 'json').
    for file in train_path:
        shutil.copy(
            os.path.splitext(file)[0] + '.jpg',
            os.path.join(saved_coco_path, 'coco', 'train'))
    for file in val_path:
        shutil.copy(
            os.path.splitext(file)[0] + '.jpg',
            os.path.join(saved_coco_path, 'coco', 'val'))
if __name__ == '__main__':
    # Replace these placeholders with the actual dataset locations.
    labelme_dir = 'your labelme path'
    coco_dir = 'your coco path'
    convert(labelme_dir, coco_dir)
# Copyright (c) OpenMMLab. All rights reserved. | |
import argparse | |
import collections | |
import datetime | |
import glob | |
import json | |
import os | |
import shutil | |
import uuid | |
import numpy as np | |
import PIL.Image | |
import PIL.ImageDraw | |
from mmengine import Config | |
from sklearn.model_selection import train_test_split | |
from tqdm import tqdm | |
try: | |
import pycocotools.mask | |
except ImportError: | |
import sys | |
print('Please install pycocotools:\n\n pip install pycocotools\n') | |
sys.exit(1) | |
class Labelme2coco_keypoints():
    """Convert Labelme annotated keypoints into COCO format.

    Args:
        category_names (list): A list of category names in this dataset.
        keypoints_names (list): A list of keypoint names in this dataset.
        skeleton (list): A list of skeleton information in this dataset.
    """

    def __init__(self, category_names, keypoints_names, skeleton):
        # COCO category ids start at 1.
        self.classname_to_id = {
            name: cat_id + 1
            for cat_id, name in enumerate(category_names)
        }
        self.images = []
        self.annotations = []
        self.categories = []
        self.ann_id = 0
        self.img_id = 0
        self.keypoints_names = keypoints_names
        self.skeleton = skeleton

    def save_coco_json(self, instance, save_path):
        """Save ``instance`` to ``save_path`` as a UTF-8 JSON file."""
        # `with` guarantees the handle is closed (the previous version
        # leaked the file object returned by open()).
        with open(save_path, 'w', encoding='utf-8') as f:
            json.dump(instance, f, ensure_ascii=False, indent=1)

    def read_jsonfile(self, path):
        """Load and return the JSON object stored at ``path``."""
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def shape_to_mask(self, img_shape, points, shape_type=None, point_size=5):
        """Rasterize one Labelme shape into a boolean mask.

        Args:
            img_shape (list): ``[height, width]`` of the target mask.
            points (list): The shape's control points.
            shape_type (str, optional): 'rectangle', 'point' or any other
                value for a polygon. Defaults to None (polygon).
            point_size (int): Radius used to draw 'point' shapes.

        Returns:
            np.ndarray: Boolean mask with the shape filled with True.
        """
        mask = np.zeros(img_shape[:2], dtype=np.uint8)
        mask = PIL.Image.fromarray(mask)
        draw = PIL.ImageDraw.Draw(mask)
        xy = [tuple(point) for point in points]
        if shape_type == 'rectangle':
            assert len(
                xy) == 2, 'Shape of shape_type=rectangle must have 2 points'
            draw.rectangle(xy, outline=1, fill=1)
        elif shape_type == 'point':
            assert len(xy) == 1, 'Shape of shape_type=point must have 1 points'
            cx, cy = xy[0]
            r = point_size
            draw.ellipse([cx - r, cy - r, cx + r, cy + r], outline=1, fill=1)
        else:
            assert len(xy) > 2, 'Polygon must have points more than 2'
            draw.polygon(xy=xy, outline=1, fill=1)
        mask = np.array(mask, dtype=bool)
        return mask

    def _image(self, path, obj):
        """Parse the obj object of Labelme to generate the image object of
        COCO.

        Args:
            path (str): Path to the Labelme json file.
            obj (JSON): The JSON object corresponding to ``path``.

        Returns:
            Dict: A dictionary representing the image, the keys include
            ``'id'``, ``'file_name'``, ``'license'``, ``'height'`` and
            ``'width'``.
        """
        image = {}
        # Obtain the size from imageHeight/imageWidth directly to avoid
        # problems with the (optional) imageData field of Labelme.
        image['height'], image['width'] = obj['imageHeight'], obj[
            'imageWidth']
        self.img_id += 1
        image['id'] = self.img_id
        image['file_name'] = os.path.basename(path).replace('.json', '.jpg')
        image['license'] = 1
        return image

    def _annotation(self, shapes, img):
        """Generate COCO annotations for one image.

        Args:
            shapes (list): The list of shapes in the Labelme annotation.
            img (dict): The COCO image dict produced by ``_image``.
        """
        groupId_keypoints = collections.defaultdict(list)
        masks = {}  # for area
        segmentations = collections.defaultdict(list)  # for segmentation
        for shape in shapes:
            group_id = shape.get('group_id')
            if group_id is None:
                # A shape without a group id forms its own instance.
                group_id = uuid.uuid1()
            if shape['shape_type'] == 'point':
                groupId_keypoints[group_id].append(shape)
            else:
                points = shape['points']
                label = shape['label']
                shape_type = shape.get('shape_type', 'polygon')
                mask = self.shape_to_mask([img['height'], img['width']],
                                          points, shape_type)
                instance = (label, group_id)
                if instance in masks:
                    # Merge multiple shapes belonging to the same instance.
                    masks[instance] = masks[instance] | mask
                else:
                    masks[instance] = mask
                if shape_type == 'rectangle':
                    # Expand the 2-point rectangle into a 4-corner polygon.
                    (x1, y1), (x2, y2) = points
                    x1, x2 = sorted([x1, x2])
                    y1, y2 = sorted([y1, y2])
                    points = [x1, y1, x2, y1, x2, y2, x1, y2]
                points = np.asarray(points).flatten().tolist()
                segmentations[instance].append(points)
        for instance, mask in masks.items():
            cls_name, group_id = instance
            if cls_name not in self.classname_to_id:
                continue
            cls_id = self.classname_to_id[cls_name]
            # Area and bbox are derived from the rasterized mask via RLE.
            mask = np.asfortranarray(mask.astype(np.uint8))
            mask = pycocotools.mask.encode(mask)
            area = float(pycocotools.mask.area(mask))
            bbox = pycocotools.mask.toBbox(mask).flatten().tolist()
            keypoints = [0] * (3 * len(self.keypoints_names))
            keypoints_list = groupId_keypoints[group_id]
            for keypoint in keypoints_list:
                idx = self.keypoints_names.index(keypoint['label'])
                point = keypoint['points'][0]
                # COCO visibility: 1 = labelled but occluded, 2 = visible.
                # .get() keeps this robust when 'flags' is absent (the
                # previous keypoint['flags'] raised KeyError in that case).
                visible = 1 if keypoint.get('flags', {}).get('occluded') else 2
                keypoints[idx * 3] = point[0]
                keypoints[idx * 3 + 1] = point[1]
                keypoints[idx * 3 + 2] = visible
            self.annotations.append(
                dict(
                    id=len(self.annotations),
                    image_id=img['id'],
                    category_id=cls_id,
                    segmentation=segmentations[instance],
                    area=area,
                    bbox=bbox,
                    iscrowd=0,
                    num_keypoints=len(keypoints_list),
                    keypoints=keypoints,
                ))

    def _init_categories(self):
        """Initialize the COCO labeling category."""
        for name, id in self.classname_to_id.items():
            category = {}
            category['supercategory'] = name
            category['id'] = id
            category['name'] = name
            category['keypoints'] = self.keypoints_names
            category['skeleton'] = self.skeleton
            self.categories.append(category)

    def to_coco(self, json_path_list):
        """Convert Labelme raw labels into COCO dataset format. The generated
        results include labels and images.

        Args:
            json_path_list (list): Paths of original datasets.

        Returns:
            Dict: A dictionary in COCO annotation format.
        """
        self._init_categories()
        # The json files representing each image in the folder are processed
        for json_path in tqdm(json_path_list):
            obj = self.read_jsonfile(json_path)
            img = self._image(json_path, obj)
            self.images.append(img)
            self._annotation(obj['shapes'], img)
        now = datetime.datetime.today()
        coco_json = {}
        coco_json['info'] = dict(
            description='Converted COCO dataset',
            version=None,
            contributor=None,
            url=None,
            year=now.year,
            date_created=now.strftime('%Y/%m/%d'))
        coco_json['licenses'] = [dict(
            url=None,
            id=1,
            name=None,
        )]
        coco_json['images'] = self.images
        coco_json['annotations'] = self.annotations
        coco_json['categories'] = self.categories
        return coco_json
def init_dir(base_path):
    """Create the COCO folder layout under ``base_path``.

    Builds ``<base_path>/coco/annotations``, ``.../train`` and ``.../val``,
    skipping any directory that already exists.

    Args:
        base_path (str): Root path for placing the COCO dataset.
    """
    for sub_dir in ('annotations', 'train', 'val'):
        target = os.path.join(base_path, 'coco', sub_dir)
        if not os.path.exists(target):
            os.makedirs(target)
def parse_args():
    """Parse the command line options of the Labelme-to-COCO converter."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--input', '--i', type=str, required=True,
        help='input json file folder')
    parser.add_argument(
        '--output', '--o', type=str, required=True,
        help='output data folder (for COCO data)')
    parser.add_argument(
        '--categories', '--c', type=str, default=None,
        help='category names file')
    parser.add_argument(
        '--dataset_cfg', '--d', type=str,
        default='configs/_base_/datasets/coco.py',
        help='dataset config file')
    parser.add_argument(
        '--ratio', '--r', type=float, default=0.25,
        help='train and test split ratio')
    return parser.parse_args()
def parse_dataset_config(dataset_cfg='configs/_base_/datasets/coco.py'):
    """Parse the dataset configuration file.

    Args:
        dataset_cfg (str, optional): Path to the dataset configuration
            file. A valid configuration file must include ``dataset_info``
            with the following keys:

            - keypoint_info (dict): the keypoint information
            - skeleton_info (dict): the skeleton information

            If not specified, the default COCO dataset config is used.
            Defaults to 'configs/_base_/datasets/coco.py'.

    Returns:
        Tuple[list]: The keypoint names (indexed by keypoint id) and the
        skeleton as pairs of keypoint ids.
    """
    dataset_info = Config.fromfile(dataset_cfg).dataset_info
    keypoint_info = dataset_info['keypoint_info']
    keypoints_names = [''] * len(keypoint_info)
    keypoint2id = {}
    # Map every keypoint id to its name, and every name back to its id.
    for info in keypoint_info.values():
        keypoints_names[info['id']] = info['name']
        keypoint2id[info['name']] = info['id']
    # Translate each skeleton link from keypoint names to keypoint ids.
    skeleton = []
    for info in dataset_info['skeleton_info'].values():
        link = info['link']
        skeleton.append([keypoint2id[link[0]], keypoint2id[link[1]]])
    return keypoints_names, skeleton
def convert(category_names, keypoints_names, skeleton, labelme_paths,
            coco_file_path):
    """Convert Labelme annotated files into COCO format and save it.

    Args:
        category_names (list): The list of category names of this dataset.
        keypoints_names (list): The list of keypoint names of this dataset.
        skeleton (list): The list of skeleton info of this dataset.
        labelme_paths (list): The list of path of Labelme files.
        coco_file_path (str): The path to save the COCO annotation file.
    """
    converter = Labelme2coco_keypoints(category_names, keypoints_names,
                                       skeleton)
    coco_dict = converter.to_coco(labelme_paths)
    converter.save_coco_json(coco_dict, coco_file_path)
def main():
    """Entry point: split the Labelme files, convert both splits to COCO
    annotation files, and copy the corresponding images."""
    args = parse_args()
    labelme_path = args.input
    saved_coco_path = args.output
    # Initialize the folder structure of the COCO dataset.
    init_dir(saved_coco_path)
    json_list_path = glob.glob(labelme_path + '/*.json')
    train_path, val_path = train_test_split(
        json_list_path, test_size=args.ratio)
    print('{} for training'.format(len(train_path)),
          '\n{} for testing'.format(len(val_path)))
    print('Start transform please wait ...')
    keypoints_names, skeleton = parse_dataset_config(args.dataset_cfg)
    if args.categories:
        # `with` closes the categories file (the previous version leaked
        # the handle returned by open()).
        with open(args.categories) as f:
            category_names = [line.strip() for line in f]
    else:
        category_names = ['person']
    # generate the train dataset
    convert(
        category_names, keypoints_names, skeleton, train_path,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_train.json'))
    # generate the val dataset
    convert(
        category_names, keypoints_names, skeleton, val_path,
        os.path.join(saved_coco_path, 'coco', 'annotations',
                     'keypoints_val.json'))
    # Copy the original Labelme images into the training and validation
    # sets. os.path.splitext swaps only the extension; the previous
    # str.replace('json', 'jpg') also corrupted any other occurrence of
    # 'json' in the path (e.g. a directory named 'json').
    for file in train_path:
        shutil.copy(
            os.path.splitext(file)[0] + '.jpg',
            os.path.join(saved_coco_path, 'coco', 'train'))
    for file in val_path:
        shutil.copy(
            os.path.splitext(file)[0] + '.jpg',
            os.path.join(saved_coco_path, 'coco', 'val'))


if __name__ == '__main__':
    main()
Also with a document introducing the usage of the script.
labelme2coco.md
Motivation
Provides a labelme-to-COCO conversion tool, since the project does not currently include the ability to assemble initially scattered Labelme files into a single COCO annotation file.
Modification
BC-breaking (Optional)
Use cases (Optional)
Checklist
Before PR:
After PR: