forked from TonyJiangWJ/Yolov8_Train
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset_sql.py
275 lines (248 loc) · 10.5 KB
/
dataset_sql.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
import json
import os
import sqlite3
import label_config
from lib.mylogger import LOGGER
# 数据集
###
# create table dataset
# (
# ID integer not null
# constraint dataset_pk
# primary key autoincrement,
# dataset_name varchar(32) not null,
# data_dir_path varchar(512) not null,
# create_time varchar(20) not null,
# modify_time varchar(20) not null
# );
###
class Dataset:
def __init__(self, dataset_name, data_dir_path, id=None):
self.dataset_name = dataset_name
self.data_dir_path = data_dir_path
self.id = id
def save(self, cnx):
cursor = cnx.cursor()
insert_sql = f"INSERT INTO dataset (dataset_name, data_dir_path, create_time, modify_time)" \
f" SELECT '{self.dataset_name}', '{self.data_dir_path}', current_timestamp, current_timestamp" \
f" where not exists(select 1 from dataset where dataset_name ='{self.dataset_name}')"
LOGGER.verbose(f"insert sql:{insert_sql}")
cursor.execute(insert_sql)
if cursor.rowcount > 0:
LOGGER.info(f"insert success")
select_id = f"select id from dataset where dataset_name='{self.dataset_name}'"
cursor.execute(select_id)
result = cursor.fetchone()
if result is not None:
self.id = result[0]
LOGGER.verbose(f"数据集 id:{self.id}")
cnx.commit()
def get_dataset_from_db(cnx, dataset_name):
cursor = cnx.cursor()
sql = f"SELECT dataset_name, data_dir_path, id from dataset where dataset_name='{dataset_name}'"
cursor.execute(sql)
result = cursor.fetchone()
if result is not None:
return Dataset(result[0], result[1], id=result[2])
return None
###
# create table dataset_images
# (
# ID integer
# constraint dataset_images_pk
# primary key autoincrement,
# dataset_id integer not null,
# image_id varchar(32),
# file_name varchar(64),
# create_time varchar(20),
# modify_time varchar(20)
# );
#
# create index dataset_images_img_id_index
# on dataset_images (dataset_id, image_id);
###
class DatasetImages:
def __init__(self, dataset_id, image_id, file_name):
self.dataset_id = dataset_id
self.image_id = image_id
self.file_name = file_name
def save(self, cnx):
cursor = cnx.cursor()
insert_sql = f"INSERT INTO dataset_images (dataset_id, image_id, file_name, create_time, modify_time)" \
f" SELECT '{self.dataset_id}', '{self.image_id}', '{self.file_name}', current_timestamp," \
f" current_timestamp" \
f" where not exists(" \
f"select 1 from dataset_images where dataset_id ='{self.dataset_id}'" \
f" and image_id='{self.image_id}')"
LOGGER.verbose(f"insert sql:{insert_sql}")
cursor.execute(insert_sql)
if cursor.rowcount > 0:
LOGGER.info(f"insert success")
cnx.commit()
###
# create table dataset_labels
# (
# ID integer
# constraint dataset_labels_pk
# primary key autoincrement,
# dataset_id integer not null,
# label_name varchar(32) not null,
# class_id integer,
# label_chz varchar(128),
# create_time varchar(20) not null,
# modify_time varchar(20)
# );
#
# create index dataset_labels_data_id_index
# on dataset_labels (dataset_id);
###
class DatasetLabels:
def __init__(self, dataset_id, label_name, class_id, label_chz, total_count):
self.dataset_id = dataset_id
self.label_name = label_name
self.class_id = class_id
self.label_chz = label_chz
self.total_count = total_count
def save(self, cnx):
cursor = cnx.cursor()
select_id = f"select id from dataset_labels where dataset_id='{self.dataset_id}' and label_name='{self.label_name}'"
cursor.execute(select_id)
id_result = cursor.fetchone()
if id_result is None:
insert_sql = f"INSERT INTO dataset_labels (dataset_id, label_name, class_id, label_chz, total_count, create_time, modify_time)" \
f" SELECT '{self.dataset_id}', '{self.label_name}', '{self.class_id}', '{self.label_chz}', '{self.total_count}', current_timestamp," \
f" current_timestamp" \
f" where not exists(" \
f"select 1 from dataset_labels where dataset_id ='{self.dataset_id}'" \
f" and label_name='{self.label_name}')"
LOGGER.verbose(f"insert sql:{insert_sql}")
cursor.execute(insert_sql)
else:
update_sql = f"UPDATE dataset_labels set label_chz='{self.label_chz}', total_count='{self.total_count}' WHERE " \
f"label_name='{self.label_name}' and dataset_id='{self.dataset_id}'"
LOGGER.verbose(f"update sql:{update_sql}")
cursor.execute(update_sql)
if cursor.rowcount > 0:
LOGGER.verbose(f"update success")
cnx.commit()
###
# create table image_labels
# (
# ID integer
# constraint image_labels_pk
# primary key autoincrement,
# image_id integer not null,
# label_name varchar(32) not null,
# shape_type varchar(32) default 'rectangle',
# points varchar(256) not null,
# create_time varchar(20) not null,
# modify_time varchar(20) not null,
# dataset_id integer
# );
#
# create index image_labels_ID_index
# on image_labels (ID);
#
# create index image_labels_data_label_index
# on image_labels (dataset_id, label_name);
#
# create index image_labels_img_id_index
# on image_labels (image_id);
###
class ImageLabels:
def __init__(self, dataset_id, image_id, label_name, points, shape_type='rectangle'):
self.dataset_id = dataset_id
self.image_id = image_id
self.label_name = label_name
self.shape_type = shape_type
self.points = json.dumps(points)
self.id = None
def save(self, cnx):
cursor = cnx.cursor()
insert_sql = f"INSERT INTO image_labels (dataset_id, image_id, label_name, points, shape_type, create_time, modify_time)" \
f" SELECT '{self.dataset_id}', '{self.image_id}', '{self.label_name}', '{self.points}', '{self.shape_type}', current_timestamp," \
f" current_timestamp" \
f" where not exists(" \
f"select 1 from image_labels where dataset_id ='{self.dataset_id}'" \
f" and image_id='{self.image_id}' and label_name='{self.label_name}' and points = '{self.points}')"
LOGGER.verbose(f"insert sql:{insert_sql}")
cursor.execute(insert_sql)
cnx.commit()
def overwrite(self, cnx):
cursor = cnx.cursor()
del_sql = f"delete from image_labels where dataset_id='{self.dataset_id}' and image_id='{self.image_id}'"
LOGGER.verbose(f"delete sql:{del_sql}")
cursor.execute(del_sql)
insert_sql = f"INSERT INTO image_labels (dataset_id, image_id, label_name, points, shape_type, create_time, modify_time)" \
f" SELECT '{self.dataset_id}', '{self.image_id}', '{self.label_name}', '{self.points}', '{self.shape_type}', current_timestamp," \
f" current_timestamp"
LOGGER.verbose(f"insert sql:{insert_sql}")
cursor.execute(insert_sql)
cnx.commit()
###
# create table original_image(
# id integer primary key autoincrement ,
# desc varchar(256) not null,
# image_id varchar(32) not null,
# dataset_type varchar(32) not null
# );
# create index idx_datatype_desc on original_image(dataset_type,desc);
# create index idx_datatype_img_id on original_image(dataset_type,image_id);
###
class OriginalImage:
def __init__(self, dataset_type, desc, image_id):
self.dataset_type = dataset_type
self.desc = desc
self.image_id = image_id
def save(self, cnx):
cursor = cnx.cursor()
insert_sql = f"INSERT INTO original_image (dataset_type, desc, image_id)" \
f"SELECT '{self.dataset_type}','{self.desc}','{self.image_id}' where not exists(" \
f"select 1 from original_image where dataset_type='{self.dataset_type}' and image_id='{self.image_id}')"
LOGGER.verbose(f"insert sql: {insert_sql}")
cursor.execute(insert_sql)
cnx.commit()
def create_sqlite_connection(db_file='datasets/datasets.db'):
return sqlite3.connect(db_file)
def summary_dataset_labels(dataset, labels, labels_chz, conn):
for label in labels:
cls_id = labels.index(label)
summary_sql = f"select count(*) from image_labels where dataset_id='{dataset.id}' AND label_name='{label}'"
LOGGER.verbose(f"summary sql: {summary_sql}")
cur = conn.cursor()
cur.execute(summary_sql)
result = cur.fetchone()
if result is None:
LOGGER.error(f"标签未找到对应的数据:{label}")
continue
if cls_id < len(labels_chz):
dataset_labels = DatasetLabels(dataset.id, label, cls_id, labels_chz[cls_id], result[0])
dataset_labels.save(conn)
else:
LOGGER.warn(f'标签对应中文值未配置:{label}')
##
# 读取数据集标签信息,并保存到数据库
##
def check_json_labels_and_save(_dataset, _conn, relative_path='', overwrite=False):
from convert_to_yolo_txt import load_label_file
for json_file in os.listdir(os.path.join(relative_path, _dataset.data_dir_path)):
if json_file.endswith('json'):
image_id = json_file.replace('.json', '')
json_shapes, file_name = load_label_file(os.path.join(relative_path, _dataset.data_dir_path, json_file))
dataset_image = DatasetImages(_dataset.id, image_id, file_name)
dataset_image.save(_conn)
for jsonShape in json_shapes:
image_label = ImageLabels(_dataset.id, image_id, jsonShape.label, jsonShape.points)
if overwrite:
image_label.overwrite(_conn)
else:
image_label.save(_conn)
if __name__ == '__main__':
conn = sqlite3.connect('datasets/datasets.db')
root_path = 'data/manor'
dataset = Dataset('manor', root_path)
dataset.save(conn)
labels = label_config.to_list(label_config.manor)
labels_chz = label_config.to_list(label_config.manor_chz)
check_json_labels_and_save(dataset, conn)
summary_dataset_labels(dataset, labels, labels_chz, conn)