forked from zhang0jhon/AttentionOCR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
parse_dict.py
40 lines (35 loc) · 1.14 KB
/
parse_dict.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# File: parse_dict.py
import os
import re
import numpy as np
import inspect
# Absolute directory of this module; used below to build the default label-file path.
currentdir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe())))


def get_dict(path=os.path.join(currentdir, 'label_dict/icdar_labels.txt'), add_space=False, add_eos=False):
    """
    Load text label dict from preprocessed text file.

    Every non-blank line of the file must have the form ``<index> <label>``:
    a decimal integer, a single space, then the label text (which may itself
    be empty or contain spaces). Blank lines are ignored.

    Args:
        path: label dict text file path. The file is decoded as UTF-8.
        add_space: whether to add an additional space character to the label dict.
        add_eos: whether to add EOS, which represents end of sequence, to the label dict.
    Returns:
        label_dict: dict mapping integer index to label string. Extra entries
            are appended after the largest index found in the file (starting
            at 0 when the file holds no entries): first the space, then 'EOS'.
    Raises:
        ValueError: if a non-blank line does not match ``<index> <label>``.
    """
    entry_pattern = re.compile(r'(\d+) (.*)')
    label_dict = dict()

    # Decode explicitly instead of relying on the platform's locale encoding,
    # so non-ASCII labels load the same way on every machine.
    with open(path, 'r', encoding='utf-8') as f:
        for line_no, line in enumerate(f, start=1):
            m = entry_pattern.match(line)
            if m is None:
                # Tolerate blank lines (e.g. a trailing newline at end of file),
                # but report anything else rather than failing on `None.group`.
                if not line.strip():
                    continue
                raise ValueError(
                    '{}:{}: expected "<index> <label>", got {!r}'.format(path, line_no, line))
            # Match against the raw line (not a stripped copy) so that a label
            # consisting of whitespace, such as a single space, is preserved.
            label_dict[int(m.group(1))] = m.group(2)

    # Append after the largest existing index so the extra entries can never
    # overwrite a label read from the file, even if the file is not sorted.
    next_idx = max(label_dict) + 1 if label_dict else 0
    if add_space:
        label_dict[next_idx] = ' '
        next_idx += 1
    if add_eos:
        label_dict[next_idx] = 'EOS'
    return label_dict
if __name__ == '__main__':
    # When run as a script, load the label dict with the default arguments
    # and print it.
    print(get_dict())