-
Notifications
You must be signed in to change notification settings - Fork 591
/
ocropus-lpred
executable file
·102 lines (83 loc) · 3.14 KB
/
ocropus-lpred
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
#!/usr/bin/env python
from __future__ import print_function
import argparse
import sys
import numpy as np
import matplotlib.pyplot as plt
import ocrolib
from ocrolib import lineest
import ocrolib.lstm as lstm
from ocrolib import edist
import clstm
# Interactive plotting, with 7pt tick labels and a 7pt base font.
plt.ion()
plt.rcParams.update({
    "xtick.labelsize": 7,
    "ytick.labelsize": 7,
    "font.size": 7,
})

# Division, overflow and invalid floating-point operations raise instead of
# producing inf/nan; underflow is ignored.
np.seterr(under='ignore', divide='raise', over='raise', invalid='raise')
# Command-line interface.  The summary is passed as ``description``: the first
# positional parameter of ArgumentParser is ``prog``, so passing the sentence
# positionally would print it in place of the program name in the usage line.
parser = argparse.ArgumentParser(description="run an RNN recognizer")

# character set
parser.add_argument("-c", "--codec", default=[], nargs='*',
                    help="construct a codec from the input text")

# evaluation
parser.add_argument("-e", "--eval", action="store_true",
                    help="compare each prediction with its .gt.txt transcript "
                         "and report the total edit distance")
parser.add_argument("-K", "--kind", default="exact",
                    help="kind of comparison (exact, nospace, letdig, letters, digits, lnc), default: %(default)s")

# line geometry
parser.add_argument("--lineheight", type=int, default=48,
                    help="target height of the normalized text line, default: %(default)s")
parser.add_argument("-p", "--pad", type=int, default=16,
                    help="rows of zeros added before and after each preprocessed line, "
                         "default: %(default)s")

# network
parser.add_argument("-S", "--hiddensize", type=int, default=100,
                    help="# LSTM state units, default: %(default)s")
parser.add_argument('-m', '--load', default=None,
                    help="previously trained model to load for recognition")

# images of the text lines to recognize
parser.add_argument("files", nargs="*")
args = parser.parse_args()

# Expand the file arguments; with nothing to process, show the help and stop.
inputs = ocrolib.glob_all(args.files)
if not inputs:
    parser.print_help()
    sys.exit(0)

# Recognition needs trained weights.  Report a usage error up front instead
# of handing None to the model loader further down.
if args.load is None:
    parser.error("no model given; use -m/--load to name a trained model")

# Character set: the union of the LSTM ASCII labels and ocrolib's default
# characters, sorted, with "", " " and "~" placed at the front.
# NOTE(review): the resulting code order presumably has to match the one the
# model was trained with -- do not "clean up" this construction.
charset = sorted(set(list(lstm.ascii_labels) + list(ocrolib.chars.default)))
charset = ["", " ", "~"] + [c for c in charset if c not in [" ", "~"]]
codec = lstm.Codec().init(charset)

# Line normalizer that brings every text line to the requested height.
lnorm = lineest.CenterNormalizer(args.lineheight)

# Build the bidirectional LSTM, sized from the codec, the hidden-layer size
# and the normalized line height, then load the trained model into it.
network = clstm.make_BIDILSTM()
print("# network", (codec.size(), args.hiddensize, lnorm.target_height))
network.init(codec.size(), args.hiddensize, lnorm.target_height)
network = clstm.CNetwork(network)
network.load(args.load)
def preprocess(line):
    """Turn a grayscale text-line image into the array fed to the network.

    The image is measured and rescaled by the module-level normalizer
    ``lnorm``, divided by its maximum, inverted, transposed, and finally
    padded with ``args.pad`` rows of zeros before and after.

    Returns None when the normalized image has fewer than 10 pixels or is
    completely uniform; otherwise returns the padded array.
    """
    # The normalizer is measured on the inverted image, while normalize()
    # gets the original image with its maximum as the fill value.
    lnorm.measure(np.amax(line) - line)
    normalized = lnorm.normalize(line, cval=np.amax(line))

    # Nothing usable: too few pixels, or no contrast at all.
    too_small = normalized.size < 10
    if too_small or np.amax(normalized) == np.amin(normalized):
        return None

    # Scale by the maximum, invert, then swap the two axes.
    scaled = normalized * 1.0 / np.amax(normalized)
    features = (np.amax(scaled) - scaled).T

    if args.pad <= 0:
        return features

    # Same block of zero rows on both ends of the first axis.
    margin = np.zeros((args.pad, features.shape[1]))
    return np.concatenate([margin, features, margin], axis=0)
# Edit-distance statistics, accumulated over all lines when --eval is given.
errs = 0
total = 0

# Recognize every input line image: print "<file>\t<text>" and write the
# prediction next to the image as <base>.txt.
for fname in inputs:
    try:
        base, _ = ocrolib.allsplitext(fname)
        line = preprocess(ocrolib.read_image_gray(fname))
        if line is None:
            # preprocess() rejected the image (too small or uniform)
            continue
        outputs = np.array(network.forward(line))
        result = lstm.translate_back(outputs)
        pred = "".join(codec.decode(result))
        print("%s\t%s" % (fname, pred))
        ocrolib.write_text(base + ".txt", pred)
        if args.eval:
            # Compare against the ground truth after projecting both texts
            # with the requested comparison kind.
            transcript = ocrolib.read_text(base + ".gt.txt")
            gt = ocrolib.project_text(transcript, kind=args.kind)
            txt = ocrolib.project_text(pred, kind=args.kind)
            errs += edist.levenshtein(txt, gt)
            total += len(gt)
    except Exception as e:
        # Best effort: one bad file must not abort the whole run.  The
        # message names the file and goes to stderr so it does not mix with
        # the recognition results on stdout.
        print("%s: %s" % (fname, e), file=sys.stderr)

if args.eval:
    # With no ground-truth characters at all the rate is undefined; report
    # nan instead of failing with ZeroDivisionError.
    rate = errs * 1.0 / total if total > 0 else float("nan")
    print(errs, total, rate)