ocroseg-predlines
#!/usr/bin/python
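#
# ocroseg-predlines: apply a trained ocroseg segmentation model to page images
# and extract individual text lines.
#
# Reads page images from an input shard (via dlinputs), runs the segmenter on
# each page, and optionally writes each extracted line image plus its bounding
# box to an output shard. Illustrative invocation (file names are hypothetical):
#
#     ocroseg-predlines pages.tgz lines.tgz
#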
import os
import os.path
import argparse
from pylab import *
import numpy as np
from torch import nn
from dlinputs import gopen, paths, filters
import ocroseg
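
# Models are looked up on a colon-separated search path, overridable via the
# MODELS environment variable.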
model_path = os.environ.get(
    "MODELS", ".:/usr/local/share/ocroseg:/usr/share/ocroseg")
default_model = "lowskew-000000259-011440.pt"
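
# Command-line interface: INPUT is a shard of page images; OUTPUT, if given,
# receives the extracted line records. -D/-P name Python files that can
# redefine make_source / make_pipeline before they are called.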
parser = argparse.ArgumentParser("extract text lines from page images with a trained segmenter")
parser.add_argument("-m", "--model", default=default_model, help="load model")
#parser.add_argument("-b", "--batchsize", type=int, default=1)
parser.add_argument("-D", "--makesource", default=None)
parser.add_argument("-P", "--makepipeline", default=None)
parser.add_argument("-i", "--invert", action="store_true")
parser.add_argument("--display", type=int, default=0)
parser.add_argument("--hiprob", type=float, default=0.5)
parser.add_argument("--loprob", type=float, default=0.4)
parser.add_argument("input")
parser.add_argument("output", nargs="?")
args = parser.parse_args()
ARGS = {k: v for k, v in args.__dict__.items()}
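
# Set up interactive matplotlib display only when --display is requested.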
if args.display > 0:
    rc("image", cmap="gray", interpolation="bicubic")
    ion()

# Default source: iterate once over the input shard.
def make_source():
    return gopen.sharditerator_once(args.input)

# Default pipeline: rename image keys and normalize pages to grayscale in [0, 1].
def make_pipeline():
    def fixdepth(image):
        # collapse color images to a single channel and rescale to [0, 1]
        assert image.ndim in [2, 3]
        if image.ndim == 3:
            image = np.mean(image, 2)
        image -= amin(image)
        image /= amax(image)
        if args.invert:
            image = 1 - image
        return image
    return filters.compose(
        filters.rename(input="bin.png png gray.jpg jpeg jpg"),
        filters.map(input=fixdepth))

# User-supplied scripts may override the source and pipeline definitions above.
if args.makesource:
    execfile(args.makesource)
if args.makepipeline:
    execfile(args.makepipeline)

def pixels_to_batch(x):
    # flatten a (batch, depth, height, width) tensor into (pixels, depth)
    b, d, h, w = x.size()
    return x.permute(0, 2, 3, 1).contiguous().view(b*h*w, d)

class PixelsToBatch(nn.Module):
    def forward(self, x):
        return pixels_to_batch(x)
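
# Build the input source and run it through the preprocessing pipeline.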
source = make_source()
pipeline = make_pipeline()
source = pipeline(source)
if args.output:
    sink = gopen.open_sink(args.output)
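
# Resolve the model file on the search path and load the segmenter.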
mname = paths.find_file(model_path, args.model)
assert mname is not None, "{}: model not found".format(args.model)
print "loading", mname
seg = ocroseg.Segmenter(mname, hiprob=args.hiprob, loprob=args.loprob)
print seg.model
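
# Main loop: segment each page, optionally display the result, and emit one
# record per extracted line (cropped line image plus bounding box).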
for i, sample in enumerate(source):
    fname = sample["__key__"]
    image = sample["input"]
    textlines = seg.extract_textlines(image)
    print i, fname, len(textlines)
    if args.display > 0:
        if i % args.display == 0 and len(textlines) > 0:
            # show the page alongside one of the extracted lines
            clf()
            subplot(121)
            imshow(image, vmin=0, vmax=1)
            subplot(122)
            imshow(textlines[len(textlines)//2]["image"], vmin=0, vmax=1)
            draw()
            ginput(1, 1e-3)
            waitforbuttonpress(0.0001)
    for j, line in enumerate(textlines):
        # store the line's bounding box as the start/stop of its row and column slices
        bounds = line["bounds"]
        bounds = [bounds[0].start, bounds[1].start,
                  bounds[0].stop, bounds[1].stop]
        bounds = [int(x) for x in bounds]
        result = {
            "__key__": fname + "/{:06d}".format(j),
            "line.png": line["image"],
            "json": dict(bounds=bounds)
        }
        if args.output:
            sink.write(result)

if args.output:
    sink.close()