forked from tmbdev-tutorials/das2018-tutorial
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ocropus3-recognize
executable file
·131 lines (112 loc) · 3.88 KB
/
ocropus3-recognize
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
#!/usr/bin/env python
import sys
from pylab import *
import ocroline
import ocroseg
import ocrorot # Loading the RotationEstimator Model
import ocrobin
import argparse
from dlinputs import paths
import scipy.ndimage as ndi
import dltrainers.layers
# Register dltrainers.layers under the top-level module name "layers", so any
# later `import layers` resolves to that module.
# NOTE(review): presumably required because the saved model files refer to a
# module named "layers" when they are loaded -- confirm against the models.
sys.modules["layers"] = dltrainers.layers
# Colon-separated search path for model files: the current directory first,
# then <prefix>/<package> for every package, trying each install prefix in
# turn before moving on to the next package.
model_path = ":".join(
    ["."]
    + [
        prefix + "/" + package
        for package in "ocrobin ocroseg ocrorot ocroline".split()
        for prefix in "/opt/conda/share /usr/local/share /usr/share".split()
    ]
)
def findmodel(s):
    """Locate the model file `s` on the module-level `model_path`.

    Args:
        s: File name of the model to look up.

    Returns:
        The value returned by `paths.find_file` for the match (never None).

    Raises:
        IOError: If `paths.find_file` returns None for `s`.
    """
    result = paths.find_file(model_path, s)
    # Raise explicitly rather than `assert`: assertions are stripped under
    # `python -O`, which would let None flow into the model constructors.
    if result is None:
        raise IOError("{} not found on path".format(s))
    return result
# Command-line interface. The description is passed by keyword: the first
# positional parameter of ArgumentParser is `prog`, so a positional string
# would replace the program name in the usage line instead of describing it.
parser = argparse.ArgumentParser(
    description="Recognize text in page images: binarize, correct rotation "
                "and skew, segment into text lines, and recognize each line.")
# Each model option accepts the literal value "none" to disable that stage.
parser.add_argument("--binarizer", default="bin-000000046-005393.pt",
                    help="binarizer model file, or 'none' to invert the "
                         "image instead (default: %(default)s)")
parser.add_argument("--rotation", default="rot-000003456-020897.pt",
                    help="rotation estimator model file, or 'none' to skip "
                         "(default: %(default)s)")
parser.add_argument("--skew", default="logskew-000015808-000132.pt",
                    help="skew estimator model file, or 'none' to skip "
                         "(default: %(default)s)")
parser.add_argument("--segmenter", default="lowskew-000000259-011440.pt",
                    help="line segmenter model file, or 'none' to skip "
                         "(default: %(default)s)")
parser.add_argument("--linerec", default="line2-000003330-004377.pt",
                    help="line recognizer model file, or 'none' to skip "
                         "(default: %(default)s)")
parser.add_argument("--display", type=float, default=-1.0,
                    help="timeout used when showing each intermediate image; "
                         "values <= 0 disable display (default: %(default)s)")
parser.add_argument("--html", action="store_true",
                    help="write HTML with inline line images instead of "
                         "plain text")
parser.add_argument("inputs", nargs="+", help="image files to process")
# Parse the process command line once; the rest of the script reads the
# resulting namespace through the module-level name `args`.
args = parser.parse_args()
# Stream that out() writes its text to.
destination = sys.stdout
def out(*args):
    """Write the arguments to `destination`, space-separated.

    Every argument is converted with str(). No trailing newline is added,
    so callers include their own line breaks.
    """
    destination.write(" ".join(str(item) for item in args))
def note(*args):
    """Write a "# "-prefixed diagnostic line to standard error.

    Every argument is converted with str() and the pieces are joined with
    single spaces; a newline terminates the line.
    """
    text = " ".join(map(str, args))
    sys.stderr.write("# " + text + "\n")
def invert(image):
    """Return `image` inverted and rescaled so its maximum is 1.

    Each value v becomes (max - v) / (max - min), so the brightest input
    maps to 0 and the darkest to 1. The input array is not modified.

    Args:
        image: Array-like of numbers. Floating-point arrays keep their dtype;
            any other dtype is converted to float32 first, because in-place
            true division is not defined for integer arrays.

    Returns:
        A new floating-point array of the same shape. A constant image
        yields all zeros rather than dividing by zero.

    Raises:
        ValueError: From np.amax if `image` is empty.
    """
    image = np.asarray(image)
    if not np.issubdtype(image.dtype, np.floating):
        # float32 is what matplotlib's imread is documented to give for PNG
        # input, so integer images end up in the same representation.
        image = image.astype(np.float32)
    inverted = np.amax(image) - image
    peak = np.amax(inverted)
    # peak is 0 exactly when every pixel has the same value.
    if peak > 0:
        inverted /= peak
    return inverted
def img2uri(image):
    """Encode `image` as a base64 PNG ``data:`` URI.

    Args:
        image: Array whose maximum does not exceed 1.0. It is multiplied by
            255 and converted to uint8 before being written as a PNG.

    Returns:
        The string "data:image/png;base64," followed by the base64 payload,
        with no line breaks.

    Raises:
        ValueError: If the maximum of `image` is greater than 1.0 or is NaN.
    """
    import base64
    from io import BytesIO

    # Explicit check instead of `assert`, which is stripped under `python -O`.
    # Written as `not (... <= 1.0)` so a NaN maximum is rejected as well.
    if not (np.amax(image) <= 1.0):
        raise ValueError("image values must not exceed 1.0")

    # Import the submodule itself: a bare `import PIL` does not guarantee
    # that the `PIL.Image` attribute has been loaded.
    from PIL import Image

    pixels = np.array(image * 255, "uint8")
    # PNG data is binary, so it is collected in a bytes buffer; the `with`
    # block closes the buffer even if saving fails.
    with BytesIO() as stream:
        Image.fromarray(pixels).save(stream, format="PNG")
        payload = base64.b64encode(stream.getvalue())
    # b64encode emits no newlines, so nothing needs stripping.
    return "data:image/png;base64," + payload.decode("ascii")
# Build each processing stage from its model file. Passing the literal string
# "none" for a stage's option leaves that stage as None, which the main loop
# treats as "skip this stage".
bm = (
    None if args.binarizer == "none"
    else ocrobin.Binarizer(findmodel(args.binarizer))
)
rot = (
    None if args.rotation == "none"
    else ocrorot.RotationEstimator(findmodel(args.rotation))
)
sk = (
    None if args.skew == "none"
    else ocrorot.SkewEstimator(findmodel(args.skew))
)
seg = (
    None if args.segmenter == "none"
    else ocroseg.Segmenter(findmodel(args.segmenter))
)
rec = (
    None if args.linerec == "none"
    else ocroline.LineRecognizer(findmodel(args.linerec))
)

# Plotting is only set up when a positive --display value was given.
if args.display > 0:
    ion()
    rc("image", cmap="gray", interpolation="bicubic")
def debug_display(image):
    """Show `image` in the current figure when --display is positive.

    Does nothing unless `args.display` is greater than zero. Otherwise it
    clears the figure, draws the image, and calls ginput with a click count
    of 1 and `args.display` as the timeout.
    """
    if not (args.display > 0):
        return
    clf()
    imshow(image)
    ginput(1, args.display)
# Run the pipeline on every input image: load, binarize (or invert), undo the
# estimated rotation and skew, split into text lines, recognize each line.
# Stages whose object is None were disabled on the command line.
for fname in args.inputs:
    image = imread(fname)
    note(fname, image.shape, amin(image), amax(image), mean(image))
    if image.ndim == 3:
        # Reduce to a single channel by averaging the first three channels;
        # any further channel is ignored.
        image = mean(image[:, :, :3], 2)
    debug_display(image)

    if bm is not None:
        image = bm.binarize(image)
        note("binarized")
    else:
        image = invert(image)
        note("not binarized")
    debug_display(image)

    if rot is not None:
        angle = rot.rotation(image)
        note("rotation", angle)
        if angle != 0.0:
            # Rotate by the negated estimate to undo it.
            image = ndi.rotate(image, -angle, order=1, mode="nearest")
            debug_display(image)

    if sk is not None:
        angle = sk.skew(image)
        note("skew", angle)
        if angle != 0.0:
            image = ndi.rotate(image, -angle, order=1, mode="nearest")
            debug_display(image)

    if seg is None:
        # Without a segmenter there are no text lines to recognize; say so
        # instead of failing on an undefined `lines` further down.
        if rec is not None:
            note("no segmenter loaded; skipping line recognition")
        continue

    lines = seg.extract_textlines(image)
    note("got", len(lines), "lines")
    if rec is None:
        continue

    for line in lines:
        debug_display(line["image"])
        result = rec.recognize_line(line["image"])
        if args.html:
            out("<img src='{}'><br>\n".format(img2uri(line["image"])))
            # Escape markup characters so recognized text such as "a < b"
            # cannot break the generated HTML. "&" must be replaced first.
            text = (str(result)
                    .replace("&", "&amp;")
                    .replace("<", "&lt;")
                    .replace(">", "&gt;"))
            out("{}<p>\n".format(text))
        else:
            # Function-call form: same output as the Python 2 print
            # statement for a single value, and valid Python 3 syntax.
            print(result)