# recognizer_example.py (forked from JaidedAI/EasyOCR)
# Run an exported EasyOCR recognizer (recognizer.onnx) on one cropped text
# line with ONNX Runtime, greedy-decode the CTC output, and compute a
# confidence score for the decoded string.
from easyocr.utils import get_image_list
import torch
from PIL import Image
import numpy as np
import torch.nn.functional as F
import cv2
import onnxruntime
import torchvision.transforms as transforms
from torchvision.transforms.functional import to_tensor, to_pil_image
import math


def to_numpy(tensor):
    """Detach a torch tensor (if it requires grad) and return it as a NumPy array."""
    if tensor.requires_grad:
        return tensor.detach().cpu().numpy()
    return tensor.cpu().numpy()


def custom_mean(x):
    # EasyOCR-style confidence: product of per-character probabilities, damped by the exponent 2/sqrt(n)
    print("--", x.prod())
    return x.prod() ** (2.0 / np.sqrt(len(x)))
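
# Illustrative check of custom_mean (values assumed for this example, not part
# of the original script): the product of [0.9, 0.8, 0.95] is 0.684 and the
# exponent is 2 / sqrt(3) ~= 1.155, so
#   custom_mean(np.array([0.9, 0.8, 0.95]))  ->  roughly 0.645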


def decode_greedy(text_index, length):
    """Convert a flat array of character indices into text labels (CTC greedy decoding)."""
    character = ['[blank]'] + list(
        "0123456789!\"#$%&'()*+,-./:;<=>?@[\\]^_`{|}~ €ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz")
    ignore_idx = [0]
    texts = []
    index = 0
    for l in length:
        t = text_index[index:index + l]
        # True where the value differs from its predecessor (collapse repeated indices)
        a = np.insert(~(t[1:] == t[:-1]), 0, True)
        # True where the value is not in the ignore_idx list (drop blanks)
        b = ~np.isin(t, np.array(ignore_idx))
        # Combine the two boolean arrays
        c = a & b
        # Map the surviving indices to their characters
        text = ''.join(np.array(character)[t[c.nonzero()]])
        texts.append(text)
        index += l
    return texts
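
# Quick illustration of decode_greedy (indices chosen by hand, not from the
# model): with the character table above, index 2 -> '1' and index 3 -> '2',
# so a CTC output of [2, 2, 0, 3] collapses the repeat, drops the blank (0),
# and decodes to '12':
#   decode_greedy(np.array([2, 2, 0, 3]), [4])  ->  ['12']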


def normalize_pad(img, max_size):
    """Normalize a [0, 1] image tensor to [-1, 1] and right-pad it to max_size (c, h, w)."""
    img.sub_(0.5).div_(0.5)
    c, h, w = img.size()
    padded_img = torch.FloatTensor(*max_size).fill_(0)
    padded_img[:, :, :w] = img  # right pad
    if max_size[2] != w:  # fill the padding by repeating the last image column
        padded_img[:, :, w:] = img[:, :, w - 1].unsqueeze(2).expand(c, h, max_size[2] - w)
    return padded_img


def process(image, imgH, imgW):
    """Resize a PIL image to height imgH (keeping aspect ratio), then normalize and pad to width imgW."""
    w, h = image.size
    # TODO: adjust image's contrast maybe
    ratio = w / float(h)
    if math.ceil(imgH * ratio) > imgW:
        resized_w = imgW
    else:
        resized_w = math.ceil(imgH * ratio)
    resized_image = image.resize((resized_w, imgH), Image.BICUBIC)
    resized_image = to_tensor(resized_image)
    return normalize_pad(resized_image, (1, imgH, imgW)).unsqueeze(0)
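
# Note on shapes (inferred from the code above, not stated in the original
# script): process() returns a (1, 1, imgH, imgW) float tensor in [-1, 1].
# to_tensor() gives a single-channel (1, imgH, resized_w) image in [0, 1],
# normalize_pad() shifts/scales it to [-1, 1] and right-pads to width imgW,
# and unsqueeze(0) adds the batch dimension expected by the ONNX recognizer.
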
imgH = 64
img_cv_grey = cv2.imread("examples/screen2.png", cv2.IMREAD_GRAYSCALE)
print(img_cv_grey.shape) # (26, 293)
image_list, max_width = get_image_list(
    [[6, 254, 2, 26]], [], img_cv_grey, model_height=imgH)
image = image_list[0]  # (box coordinates, cropped grayscale array) pair
# print(image[1].shape)
print(f"max_width: {max_width}")
# Image.fromarray(image[1], 'L').show()
image = process(Image.fromarray(image[1], 'L'), imgH, int(max_width))
# to_pil_image(image[0]).show()
# print(image.shape)
# print(image.mean(), image.std())
ort_session = onnxruntime.InferenceSession("recognizer.onnx")
# compute ONNX Runtime output prediction
ort_inputs = {ort_session.get_inputs()[0].name: to_numpy(image)}
preds_onnx = ort_session.run(None, ort_inputs)[0]
preds = torch.tensor(preds_onnx)
print(preds.shape)
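
# Shape note (inferred from how `preds` is used below, not stated in the
# original script): the recognizer output is (batch, sequence_length,
# num_classes), so softmax runs over the character classes (dim=2) and
# preds.size(1) is the CTC sequence length passed to decode_greedy.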
# Select max probability (greedy decoding), then decode each index to a character
preds_size = torch.IntTensor([preds.size(1)] * 1)  # one sequence length per image (batch size 1 here)

######## filter ignore_char, rebalance
# No characters are zeroed out in this example, so renormalizing the softmax
# output is effectively a no-op; it is kept to mirror EasyOCR's recognition pipeline.
preds_prob = F.softmax(preds, dim=2)
preds_prob = preds_prob.cpu().detach().numpy()
pred_norm = preds_prob.sum(axis=2)
preds_prob = preds_prob / np.expand_dims(pred_norm, axis=-1)
preds_prob = torch.from_numpy(preds_prob).float().to("cpu")
_, preds_index = preds_prob.max(2)
preds_index = preds_index.view(-1)
# print(preds_index.data.cpu().detach().numpy().shape, preds_size.data)
preds_str = decode_greedy(
    preds_index.data.cpu().detach().numpy(), preds_size.data)
print(preds_str)

preds_prob = preds_prob.cpu().detach().numpy()
values = preds_prob.max(axis=2)
indices = preds_prob.argmax(axis=2)
preds_max_prob = []
for v, i in zip(values, indices):
    # keep the max probability at every timestep whose argmax is not the blank class
    max_probs = v[i != 0]
    if len(max_probs) > 0:
        preds_max_prob.append(max_probs)
    else:
        preds_max_prob.append(np.array([0]))

for pred, pred_max_prob in zip(preds_str, preds_max_prob):
    print(len(pred_max_prob))
    confidence_score = custom_mean(pred_max_prob)
    print([pred, confidence_score])