This repository has been archived by the owner on May 31, 2022. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 1
/
ocr.cgi
executable file
·98 lines (80 loc) · 2.4 KB
/
ocr.cgi
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
#!/usr/bin/env python3
import sys
import cgi
import json
import requests
import os.path
import io
import functools
import configparser
from PIL import Image
# Read the API settings from the system-wide configuration file.  A missing
# file or a missing [api] section/key surfaces as a KeyError right here.
config = configparser.ConfigParser()
config.read('/etc/annif/ocr.ini')
api_options = config['api']
OCRAPIKEY = api_options['apikey']
DEFAULT_LANGUAGE = api_options['default_language']

# MAXSIZE_PIXELS is the (width, height) bound that images are scaled down to
# before upload.  MAXSIZE_BYTES is not referenced by the rest of this script
# (presumably the upload size limit of the OCR service - TODO confirm).
MAXSIZE_BYTES = 10 ** 6
MAXSIZE_PIXELS = (2600, 2600)

# map ISO 639-1 language codes into the ISO 639-3 codes that ocr.space uses
LANGMAP = dict(fi='fin', sv='swe', en='eng')

# Emit the CGI response header followed by the blank line that ends the
# header section; everything written afterwards is the plain-text body.
sys.stdout.buffer.write(b"Content-Type: text/plain; charset=utf-8\r\n\r\n")
# Use EXIF information to flip and/or transpose the image as necessary
def image_transpose_exif(im):
    """Return `im` re-oriented according to its EXIF Orientation tag.

    The orientation is read through ``im._getexif()``.  The image is returned
    unchanged when the EXIF data cannot be read, when it carries no
    Orientation tag, or when the tag value is not an integer in 1..8.

    Args:
        im: image object offering ``_getexif()`` and ``transpose(op)``.

    Returns:
        ``im`` itself when no re-orientation is needed or possible, otherwise
        the result of applying the required transpose operations in order.
    """
    exif_orientation_tag = 0x0112  # contains an integer, 1 through 8

    try:
        orientation = im._getexif()[exif_orientation_tag]
    except Exception:
        # Best effort: the image type has no _getexif(), there is no EXIF
        # data at all, the tag is absent, or the EXIF block is unreadable.
        return im

    # 1 means "already upright", so there is nothing to do.  Anything outside
    # 1..8 is invalid and must not reach the table below: in particular
    # 0 - 1 == -1 would silently pick the last entry and rotate the image.
    if not isinstance(orientation, int) or not 2 <= orientation <= 8:
        return im

    exif_transpose_sequences = [  # indexed by orientation - 1
        [],                                        # 1
        [Image.FLIP_LEFT_RIGHT],                   # 2
        [Image.ROTATE_180],                        # 3
        [Image.FLIP_TOP_BOTTOM],                   # 4
        [Image.FLIP_LEFT_RIGHT, Image.ROTATE_90],  # 5
        [Image.ROTATE_270],                        # 6
        [Image.FLIP_TOP_BOTTOM, Image.ROTATE_90],  # 7
        [Image.ROTATE_90],                         # 8
    ]
    seq = exif_transpose_sequences[orientation - 1]
    return functools.reduce(lambda img, op: img.transpose(op), seq, im)
# Main request flow: obtain the image (CGI upload, or command line for
# testing), normalise it to a bounded-size JPEG, send it to ocr.space and
# write the recognised text - or the single word "error" - to stdout.

# Seconds to wait for ocr.space; without a timeout a stalled request would
# keep the CGI process hanging indefinitely.
OCR_TIMEOUT_SECONDS = 60

form = cgi.FieldStorage()
if 'imagefile' in form:
    fileitem = form['imagefile']
    if not fileitem.file:
        # The field was submitted without file content, so there is nothing
        # to OCR; report it instead of failing later on undefined names.
        sys.stdout.buffer.write(b"error\r\n")
        sys.exit(1)
    f = fileitem.file
    # The file name doubles as the multipart field name below, so make sure
    # it is never empty.
    fn = fileitem.filename or 'image.jpg'
    lang = form.getfirst('language')
    if lang is None:
        lang = DEFAULT_LANGUAGE
    # map to ISO 639-3 code; unknown codes are passed through unchanged
    lang = LANGMAP.get(lang, lang)
else:
    # take the language and filename as command line parameters - for testing
    lang = sys.argv[1]
    fn = sys.argv[2]
    f = open(fn, 'rb')

source = f
try:
    image = image_transpose_exif(Image.open(source))
    if image.size[0] > MAXSIZE_PIXELS[0] or image.size[1] > MAXSIZE_PIXELS[1]:
        # need to scale it
        image.thumbnail(MAXSIZE_PIXELS)
    if image.mode not in ('L', 'RGB'):
        # JPEG cannot store alpha or palette modes (e.g. RGBA/P from PNG
        # uploads); convert so that save() does not fail on them.
        image = image.convert('RGB')
    f = io.BytesIO()
    image.save(f, 'JPEG')
except OSError:
    # Unreadable or non-image input.
    sys.stdout.buffer.write(b"error\r\n")
    sys.exit(1)
finally:
    # Pillow reads pixel data lazily, so the source may only be closed once
    # the image has been fully processed and re-encoded.
    source.close()
f.seek(0)

payload = {
    'isOverlayRequired': False,
    'apikey': OCRAPIKEY,
    'language': lang,
    # The upload is always re-encoded as JPEG above, whatever extension the
    # original file name carries.
    'filetype': 'JPG',
}
try:
    r = requests.post('https://api.ocr.space/parse/image',
                      files={fn: f}, data=payload,
                      timeout=OCR_TIMEOUT_SECONDS)
    results = r.json()
    text = results['ParsedResults'][0]['ParsedText']
    encoded = text.encode('UTF-8')
except (requests.RequestException, ValueError, KeyError, IndexError,
        TypeError, AttributeError):
    # Network failure, non-JSON reply, or a reply without parsed text.
    sys.stdout.buffer.write(b"error\r\n")
else:
    sys.stdout.buffer.write(encoded + b"\r\n")