diff --git a/src/main/deep_learning_service.py b/src/main/deep_learning_service.py
index 6cac31a..61f0563 100755
--- a/src/main/deep_learning_service.py
+++ b/src/main/deep_learning_service.py
@@ -93,11 +93,10 @@ async def run_model(self, model_name, input_data, draw, predict_batch):
         try:
             if predict_batch:
                 return await self.models_dict[model_name].run_batch(input_data, draw, predict_batch)
+            if not draw:
+                return await self.models_dict[model_name].infer(input_data, draw, predict_batch)
             else:
-                if not draw:
-                    return await self.models_dict[model_name].infer(input_data, draw, predict_batch)
-                else:
-                    await self.models_dict[model_name].infer(input_data, draw, predict_batch)
+                await self.models_dict[model_name].infer(input_data, draw, predict_batch)
         except ApplicationError as e:
             raise e
         else:
@@ -147,9 +146,11 @@ def get_labels_custom(self, model_name):
                 model_name = key
         models = self.list_models()
         if model_name not in self.labels_hash_dict:
-            model_dict = {}
-            for label in self.models_dict[model_name].labels:
-                model_dict[label] = str(uuid.uuid4())
+            model_dict = {
+                label: str(uuid.uuid4())
+                for label in self.models_dict[model_name].labels
+            }
+
             self.labels_hash_dict[model_name] = model_dict
         for key in list(self.labels_hash_dict):
             if key not in models:
diff --git a/src/main/object_detection/utils/visualization_utils.py b/src/main/object_detection/utils/visualization_utils.py
index 79e1825..38e263f 100644
--- a/src/main/object_detection/utils/visualization_utils.py
+++ b/src/main/object_detection/utils/visualization_utils.py
@@ -258,9 +258,7 @@ def draw_bounding_boxes_on_image(image,
   if len(boxes_shape) != 2 or boxes_shape[1] != 4:
     raise ValueError('Input must be of size [N, 4]')
   for i in range(boxes_shape[0]):
-    display_str_list = ()
-    if display_str_list_list:
-      display_str_list = display_str_list_list[i]
+    display_str_list = display_str_list_list[i] if display_str_list_list else ()
     draw_bounding_box_on_image(image, boxes[i, 0], boxes[i, 1], boxes[i, 2],
                                boxes[i, 3], color, thickness, display_str_list)
 
@@ -497,8 +495,8 @@ def draw_keypoints_on_image(image,
   keypoints_x = [k[1] for k in keypoints]
   keypoints_y = [k[0] for k in keypoints]
   if use_normalized_coordinates:
-    keypoints_x = tuple([im_width * x for x in keypoints_x])
-    keypoints_y = tuple([im_height * y for y in keypoints_y])
+    keypoints_x = tuple(im_width * x for x in keypoints_x)
+    keypoints_y = tuple(im_height * y for y in keypoints_y)
   for keypoint_x, keypoint_y in zip(keypoints_x, keypoints_y):
     draw.ellipse([(keypoint_x - radius, keypoint_y - radius),
                   (keypoint_x + radius, keypoint_y + radius)],
@@ -617,13 +615,12 @@ def visualize_boxes_and_labels_on_image_array(
         box_to_color_map[box] = groundtruth_box_visualization_color
       else:
         display_str = ''
-        if not skip_labels:
-          if not agnostic_mode:
-            if classes[i] in category_index.keys():
-              class_name = category_index[classes[i]]['name']
-            else:
-              class_name = 'N/A'
-            display_str = str(class_name)
+        if not skip_labels and not agnostic_mode:
+          if classes[i] in category_index.keys():
+            class_name = category_index[classes[i]]['name']
+          else:
+            class_name = 'N/A'
+          display_str = str(class_name)
         if not skip_scores:
           if not display_str:
             display_str = '{}%'.format(int(100*scores[i]))
diff --git a/src/main/ocr.py b/src/main/ocr.py
index 56aab9c..f0a048a 100644
--- a/src/main/ocr.py
+++ b/src/main/ocr.py
@@ -2,57 +2,63 @@
 import unicodedata
 import re
 import numpy as np
-#from PIL import Image, ImageFont, ImageDraw, ImageEnhance
+
+# from PIL import Image, ImageFont, ImageDraw, ImageEnhance
 
 # Define class variables
 bounding_box_order = ["left", "top", "right", "bottom"]
+
 
 # This method will take the model bounding box predictions and return the extracted text inside each box
 def one_shot_ocr_service(image, output):
     # iterate over detections
     response = []
     detections = output['bounding-boxes']
-    for i in range(0, len(detections)):
+    for i in range(len(detections)):
         # crop image for every detection:
         coordinates = (detections[i]["coordinates"])
         cropped = image.crop((float(coordinates["left"]), float(
             coordinates["top"]), float(coordinates["right"]), float(coordinates["bottom"])))
 
-        # convert image to grayscale for better accuracy
-        processed_img=cropped.convert('L')
-
+        # convert image to grayscale for better accuracy
+        processed_img = cropped.convert('L')
+
         # extract text with positive confidence from cropped image
         df = pytesseract.image_to_data(processed_img, output_type='data.frame')
         valid_df = df[df["conf"] > 0]
         extracted_text = " ".join(valid_df["text"].values)
 
         # process text
-        extracted_text = str(unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n", " ").replace(
-            "...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"', '').replace("alt/1m", "").strip()
+        extracted_text = str(
+            unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n",
+                                                                                                              " ").replace(
+            "...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"',
+                                                                                                         '').replace(
+            "alt/1m", "").strip()
         extracted_text = re.sub(
             '[^A-Za-z0-9.!?,;%:=()\[\]$€&/\- ]+', '', extracted_text)
         extracted_text = " ".join(extracted_text.split())
 
         # wrap each prediction inside a dictionary
-        if len(extracted_text) is not 0:
-            prediction = dict()
-            prediction["text"] = extracted_text
+        if extracted_text:
+            prediction = {'text': extracted_text}
             bounding_box = [coordinates[el] for el in bounding_box_order]
             prediction["box"] = bounding_box
-            prediction["score"] = valid_df["conf"].mean()/100.0
+            prediction["score"] = valid_df["conf"].mean() / 100.0
             response.append(prediction)
     return response
+
 
 # This method will take an image and return the extracted text from the image
 def ocr_service(image):
-    # convert image to grayscale for better accuracy
-    processed_img=image.convert('L')
+    # convert image to grayscale for better accuracy
+    processed_img = image.convert('L')
 
     # Get data including boxes, confidences, line and page numbers
     df = pytesseract.image_to_data(processed_img, output_type='data.frame')
@@ -60,8 +66,11 @@ def ocr_service(image):
 
     # process text
     extracted_text = " ".join(valid_df["text"].values)
-    extracted_text = str(unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n", " ").replace(
-        "...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"', '').replace("alt/1m", "").strip()
+    extracted_text = str(
+        unicodedata.normalize('NFKD', extracted_text).encode('ascii', 'ignore').decode()).strip().replace("\n",
+                                                                                                          " ").replace(
+        "...", ".").replace("..", ".").replace('”', ' ').replace('“', ' ').replace("'", ' ').replace('\"', '').replace(
+        "alt/1m", "").strip()
     extracted_text = re.sub(
         '[^A-Za-z0-9.!?,;%:=()\[\]$€&/\- ]+', '', extracted_text)
     extracted_text = " ".join(extracted_text.split())
@@ -78,9 +87,10 @@ def ocr_service(image):
     bounding_box = [coordinates[el].item() for el in bounding_box_order]
 
     # wrap each prediction inside a dictionary
-    response = {}
-    response["text"] = extracted_text
-    response["box"] = bounding_box
-    response["score"] = valid_df["conf"].mean()/100.0
+    response = {
+        'text': extracted_text,
+        'box': bounding_box,
+        'score': valid_df["conf"].mean() / 100.0,
+    }
     return [response]
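
The two ocr.py hunks above converge on one extraction pipeline: grayscale conversion, pytesseract.image_to_data, a positive-confidence filter, then ASCII normalization and an allow-list regex. The following is a minimal standalone sketch of that pipeline, assuming pytesseract, pandas, and Pillow are installed and a Tesseract binary is on the PATH; extract_text and "sample.png" are illustrative names, not part of this diff.

# Minimal sketch, not the service code: mirrors the cleanup chain in ocr.py.
import re
import unicodedata

import pytesseract
from PIL import Image


def extract_text(image):
    # Grayscale conversion tends to improve Tesseract accuracy.
    processed_img = image.convert('L')

    # One row per detected word, with pixel coordinates and a confidence.
    df = pytesseract.image_to_data(processed_img, output_type='data.frame')
    valid_df = df[df["conf"] > 0]

    # Join the words, strip accents, and keep only allow-listed characters.
    text = " ".join(str(word) for word in valid_df["text"].values)
    text = unicodedata.normalize('NFKD', text).encode('ascii', 'ignore').decode()
    text = re.sub(r'[^A-Za-z0-9.!?,;%:=()\[\]$€&/\- ]+', '', text)
    text = " ".join(text.split())

    # Score as in the diff: mean word confidence rescaled to [0, 1].
    return text, valid_df["conf"].mean() / 100.0


text, score = extract_text(Image.open("sample.png"))
print(score, text)

Using a raw string for the character allow-list avoids the invalid escape-sequence warnings that the non-raw pattern in ocr.py triggers on newer Python versions.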