/**
 Determines whether a VNRectangleObservation encloses no area.

 The box is treated as empty when its top edge (topLeft to topRight) or its left edge
 (topLeft to bottomLeft) has zero length, i.e. when the two corner points of that edge coincide.

 The corner points are compared directly instead of multiplying the x-extent of the top edge by
 the y-extent of the left edge. Looking at a single coordinate per edge assumes the box is
 axis-aligned and would report a rotated box (for example one whose top edge runs vertically,
 so topRight.x == topLeft.x) as empty even though it encloses area. For axis-aligned boxes the
 result is the same as the extent-based check.

 - Parameter box: The VNRectangleObservation to test for an empty area.

 - Returns: `true` if the top edge or the left edge of the box has zero length (so the box has an area of 0), otherwise `false`.
 */
func isEmptyBox(_ box: VNRectangleObservation) -> Bool {
    // A zero-length top edge means the box has no width along its own orientation
    let hasZeroWidth = box.topRight == box.topLeft

    // A zero-length left edge means the box has no height along its own orientation
    let hasZeroHeight = box.topLeft == box.bottomLeft

    // The enclosed area is 0 as soon as either edge is degenerate
    return hasZeroWidth || hasZeroHeight
}
+ - Parameter observation: The VNRecognizedText observation containing the recognized text and its bounding boxes. + - Parameter recognizedText: The top recognized text candidate within the observation + - Parameter positionalJson: An inout parameter to append the observation information including sub-bounding boxes. + */ +func extractSubBounds(observation: VNRecognizedTextObservation, recognizedText: VNRecognizedText, positionalJson: inout [[String: Any]]?) { + // Ensure we have recognized text + let text = recognizedText.string + let stringLength = text.count + var previousBox: VNRectangleObservation? + var previousText = "" + var previousStartIndex = 0 + var subBounds: [[String: Any]] = [] + + // Iterate through each character in the text + for i in 0.. (Strin // Get the recognized text and its location from the observations for observation in observations { if let recognizedText = observation.topCandidates(1).first { - fullText.append(recognizedText.string) - positionalJson?.append(["observation": [ - "text": recognizedText.string, - "confidence": recognizedText.confidence, - "bounds": [ - "x1": observation.topLeft.x, - "y1": observation.topLeft.y, - "x2": observation.bottomRight.x, - "y2": observation.bottomRight.y - ] - ]]) + let text = recognizedText.string + fullText.append(text) + + // Add in all bounds information + extractSubBounds(observation: observation, recognizedText: recognizedText, positionalJson: &positionalJson) } } } diff --git a/textra/version.swift b/textra/version.swift index 3b16031..a0e85e8 100644 --- a/textra/version.swift +++ b/textra/version.swift @@ -8,4 +8,4 @@ import Foundation /// The authoritative version of the application -let VERSION = "0.2.0" +let VERSION = "0.2.1"