From 3f6c10df910d3e6b81cd0152564ce4c9d4c628cd Mon Sep 17 00:00:00 2001
From: eddableheath
Date: Fri, 6 Dec 2024 18:08:01 +0000
Subject: [PATCH] :bug: bug fix on OCR generation

---
NOTE(review): text in this section (after "---", before the diff) is
commentary and is not part of the commit.
  * `text.split()` is called with no separator, so per the Python docs it
    splits on runs of whitespace and never returns empty strings; its items
    are always non-empty `str` values. The filter against ("", " ") and the
    newly added None therefore looks like a no-op -- TODO confirm where the
    failing OCR input actually comes from (presumably further downstream,
    e.g. in the generator; not visible in this patch).
  * The comprehension variable `text` reuses the name of the function
    parameter `text`; harmless in Python 3 but easy to misread.

 src/arc_spice/data/multieurlex_utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/arc_spice/data/multieurlex_utils.py b/src/arc_spice/data/multieurlex_utils.py
index e38c71e..767546c 100644
--- a/src/arc_spice/data/multieurlex_utils.py
+++ b/src/arc_spice/data/multieurlex_utils.py
@@ -67,7 +67,7 @@ def extract_articles(
 
 def _make_ocr_data(text: str) -> list[tuple[Image.Image, str]]:
     text_split = text.split()
-    text_split = [text for text in text_split if text not in ("", " ")]
+    text_split = [text for text in text_split if text not in ("", " ", None)]
     generator = GeneratorFromStrings(text_split, count=len(text_split))
     return list(generator)
 