From 28161f7bb9c43b7d53080f78417223e369b86301 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 29 Oct 2024 16:20:27 +0100 Subject: [PATCH] feat: DOCXToDocument: add table extraction (#8457) * DOCXToDocument: add table extraction * Add reno note * mypy fixes * add unit tests * Add csv table support * Update release note * Add TableFormat enum * Add table_format as str init param * Update docx.py Co-authored-by: Madeesh Kannan * PR feedback * PR feedback --------- Co-authored-by: medsriha Co-authored-by: Mo Sriha <22803208+medsriha@users.noreply.github.com> Co-authored-by: Madeesh Kannan --- haystack/components/converters/docx.py | 211 ++++++++++++++---- ...ocx-table-extraction-3232d3059d220550.yaml | 4 + .../converters/test_docx_file_to_document.py | 206 ++++++++++++++++- test/test_files/docx/sample_docx_3.docx | Bin 0 -> 10130 bytes 4 files changed, 371 insertions(+), 50 deletions(-) create mode 100644 releasenotes/notes/enhance-docx-table-extraction-3232d3059d220550.yaml create mode 100644 test/test_files/docx/sample_docx_3.docx diff --git a/haystack/components/converters/docx.py b/haystack/components/converters/docx.py index 8ffc3888a8..dc0a51f485 100644 --- a/haystack/components/converters/docx.py +++ b/haystack/components/converters/docx.py @@ -2,12 +2,15 @@ # # SPDX-License-Identifier: Apache-2.0 +import csv import io from dataclasses import dataclass +from enum import Enum +from io import StringIO from pathlib import Path from typing import Any, Dict, List, Optional, Union -from haystack import Document, component, logging +from haystack import Document, component, default_from_dict, default_to_dict, logging from haystack.components.converters.utils import get_bytestream_from_source, normalize_metadata from haystack.dataclasses import ByteStream from haystack.lazy_imports import LazyImport @@ -17,6 +20,7 @@ with LazyImport("Run 'pip install python-docx'") as docx_import: import docx from docx.document import Document as DocxDocument + from docx.table import Table from docx.text.paragraph import Paragraph @@ -59,6 +63,30 @@ class DOCXMetadata: version: str +class DOCXTableFormat(Enum): + """ + Supported formats for storing DOCX tabular data in a Document. + """ + + MARKDOWN = "markdown" + CSV = "csv" + + def __str__(self): + return self.value + + @staticmethod + def from_str(string: str) -> "DOCXTableFormat": + """ + Convert a string to a DOCXTableFormat enum. + """ + enum_map = {e.value: e for e in DOCXTableFormat} + table_format = enum_map.get(string.lower()) + if table_format is None: + msg = f"Unknown table format '{string}'. Supported formats are: {list(enum_map.keys())}" + raise ValueError(msg) + return table_format + + @component class DOCXToDocument: """ @@ -69,9 +97,9 @@ class DOCXToDocument: Usage example: ```python - from haystack.components.converters.docx import DOCXToDocument + from haystack.components.converters.docx import DOCXToDocument, DOCXTableFormat - converter = DOCXToDocument() + converter = DOCXToDocument(table_format=DOCXTableFormat.CSV) results = converter.run(sources=["sample.docx"], meta={"date_added": datetime.now().isoformat()}) documents = results["documents"] print(documents[0].content) @@ -79,11 +107,38 @@ class DOCXToDocument: ``` """ - def __init__(self): + def __init__(self, table_format: Union[str, DOCXTableFormat] = DOCXTableFormat.CSV): """ Create a DOCXToDocument component. + + :param table_format: The format for table output. Can be either DOCXTableFormat.MARKDOWN, + DOCXTableFormat.CSV, "markdown", or "csv". 
Defaults to DOCXTableFormat.CSV. """ docx_import.check() + self.table_format = DOCXTableFormat.from_str(table_format) if isinstance(table_format, str) else table_format + + def to_dict(self) -> Dict[str, Any]: + """ + Serializes the component to a dictionary. + + :returns: + Dictionary with serialized data. + """ + return default_to_dict(self, table_format=str(self.table_format)) + + @classmethod + def from_dict(cls, data: Dict[str, Any]) -> "DOCXToDocument": + """ + Deserializes the component from a dictionary. + + :param data: + The dictionary to deserialize from. + :returns: + The deserialized component. + """ + if "table_format" in data["init_parameters"]: + data["init_parameters"]["table_format"] = DOCXTableFormat.from_str(data["init_parameters"]["table_format"]) + return default_from_dict(cls, data) @component.output_types(documents=List[Document]) def run( @@ -118,9 +173,9 @@ def run( logger.warning("Could not read {source}. Skipping it. Error: {error}", source=source, error=e) continue try: - file = docx.Document(io.BytesIO(bytestream.data)) - paragraphs = self._extract_paragraphs_with_page_breaks(file.paragraphs) - text = "\n".join(paragraphs) + docx_document = docx.Document(io.BytesIO(bytestream.data)) + elements = self._extract_elements(docx_document) + text = "\n".join(elements) except Exception as e: logger.warning( "Could not read {source} and convert it to a DOCX Document, skipping. Error: {error}", @@ -129,52 +184,116 @@ def run( ) continue - docx_metadata = self._get_docx_metadata(document=file) + docx_metadata = self._get_docx_metadata(document=docx_document) merged_metadata = {**bytestream.meta, **metadata, "docx": docx_metadata} document = Document(content=text, meta=merged_metadata) documents.append(document) return {"documents": documents} - def _extract_paragraphs_with_page_breaks(self, paragraphs: List["Paragraph"]) -> List[str]: + def _extract_elements(self, document: "DocxDocument") -> List[str]: """ - Extracts paragraphs from a DOCX file, including page breaks. + Extracts elements from a DOCX file. - Page breaks (both soft and hard page breaks) are not automatically extracted by python-docx as '\f' chars. - This means we need to add them in ourselves, as done here. This allows the correct page number - to be associated with each document if the file contents are split, e.g. by DocumentSplitter. + :param document: The DOCX Document object. + :returns: List of strings (paragraph texts and table representations) with page breaks added as '\f' characters. + """ + elements = [] + for element in document.element.body: + if element.tag.endswith("p"): + paragraph = Paragraph(element, document) + if paragraph.contains_page_break: + para_text = self._process_paragraph_with_page_breaks(paragraph) + else: + para_text = paragraph.text + elements.append(para_text) + elif element.tag.endswith("tbl"): + table = docx.table.Table(element, document) + table_str = ( + self._table_to_markdown(table) + if self.table_format == DOCXTableFormat.MARKDOWN + else self._table_to_csv(table) + ) + elements.append(table_str) - :param paragraphs: - List of paragraphs from a DOCX file. + return elements - :returns: - List of strings (paragraph text fields) with all page breaks added in as '\f' characters. 
- """ - paragraph_texts = [] - for para in paragraphs: - if para.contains_page_break: - para_text_w_page_breaks = "" - # Usually, just 1 page break exists, but could be more if paragraph is really long, so we loop over them - for pb_index, page_break in enumerate(para.rendered_page_breaks): - # Can only extract text from first paragraph page break, unfortunately - if pb_index == 0: - if page_break.preceding_paragraph_fragment: - para_text_w_page_breaks += page_break.preceding_paragraph_fragment.text - para_text_w_page_breaks += "\f" - if page_break.following_paragraph_fragment: - # following_paragraph_fragment contains all text for remainder of paragraph. - # However, if the remainder of the paragraph spans multiple page breaks, it won't include - # those later page breaks so we have to add them at end of text in the `else` block below. - # This is not ideal, but this case should be very rare and this is likely good enough. - para_text_w_page_breaks += page_break.following_paragraph_fragment.text - else: - para_text_w_page_breaks += "\f" - - paragraph_texts.append(para_text_w_page_breaks) + def _process_paragraph_with_page_breaks(self, paragraph: "Paragraph") -> str: + """ + Processes a paragraph with page breaks. + + :param paragraph: The DOCX paragraph to process. + :returns: A string with page breaks added as '\f' characters. + """ + para_text = "" + # Usually, just 1 page break exists, but could be more if paragraph is really long, so we loop over them + for pb_index, page_break in enumerate(paragraph.rendered_page_breaks): + # Can only extract text from first paragraph page break, unfortunately + if pb_index == 0: + if page_break.preceding_paragraph_fragment: + para_text += page_break.preceding_paragraph_fragment.text + para_text += "\f" + if page_break.following_paragraph_fragment: + # following_paragraph_fragment contains all text for remainder of paragraph. + # However, if the remainder of the paragraph spans multiple page breaks, it won't include + # those later page breaks so we have to add them at end of text in the `else` block below. + # This is not ideal, but this case should be very rare and this is likely good enough. + para_text += page_break.following_paragraph_fragment.text else: - paragraph_texts.append(para.text) + para_text += "\f" + return para_text + + def _table_to_markdown(self, table: "Table") -> str: + """ + Converts a DOCX table to a Markdown string. + + :param table: The DOCX table to convert. + :returns: A Markdown string representation of the table. + """ + markdown: List[str] = [] + max_col_widths: List[int] = [] + + # Calculate max width for each column + for row in table.rows: + for i, cell in enumerate(row.cells): + cell_text = cell.text.strip() + if i >= len(max_col_widths): + max_col_widths.append(len(cell_text)) + else: + max_col_widths[i] = max(max_col_widths[i], len(cell_text)) + + # Process rows + for i, row in enumerate(table.rows): + md_row = [cell.text.strip().ljust(max_col_widths[j]) for j, cell in enumerate(row.cells)] + markdown.append("| " + " | ".join(md_row) + " |") + + # Add separator after header row + if i == 0: + separator = ["-" * max_col_widths[j] for j in range(len(row.cells))] + markdown.append("| " + " | ".join(separator) + " |") + + return "\n".join(markdown) + + def _table_to_csv(self, table: "Table") -> str: + """ + Converts a DOCX table to a CSV string. + + :param table: The DOCX table to convert. + :returns: A CSV string representation of the table. 
+ """ + csv_output = StringIO() + csv_writer = csv.writer(csv_output, quoting=csv.QUOTE_MINIMAL) + + # Process rows + for row in table.rows: + csv_row = [cell.text.strip() for cell in row.cells] + csv_writer.writerow(csv_row) + + # Get the CSV as a string and strip any trailing newlines + csv_string = csv_output.getvalue().strip() + csv_output.close() - return paragraph_texts + return csv_string def _get_docx_metadata(self, document: "DocxDocument") -> DOCXMetadata: """ @@ -191,15 +310,15 @@ def _get_docx_metadata(self, document: "DocxDocument") -> DOCXMetadata: category=document.core_properties.category, comments=document.core_properties.comments, content_status=document.core_properties.content_status, - created=document.core_properties.created.isoformat() if document.core_properties.created else None, + created=(document.core_properties.created.isoformat() if document.core_properties.created else None), identifier=document.core_properties.identifier, keywords=document.core_properties.keywords, language=document.core_properties.language, last_modified_by=document.core_properties.last_modified_by, - last_printed=document.core_properties.last_printed.isoformat() - if document.core_properties.last_printed - else None, - modified=document.core_properties.modified.isoformat() if document.core_properties.modified else None, + last_printed=( + document.core_properties.last_printed.isoformat() if document.core_properties.last_printed else None + ), + modified=(document.core_properties.modified.isoformat() if document.core_properties.modified else None), revision=document.core_properties.revision, subject=document.core_properties.subject, title=document.core_properties.title, diff --git a/releasenotes/notes/enhance-docx-table-extraction-3232d3059d220550.yaml b/releasenotes/notes/enhance-docx-table-extraction-3232d3059d220550.yaml new file mode 100644 index 0000000000..3c7c52e0dd --- /dev/null +++ b/releasenotes/notes/enhance-docx-table-extraction-3232d3059d220550.yaml @@ -0,0 +1,4 @@ +--- +features: + - | + Enhanced DOCX converter to support table extraction in addition to paragraph content. The converter supports both CSV and Markdown table formats, providing flexible options for representing tabular data extracted from DOCX documents. 
diff --git a/test/components/converters/test_docx_file_to_document.py b/test/components/converters/test_docx_file_to_document.py
index d529f2a06d..64dfaa79f4 100644
--- a/test/components/converters/test_docx_file_to_document.py
+++ b/test/components/converters/test_docx_file_to_document.py
@@ -1,11 +1,12 @@
-import logging
 import json
-
+import logging
 import pytest
+import csv
+from io import StringIO
+from haystack import Document, Pipeline
+from haystack.components.converters.docx import DOCXMetadata, DOCXToDocument, DOCXTableFormat
 from haystack.dataclasses import ByteStream
-from haystack import Document
-from haystack.components.converters.docx import DOCXToDocument, DOCXMetadata
 
 
 @pytest.fixture
@@ -17,6 +18,96 @@ class TestDOCXToDocument:
     def test_init(self, docx_converter):
         assert isinstance(docx_converter, DOCXToDocument)
 
+    def test_init_with_string(self):
+        converter = DOCXToDocument(table_format="markdown")
+        assert isinstance(converter, DOCXToDocument)
+        assert converter.table_format == DOCXTableFormat.MARKDOWN
+
+    def test_init_with_invalid_string(self):
+        with pytest.raises(ValueError, match="Unknown table format 'invalid_format'"):
+            DOCXToDocument(table_format="invalid_format")
+
+    def test_to_dict(self):
+        converter = DOCXToDocument()
+        data = converter.to_dict()
+        assert data == {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "csv"},
+        }
+
+    def test_to_dict_custom_parameters(self):
+        converter = DOCXToDocument(table_format="markdown")
+        data = converter.to_dict()
+        assert data == {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "markdown"},
+        }
+
+        converter = DOCXToDocument(table_format="csv")
+        data = converter.to_dict()
+        assert data == {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "csv"},
+        }
+
+        converter = DOCXToDocument(table_format=DOCXTableFormat.MARKDOWN)
+        data = converter.to_dict()
+        assert data == {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "markdown"},
+        }
+
+        converter = DOCXToDocument(table_format=DOCXTableFormat.CSV)
+        data = converter.to_dict()
+        assert data == {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "csv"},
+        }
+
+    def test_from_dict(self):
+        data = {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "csv"},
+        }
+        converter = DOCXToDocument.from_dict(data)
+        assert converter.table_format == DOCXTableFormat.CSV
+
+    def test_from_dict_custom_parameters(self):
+        data = {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "markdown"},
+        }
+        converter = DOCXToDocument.from_dict(data)
+        assert converter.table_format == DOCXTableFormat.MARKDOWN
+
+    def test_from_dict_invalid_table_format(self):
+        data = {
+            "type": "haystack.components.converters.docx.DOCXToDocument",
+            "init_parameters": {"table_format": "invalid_format"},
+        }
+        with pytest.raises(ValueError, match="Unknown table format 'invalid_format'"):
+            DOCXToDocument.from_dict(data)
+
+    def test_from_dict_empty_init_parameters(self):
+        data = {"type": "haystack.components.converters.docx.DOCXToDocument", "init_parameters": {}}
+        converter = DOCXToDocument.from_dict(data)
+        assert converter.table_format == DOCXTableFormat.CSV
+
+    def test_pipeline_serde(self):
+        pipeline = Pipeline()
+        converter = DOCXToDocument(table_format=DOCXTableFormat.MARKDOWN)
+        pipeline.add_component("converter", converter)
+
+        pipeline_str = pipeline.dumps()
+        assert "haystack.components.converters.docx.DOCXToDocument" in pipeline_str
+        assert "table_format" in pipeline_str
+        assert "markdown" in pipeline_str
+
+        new_pipeline = Pipeline.loads(pipeline_str)
+        new_converter = new_pipeline.get_component("converter")
+        assert isinstance(new_converter, DOCXToDocument)
+        assert new_converter.table_format == DOCXTableFormat.MARKDOWN
+
     def test_run(self, test_files_path, docx_converter):
         """
         Test if the component runs correctly
@@ -48,6 +139,113 @@ def test_run(self, test_files_path, docx_converter):
             ),
         }
 
+    def test_run_with_table(self, test_files_path):
+        """
+        Test if the component runs correctly
+        """
+        docx_converter = DOCXToDocument(table_format=DOCXTableFormat.MARKDOWN)
+        paths = [test_files_path / "docx" / "sample_docx.docx"]
+        output = docx_converter.run(sources=paths)
+        docs = output["documents"]
+        assert len(docs) == 1
+        assert "Donald Trump" in docs[0].content  ## :-)
+        assert docs[0].meta.keys() == {"file_path", "docx"}
+        assert docs[0].meta == {
+            "file_path": str(paths[0]),
+            "docx": DOCXMetadata(
+                author="Saha, Anirban",
+                category="",
+                comments="",
+                content_status="",
+                created="2020-07-14T08:14:00+00:00",
+                identifier="",
+                keywords="",
+                language="",
+                last_modified_by="Saha, Anirban",
+                last_printed=None,
+                modified="2020-07-14T08:16:00+00:00",
+                revision=1,
+                subject="",
+                title="",
+                version="",
+            ),
+        }
+        # let's now detect that the table markdown is correctly added and that order of elements is correct
+        content_parts = docs[0].content.split("\n\n")
+        table_index = next(i for i, part in enumerate(content_parts) if "| This | Is | Just a |" in part)
+        # check that natural order of the document is preserved
+        assert any("Donald Trump" in part for part in content_parts[:table_index]), "Text before table not found"
+        assert any(
+            "Now we are in Page 2" in part for part in content_parts[table_index + 1 :]
+        ), "Text after table not found"
+
+    @pytest.mark.parametrize("table_format", ["markdown", "csv"])
+    def test_table_between_two_paragraphs(self, test_files_path, table_format):
+        docx_converter = DOCXToDocument(table_format=table_format)
+        paths = [test_files_path / "docx" / "sample_docx_3.docx"]
+        output = docx_converter.run(sources=paths)
+
+        content = output["documents"][0].content
+
+        paragraphs_one = content.find("Table: AI Use Cases in Different Industries")
+        paragraphs_two = content.find("Paragraph 2:")
+        table = content[
+            paragraphs_one + len("Table: AI Use Cases in Different Industries") + 1 : paragraphs_two
+        ].strip()
+
+        if table_format == "markdown":
+            split = list(filter(None, table.split("\n")))
+            expected_table_header = "| Industry   | AI Use Case                    | Impact                    |"
+            expected_last_row = "| Finance    | Fraud detection and prevention | Reduced financial losses  |"
+
+            assert split[0] == expected_table_header
+            assert split[-1] == expected_last_row
+        if table_format == "csv":  # CSV format
+            csv_reader = csv.reader(StringIO(table))
+            rows = list(csv_reader)
+            assert len(rows) == 3  # Header + 2 data rows
+            assert rows[0] == ["Industry", "AI Use Case", "Impact"]
+            assert rows[-1] == ["Finance", "Fraud detection and prevention", "Reduced financial losses"]
+
+    @pytest.mark.parametrize("table_format", ["markdown", "csv"])
+    def test_table_content_correct_parsing(self, test_files_path, table_format):
+        docx_converter = DOCXToDocument(table_format=table_format)
+        paths = [test_files_path / "docx" / "sample_docx_3.docx"]
/ "sample_docx_3.docx"] + output = docx_converter.run(sources=paths) + content = output["documents"][0].content + + paragraphs_one = content.find("Table: AI Use Cases in Different Industries") + paragraphs_two = content.find("Paragraph 2:") + table = content[ + paragraphs_one + len("Table: AI Use Cases in Different Industries") + 1 : paragraphs_two + ].strip() + + if table_format == "markdown": + split = list(filter(None, table.split("\n"))) + assert len(split) == 4 + + expected_table_header = "| Industry | AI Use Case | Impact |" + expected_table_top_border = "| ---------- | ------------------------------ | ------------------------- |" + expected_table_row_one = "| Healthcare | Predictive diagnostics | Improved patient outcomes |" + expected_table_row_two = "| Finance | Fraud detection and prevention | Reduced financial losses |" + + assert split[0] == expected_table_header + assert split[1] == expected_table_top_border + assert split[2] == expected_table_row_one + assert split[3] == expected_table_row_two + if table_format == "csv": # CSV format + csv_reader = csv.reader(StringIO(table)) + rows = list(csv_reader) + assert len(rows) == 3 # Header + 2 data rows + + expected_header = ["Industry", "AI Use Case", "Impact"] + expected_row_one = ["Healthcare", "Predictive diagnostics", "Improved patient outcomes"] + expected_row_two = ["Finance", "Fraud detection and prevention", "Reduced financial losses"] + + assert rows[0] == expected_header + assert rows[1] == expected_row_one + assert rows[2] == expected_row_two + def test_run_with_additional_meta(self, test_files_path, docx_converter): paths = [test_files_path / "docx" / "sample_docx_1.docx"] output = docx_converter.run(sources=paths, meta={"language": "it", "author": "test_author"}) diff --git a/test/test_files/docx/sample_docx_3.docx b/test/test_files/docx/sample_docx_3.docx new file mode 100644 index 0000000000000000000000000000000000000000..f3100fa9abbf5ba1f562714b8854fd2bdb7bb6f2 GIT binary patch literal 10130 zcmaia1ymf{vi4wMfFZcMySqzZa3{FCy9al74Nh9*Z!(%E6PAYqXU3IAiz9BSrhP^pgx_u+dG*s*}2*pnL1h8nKOFW z+7u}~|1KTWTpS9QU9@%h-IbdRYFC6U9{fESzlChLug5!gPqd0B)F;S) zoKcCLqUD-1f>gq}x+^Ann-`kv1;));gk~Y&LkNNLJ9>}zn94WC>=BzZWz3WmFb9RA zpp-m|tc4rp#PlyQ4y&d^#UAGPx=3;TFY_GCURHT7MCSNH=2xEiFm;%TV9JC$@w#4bTJUmanroZ>`J z$}Oo>-^4dz$-B{P;d^0OL-VhwK?vM=mVGElimCyMY$Mo!7GOo3h4rCKhyvMNh8Ga_ z2-wZcG^zOX6Vm-bB8`-;?hgx&-ll1B`1JOc3y&7|7Vt}p5ATtVH(tc%o-Gk(FT0Py@gB#F%magKS5ifBFGEap55 zV^!EMEh84~c%W^LA1=jD5$re=chaJlx5XO zrgtm962vTPIn0&YsBl_%t(cIY_LJy1EVU7L*oPy`?dk>P;XPm<(ZHE#Z6DKraF-vv z#XrV_zB;e~!pFV)#O@!mW5B=8)9r(0Q?!oJZ2Q@7ySeE4vXtEo5vQPuB&aHrDILAm z>X$o*KVADctFT6-_k`O-$N!-Uw?~-{#+B`l@H&~#UK% zN@{*PmRay>R#O^0kVchwTZ#7CK%hP6A0y)G^){s$wIx8 zd}cn{iHbN0N}t8y$|Jev-8sQLj>e^8tqxIlhHnY>9YliWm#grHIrOY@cS?=22P$D$ z;e%SBKhbuLLrtRw)*TVwxHC&&kBkg!x^Hn0uh4U>`PFHO8r?V02R4TBiZP**yZW$dNorZDjm>f~ zVya^RwMmm~98i}i_yBn)kx2IFLX|Pgg&F27_>yQBAKB}AouzjclQot)V=>5iu8H`Y zrT?Inm7iZT58e_cmOnH=1+wG{ZR;xR+OJ)PQitDgkik;nhWw)BN-NxAgL_%O2l>ny zSA>)=EjdJZkW!n6#2}NK_q!!FL;xm`?Aw+{HD4VYHgBUbm=dQ~Gu#`@qYH1!nm9o#9O?sX0DM91-lnL}$Z`@gv2s`GIJ1A``?2kL9828tEXg65 z0Uvy+HoJ*Hh^y%fNc=AI89~vGf_dS1HBvKiVM56D8VEEVfFLx4iXT)E4zac(v~%qN z#^?)W|E|b@T99(*+~=1%WI8+6$e&PcUGW#Gz?-F@>va-~6cE173G5mN0DKP-oe2>1 z<_Gm8q`t;SKMG@`bx_npx`P0Aap{n>y9QRmTPY<#ekoXvBqK_x3Ov`*j1Qm zVMM(l(+R{zsD68dH~dyp=HVNW8tVe2CU1NGabY43mnnAqYsR3hqkEl*Y)~&tK3RPV zt$Lq)4_^nyXO>Ssd=cx9QErGpGqxyE+iMP-9?FAjm*=Y#`FsoMfbC7;Y0s0_5Exe=M_6Rt8o=CI zQEK8Z?HvW&3gw5S^I#w|cSbaoqg!^YcL?{7yEq6c@@J)m&8gU_AOEcKhI>|yKH$`t 
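Taken together, the patch lets table-bearing DOCX files flow through the converter end to end. A minimal usage sketch, assuming `python-docx` is installed and using `sample.docx` as a placeholder for any local file that contains a table:

```python
from haystack.components.converters.docx import DOCXToDocument, DOCXTableFormat

converter = DOCXToDocument(table_format=DOCXTableFormat.CSV)
result = converter.run(sources=["sample.docx"])  # placeholder path

# Tables are emitted inline, in document order, between the surrounding
# paragraphs; with CSV output, a table like the test fixture's shows up in
# the content as plain rows:
#   Industry,AI Use Case,Impact
#   Healthcare,Predictive diagnostics,Improved patient outcomes
#   Finance,Fraud detection and prevention,Reduced financial losses
print(result["documents"][0].content)
```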