From d30db746846bedbeff7682483dd2bf2c7641be12 Mon Sep 17 00:00:00 2001 From: Dragos0000 Date: Thu, 15 Feb 2024 16:23:07 +0000 Subject: [PATCH] fixes for extractor --- .../adapters/notice_metadata_extractor.py | 41 +++++++++++++------ .../adapters/xpath_registry.py | 8 +++- .../test_metadata_extractor.py | 3 +- 3 files changed, 36 insertions(+), 16 deletions(-) diff --git a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py index 76b772a8..f94caac1 100644 --- a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py +++ b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py @@ -276,16 +276,19 @@ def __init__(self, xml_manifestation: XMLManifestation): @property def title(self): - title_country = LanguageTaggedString(text=extract_text_from_element( - element=self.manifestation_root.find(self.xpath_registry.xpath_title_country, namespaces=self.namespaces)),language='') - title_text = LanguageTaggedString( - text=extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_title, - namespaces=self.namespaces)), - language=extract_attribute_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_title, - namespaces=self.namespaces), attrib_key="languageID")) - return [CompositeTitle(title=title_text, title_country=title_country)] + title_translations = [] + title_elements = self.manifestation_root.findall( + self.xpath_registry.xpath_title, + namespaces=self.namespaces) + for title in title_elements: + language = title.find(".").attrib["languageID"] + title_country = LanguageTaggedString(text=language, language=language) + title_text = LanguageTaggedString( + text=extract_text_from_element(element=title), + language=language) + title_translations.append( + CompositeTitle(title=title_text, title_country=title_country)) + return title_translations @property def publication_date(self): @@ -324,9 +327,21 @@ def type_of_procedure(self): @property def place_of_performance(self): - extracted_nuts_code = extract_text_from_element( - element=self.manifestation_root.find(self.xpath_registry.xpath_place_of_performance, namespaces=self.namespaces)) - return [EncodedValue(value=extracted_nuts_code,code=extracted_nuts_code)] + extracted_project_nuts_code = extract_text_from_element( + element=self.manifestation_root.find(self.xpath_registry.xpath_place_of_performance, + namespaces=self.namespaces)) + place_of_performance_organisation_elements = self.manifestation_root.findall( + self.xpath_registry.xpath_place_of_performance_elements, namespaces=self.namespaces) + nuts_code_from_organisations = [EncodedValue(code=extract_text_from_element(element=element), + value=extract_text_from_element(element=element)) for element in + place_of_performance_organisation_elements] + + if extracted_project_nuts_code: + extracted_project_nuts_encoded = EncodedValue(value=extracted_project_nuts_code, + code=extracted_project_nuts_code) + nuts_code_from_organisations.append(extracted_project_nuts_encoded) + + return nuts_code_from_organisations @property def common_procurement(self): diff --git a/ted_sws/notice_metadata_processor/adapters/xpath_registry.py b/ted_sws/notice_metadata_processor/adapters/xpath_registry.py index 1e79ec25..2d0667c7 100644 --- a/ted_sws/notice_metadata_processor/adapters/xpath_registry.py +++ b/ted_sws/notice_metadata_processor/adapters/xpath_registry.py @@ -148,7 +148,7 @@ class EformsXPathRegistry(XPathRegistryABC): @property def xpath_title(self): - return ".//cac:ProcurementProject/cbc:Name" + return "./cac:ProcurementProject/cbc:Name" @property def xpath_title_country(self): @@ -176,7 +176,7 @@ def xpath_document_sent_date(self): @property def xpath_type_of_contract(self): - return ".//cac:ProcurementProject/cbc:ProcurementTypeCode[@listName='contract-nature']" + return "./cac:ProcurementProject/cbc:ProcurementTypeCode[@listName='contract-nature']" @property def xpath_type_of_procedure(self): @@ -186,6 +186,10 @@ def xpath_type_of_procedure(self): def xpath_place_of_performance(self): return ".//cac:ProcurementProject/cac:RealizedLocation/cac:Address/cbc:CountrySubentityCode[@listName='nuts']" + @property + def xpath_place_of_performance_elements(self): + return ".//efac:Organizations/efac:Organization/efac:Company/cac:PostalAddress/cbc:CountrySubentityCode[@listName='nuts']" + @property def xpath_common_procurement_elements(self): return ".//cac:ProcurementProject/*/cbc:ItemClassificationCode[@listName='cpv']" diff --git a/tests/unit/notice_metadata_processor/test_metadata_extractor.py b/tests/unit/notice_metadata_processor/test_metadata_extractor.py index 9575e5a2..ad9fa0a1 100644 --- a/tests/unit/notice_metadata_processor/test_metadata_extractor.py +++ b/tests/unit/notice_metadata_processor/test_metadata_extractor.py @@ -1,4 +1,5 @@ import xml.etree.ElementTree as ET +from importlib.resources import path from ted_sws.core.model.manifestation import XMLManifestation from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import EformsNoticeMetadataExtractor, \ @@ -117,7 +118,7 @@ def test_metadata_eform_extractor(eform_notice_622690): assert extracted_metadata_dict["extracted_form_number"] == None -def _test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths): +def test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths): for xml_notice_path in eforms_xml_notice_paths: notice_id = xml_notice_path.name eforms_subtype = xml_notice_path.parent.name