From 80b60a742e7485e4b9dd7d74067064611216f6d3 Mon Sep 17 00:00:00 2001 From: Dragos0000 Date: Mon, 23 Oct 2023 14:50:10 +0100 Subject: [PATCH] test passing --- ...totype.py => notice_metadata_extractor.py} | 4 +- ...otype.py => notice_metadata_normaliser.py} | 37 +- .../services/metadata_normalizer.py | 4 +- .../notice_metadata_normaliser_prototype.py | 72 ---- .../xml_manifestation_metadata_extractor.py | 322 ------------------ .../services/xpath_registry.py | 128 ------- .../services/notice_packager.py | 3 - tests/conftest.py | 2 +- .../notice_metadata_processor/conftest.py | 3 +- .../test_metadata_normaliser.py | 4 +- .../test_notice_eligibility.py | 1 + .../test_metadata_extractor.py | 82 +---- .../test_metadata_normaliser.py | 94 ++++- .../test_xpath_registry.py | 13 +- tests/unit/notice_packager/conftest.py | 7 +- 15 files changed, 142 insertions(+), 634 deletions(-) rename ted_sws/notice_metadata_processor/adapters/{notice_metadata_extractor_prototype.py => notice_metadata_extractor.py} (99%) rename ted_sws/notice_metadata_processor/adapters/{notice_metadata_normaliser_prototype.py => notice_metadata_normaliser.py} (90%) delete mode 100644 ted_sws/notice_metadata_processor/services/notice_metadata_normaliser_prototype.py delete mode 100644 ted_sws/notice_metadata_processor/services/xml_manifestation_metadata_extractor.py delete mode 100644 ted_sws/notice_metadata_processor/services/xpath_registry.py diff --git a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor_prototype.py b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py similarity index 99% rename from ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor_prototype.py rename to ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py index b62e1a533..02fbf2e6b 100644 --- a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor_prototype.py +++ b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py @@ -273,8 +273,8 @@ def __init__(self, xml_manifestation: XMLManifestation): @property def title(self): - title_country = extract_text_from_element( - element=self.manifestation_root.find(self.xpath_registry.xpath_title_country, namespaces=self.namespaces)) + title_country = LanguageTaggedString(text=extract_text_from_element( + element=self.manifestation_root.find(self.xpath_registry.xpath_title_country, namespaces=self.namespaces)),language='') title_text = LanguageTaggedString( text=extract_text_from_element(element=self.manifestation_root.find( self.xpath_registry.xpath_title, diff --git a/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser_prototype.py b/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py similarity index 90% rename from ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser_prototype.py rename to ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py index 4842bb82e..711e4fb60 100644 --- a/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser_prototype.py +++ b/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py @@ -269,29 +269,50 @@ class EformsNoticeMetadataNormaliser(NoticeMetadataNormaliserABC): @classmethod def iso_date_format(cls, _date: str, with_none=False): if _date or not with_none: + if 'Z' in _date: + # Replace 'Z' with '+00:00' and parse + _date = _date.replace('Z', '+00:00') return datetime.fromisoformat(_date).isoformat() return None + @classmethod + def get_form_type_notice_type_and_legal_basis(cls, extracted_notice_subtype: str) -> Tuple: + """ + Get the values for form type, notice type and legal basis from the eForm mapping files + """ + ef_map: pd.DataFrame = mapping_registry.ef_notice_df + filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0] + try: + form_type = filtered_df[FORM_TYPE_KEY] + notice_type = filtered_df[E_FORM_NOTICE_TYPE_COLUMN] + legal_basis = filtered_df[E_FORM_LEGAL_BASIS_COLUMN] + except: + raise Exception( + f"This notice can't be mapped with the current mapping files (standard forms mapping and eforms mapping)." + f"Searched values: notice_subtype = {extracted_notice_subtype}," + f"Therefore form_type, notice_type, legal_basis and eforms_subtype fields can't be normalised") + + return form_type, notice_type, legal_basis + def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> NormalisedMetadata: """ Generate the normalised metadata :return: """ extracted_metadata = extracted_metadata - countries_map = mapping_registry.countries form_type_map = mapping_registry.form_type languages_map = mapping_registry.languages legal_basis_map = mapping_registry.legal_basis notice_type_map = mapping_registry.notice_type nuts_map = mapping_registry.nuts - + form_type, notice_type, legal_basis = self.get_form_type_notice_type_and_legal_basis( + extracted_notice_subtype=extracted_metadata.extracted_notice_subtype) metadata = { TITLE_KEY: [title.title for title in extracted_metadata.title], LONG_TITLE_KEY: [ LanguageTaggedString(text=JOIN_SEP.join( [ title.title_country.text, - title.title_city.text, title.title.text ]), language=title.title.language) for title in extracted_metadata.title @@ -300,15 +321,11 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise PUBLICATION_DATE_KEY: self.iso_date_format(extracted_metadata.publication_date), OJS_NUMBER_KEY: extracted_metadata.ojs_issue_number, OJS_TYPE_KEY: extracted_metadata.ojs_type if extracted_metadata.ojs_type else "S", - BUYER_CITY_KEY: [city_of_buyer for city_of_buyer in extracted_metadata.city_of_buyer], - BUYER_NAME_KEY: [name_of_buyer for name_of_buyer in extracted_metadata.name_of_buyer], LANGUAGE_KEY: get_map_value(mapping=languages_map, value=extracted_metadata.original_language), - BUYER_COUNTRY_KEY: get_map_value(mapping=countries_map, value=extracted_metadata.country_of_buyer), - EU_INSTITUTION_KEY: False if extracted_metadata.eu_institution == '-' else True, SENT_DATE_KEY: self.iso_date_format(extracted_metadata.document_sent_date, True), DEADLINE_DATE_KEY: self.iso_date_format(extracted_metadata.deadline_for_submission, True), - NOTICE_TYPE_KEY: get_map_value(mapping=notice_type_map, value=extracted_metadata.extracted_notice_type), - FORM_TYPE_KEY: get_map_value(mapping=form_type_map, value=extracted_metadata.extracted_eform_type), + NOTICE_TYPE_KEY: get_map_value(mapping=notice_type_map, value=notice_type), + FORM_TYPE_KEY: get_map_value(mapping=form_type_map, value=form_type), PLACE_OF_PERFORMANCE_KEY: get_map_list_value_by_code( mapping=nuts_map, listing=extracted_metadata.place_of_performance @@ -318,7 +335,7 @@ def normalise_metadata(self, extracted_metadata: ExtractedMetadata) -> Normalise ) if extracted_metadata.legal_basis_directive else None, FORM_NUMBER_KEY: '', LEGAL_BASIS_DIRECTIVE_KEY: get_map_value(mapping=legal_basis_map, - value=extracted_metadata.legal_basis_directive), + value=legal_basis), E_FORMS_SUBTYPE_KEY: extracted_metadata.extracted_notice_subtype, XSD_VERSION_KEY: extracted_metadata.xml_schema_version } diff --git a/ted_sws/notice_metadata_processor/services/metadata_normalizer.py b/ted_sws/notice_metadata_processor/services/metadata_normalizer.py index 09bf6bddf..45ae8ee67 100644 --- a/ted_sws/notice_metadata_processor/services/metadata_normalizer.py +++ b/ted_sws/notice_metadata_processor/services/metadata_normalizer.py @@ -5,9 +5,9 @@ from ted_sws.core.model.metadata import NormalisedMetadata, NormalisedMetadataView from ted_sws.core.model.notice import Notice from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryABC -from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor_prototype import NoticeMetadataExtractorABC, \ +from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import NoticeMetadataExtractorABC, \ EformsNoticeMetadataExtractor, DefaultNoticeMetadataExtractor -from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser_prototype import NoticeMetadataNormaliserABC, \ +from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import NoticeMetadataNormaliserABC, \ EformsNoticeMetadataNormaliser, DefaultNoticeMetadataNormaliser, ENGLISH_LANGUAGE_TAG, LONG_TITLE_KEY, TITLE_KEY, \ BUYER_NAME_KEY, BUYER_CITY_KEY from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata diff --git a/ted_sws/notice_metadata_processor/services/notice_metadata_normaliser_prototype.py b/ted_sws/notice_metadata_processor/services/notice_metadata_normaliser_prototype.py deleted file mode 100644 index 56a7ec150..000000000 --- a/ted_sws/notice_metadata_processor/services/notice_metadata_normaliser_prototype.py +++ /dev/null @@ -1,72 +0,0 @@ -# from ted_sws.core.model.manifestation import XMLManifestation -# from ted_sws.core.model.metadata import NormalisedMetadata -# from ted_sws.core.model.notice import Notice -# from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser_prototype import NoticeMetadataNormaliserABC, \ -# EformsNoticeMetadataNormaliser, DefaultNoticeMetadataNormaliser -# from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor_prototype import NoticeMetadataExtractorABC, \ -# EformsNoticeMetadataExtractor, DefaultNoticeMetadataExtractor -# from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata -# import xml.etree.ElementTree as ET -# -# -# def check_if_xml_manifestation_is_eform(xml_manifestation: XMLManifestation) -> bool: -# """ -# Check if the provided XML content is an Eform Notice document. -# """ -# return ET.fromstring(xml_manifestation.object_data).tag != "TED_EXPORT" -# -# -# def find_metadata_extractor_based_on_xml_manifestation( -# xml_manifestation: XMLManifestation) -> NoticeMetadataExtractorABC: -# """ -# Find the correct extractor based on the XML Manifestation -# """ -# if check_if_xml_manifestation_is_eform(xml_manifestation): -# return EformsNoticeMetadataExtractor(xml_manifestation=xml_manifestation) -# else: -# return DefaultNoticeMetadataExtractor(xml_manifestation=xml_manifestation) -# -# -# def find_metadata_normaliser_based_on_xml_manifestation( -# xml_manifestation: XMLManifestation) -> NoticeMetadataNormaliserABC: -# """ -# Find the correct extractor based on the XML Manifestation -# """ -# if check_if_xml_manifestation_is_eform(xml_manifestation): -# return EformsNoticeMetadataNormaliser() -# else: -# return DefaultNoticeMetadataNormaliser() -# -# -# def extract_notice_metadata(metadata_extractor: NoticeMetadataExtractorABC) -> ExtractedMetadata: -# """ -# Extract metadata using the correct extractor type -# """ -# return metadata_extractor.extract_metadata() -# -# -# def normalise_notice_metadata(extracted_metadata: ExtractedMetadata, -# metadata_normaliser: NoticeMetadataNormaliserABC) -> NormalisedMetadata: -# """ -# Normalise metadata using the correct normaliser type -# """ -# return metadata_normaliser.normalise_metadata(extracted_metadata) -# -# -# def extract_and_normalise_notice_metadata(xml_manifestation: XMLManifestation) -> NormalisedMetadata: -# """ -# Extract and normalise metadata using the correct extractor and normaliser type -# """ -# metadata_extractor = find_metadata_extractor_based_on_xml_manifestation(xml_manifestation) -# extracted_metadata = extract_notice_metadata(metadata_extractor) -# metadata_normaliser = find_metadata_normaliser_based_on_xml_manifestation(xml_manifestation) -# normalised_metadata = normalise_notice_metadata(extracted_metadata, metadata_normaliser) -# return normalised_metadata -# -# -# def extract_and_normalise_notice_metadata_from_notice(notice: Notice) -> NormalisedMetadata: -# """ -# Extract and normalise metadata using the correct extractor and normaliser type -# """ -# xml_manifestation = notice.xml_manifestation -# return extract_and_normalise_notice_metadata(xml_manifestation) diff --git a/ted_sws/notice_metadata_processor/services/xml_manifestation_metadata_extractor.py b/ted_sws/notice_metadata_processor/services/xml_manifestation_metadata_extractor.py deleted file mode 100644 index e0d177c39..000000000 --- a/ted_sws/notice_metadata_processor/services/xml_manifestation_metadata_extractor.py +++ /dev/null @@ -1,322 +0,0 @@ -import xml.etree.ElementTree as ET -from io import StringIO - -from ted_sws.core.model.manifestation import XMLManifestation -from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata, LanguageTaggedString, CompositeTitle, \ - EncodedValue -from ted_sws.notice_metadata_processor.services.xpath_registry import XpathRegistry - - -class XMLManifestationMetadataExtractor: - """ - Extracts metadata from an XML manifestation. - """ - - def __init__(self, xml_manifestation: XMLManifestation): - self.xml_manifestation = xml_manifestation - self.manifestation_root = self._parse_manifestation() - self.namespaces = self._get_normalised_namespaces() - self.xpath_registry = XpathRegistry() - - @property - def title(self): - title_translations = [] - title_elements = self.manifestation_root.findall( - self.xpath_registry.xpath_title_elements, - namespaces=self.namespaces) - for title in title_elements: - language = title.find(".").attrib["LG"] - title_country = LanguageTaggedString( - text=extract_text_from_element( - element=title.find(self.xpath_registry.xpath_title_country, namespaces=self.namespaces)), - language=language) - title_city = LanguageTaggedString( - text=extract_text_from_element( - element=title.find(self.xpath_registry.xpath_title_town, namespaces=self.namespaces)), - language=language) - - title_text = LanguageTaggedString( - text=extract_text_from_element(element=title.find(self.xpath_registry.xpath_title_text_first, - namespaces=self.namespaces)) or extract_text_from_element( - element=title.find(self.xpath_registry.xpath_title_text_second, namespaces=self.namespaces)), - language=language) - title_translations.append( - CompositeTitle(title=title_text, title_city=title_city, title_country=title_country)) - - return title_translations - - @property - def notice_publication_number(self): - return self.manifestation_root.get("DOC_ID") - - @property - def publication_date(self): - return extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_publication_date, - namespaces=self.namespaces)) - - @property - def ojs_type(self): - return extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_ojs_type, - namespaces=self.namespaces)) - - @property - def ojs_issue_number(self): - return extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_ojs_issue_number, - namespaces=self.namespaces)) - - @property - def city_of_buyer(self): - return [title.title_city for title in self.title] - - @property - def name_of_buyer(self): - buyer_name_elements = self.manifestation_root.findall( - self.xpath_registry.xpath_name_of_buyer_elements, - namespaces=self.namespaces) - - return [LanguageTaggedString(text=extract_text_from_element(element=buyer_name.find(".")), - language=extract_attribute_from_element(element=buyer_name.find("."), - attrib_key="LG")) for - buyer_name in buyer_name_elements] - - @property - def eu_institution(self): - return self.type_of_buyer.value if self.type_of_buyer.code == "5" else "-" - - @property - def uri_list(self): - uri_elements = self.manifestation_root.findall( - self.xpath_registry.xpath_uri_elements, - namespaces=self.namespaces) - - return [LanguageTaggedString(text=extract_text_from_element(element=uri.find(".")), - language=extract_attribute_from_element(element=uri.find("."), - attrib_key="LG")) for - uri in uri_elements] - - @property - def country_of_buyer(self): - return extract_attribute_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_country_of_buyer, - namespaces=self.namespaces), attrib_key="VALUE") - - @property - def original_language(self): - return extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_original_language, - namespaces=self.namespaces)) - - @property - def document_sent_date(self): - return extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_document_sent_date, - namespaces=self.namespaces)) - - @property - def type_of_buyer(self): - return extract_code_and_value_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_type_of_buyer, - namespaces=self.namespaces)) - - @property - def deadline_for_submission(self): - return extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_deadline_for_submission, - namespaces=self.namespaces)) - - @property - def type_of_contract(self): - return extract_code_and_value_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_type_of_contract, - namespaces=self.namespaces)) - - @property - def type_of_procedure(self): - return extract_code_and_value_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_type_of_procedure, - namespaces=self.namespaces)) - - @property - def extracted_document_type(self): - return extract_code_and_value_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_document_type, - namespaces=self.namespaces)) - - @property - def extracted_form_number(self): - return extract_attribute_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_form_number, - namespaces=self.namespaces), attrib_key="FORM") - - @property - def regulation(self): - return extract_code_and_value_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_regulation, - namespaces=self.namespaces)) - - @property - def type_of_bid(self): - return extract_code_and_value_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_type_of_bid, - namespaces=self.namespaces)) - - @property - def award_criteria(self): - return extract_code_and_value_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_award_criteria, - namespaces=self.namespaces)) - - @property - def common_procurement(self): - common_procurement_elements = self.manifestation_root.findall( - self.xpath_registry.xpath_common_procurement_elements, - namespaces=self.namespaces) - return [extract_code_and_value_from_element(element=element) for element in common_procurement_elements] - - @property - def place_of_performance(self): - place_of_performance_elements = self.manifestation_root.findall( - self.xpath_registry.xpath_place_of_performance_first, - namespaces=self.namespaces) or self.manifestation_root.findall( - self.xpath_registry.xpath_place_of_performance_second, - namespaces=self.namespaces) or self.manifestation_root.findall( - self.xpath_registry.xpath_place_of_performance_third, - namespaces=self.namespaces) - - return [extract_code_and_value_from_element(element=element) for element in place_of_performance_elements] - - @property - def internet_address(self): - return extract_text_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_internet_address, - namespaces=self.namespaces)) - - @property - def legal_basis_directive(self): - return extract_attribute_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_legal_basis_directive_first, - namespaces=self.namespaces), attrib_key="VALUE") or extract_attribute_from_element( - element=self.manifestation_root.find( - self.xpath_registry.xpath_legal_basis_directive_second, - namespaces=self.namespaces), attrib_key="VALUE") or extract_text_from_element( - element=self.manifestation_root.find( - self.xpath_registry.xpath_legal_basis_directive_third, - namespaces=self.namespaces)) - - @property - def xml_schema(self): - xsi_namespace = self.namespaces.get("xsi") - xml_schema_attribute = f"{ {xsi_namespace} }schemaLocation".replace("'", "") - return self.manifestation_root.get(xml_schema_attribute) if xsi_namespace else None - - @property - def xml_schema_version(self): - return self.manifestation_root.get("VERSION") or extract_attribute_from_element( - element=self.manifestation_root.find( - self.xpath_registry.xpath_form_number, - namespaces=self.namespaces), attrib_key="VERSION") - - @property - def extracted_notice_type(self): - return extract_attribute_from_element(element=self.manifestation_root.find( - self.xpath_registry.xpath_notice_type, - namespaces=self.namespaces), attrib_key="TYPE") - - def to_metadata(self) -> ExtractedMetadata: - """ - Creating extracted metadata - :return: - """ - metadata: ExtractedMetadata = ExtractedMetadata() - metadata.title = self.title - metadata.notice_publication_number = self.notice_publication_number - metadata.publication_date = self.publication_date - metadata.ojs_type = self.ojs_type - metadata.ojs_issue_number = self.ojs_issue_number - metadata.city_of_buyer = self.city_of_buyer - metadata.name_of_buyer = self.name_of_buyer - metadata.original_language = self.original_language - metadata.uri_list = self.uri_list - metadata.country_of_buyer = self.country_of_buyer - metadata.type_of_buyer = self.type_of_buyer - metadata.eu_institution = self.eu_institution - metadata.document_sent_date = self.document_sent_date - metadata.type_of_contract = self.type_of_contract - metadata.type_of_procedure = self.type_of_procedure - metadata.extracted_document_type = self.extracted_document_type - metadata.extracted_form_number = self.extracted_form_number - metadata.regulation = self.regulation - metadata.type_of_bid = self.type_of_bid - metadata.award_criteria = self.award_criteria - metadata.common_procurement = self.common_procurement - metadata.place_of_performance = self.place_of_performance - metadata.internet_address = self.internet_address - metadata.legal_basis_directive = self.legal_basis_directive - metadata.xml_schema = self.xml_schema - metadata.xml_schema_version = self.xml_schema_version - metadata.extracted_notice_type = self.extracted_notice_type - return metadata - - def _parse_manifestation(self): - """ - Parsing XML manifestation and getting the root - :return: - """ - xml_manifestation_content = self.xml_manifestation.object_data - return ET.fromstring(xml_manifestation_content) - - def _get_normalised_namespaces(self): - """ - Get normalised namespaces from XML manifestation - :return: - """ - namespaces = dict([node for _, node in ET.iterparse(source=StringIO(self.xml_manifestation.object_data), - events=['start-ns'])]) - - namespaces["manifestation_ns"] = namespaces.pop("") if "" in namespaces.keys() else "" - - tmp_dict = namespaces.copy() - items = tmp_dict.items() - for key, value in items: - if value.endswith("nuts"): - namespaces["nuts"] = namespaces.pop(key) - - if "nuts" not in namespaces.keys(): - namespaces.update({"nuts": "no_nuts"}) - - return namespaces - - -def extract_text_from_element(element: ET.Element) -> str: - """ - Extract text from an element in the XML structure - :param element: - :return: str - """ - if element is not None: - return element.text - - -def extract_attribute_from_element(element: ET.Element, attrib_key: str) -> str: - """ - Extract attribute value from an element in the XML structure - :param element: - :param attrib_key: - :return: - """ - if element is not None: - return element.get(attrib_key) - - -def extract_code_and_value_from_element(element: ET.Element) -> EncodedValue: - """ - Extract code attribute and text values from an element in the XML structure - :param element: - :return: - """ - if element is not None: - return EncodedValue(code=extract_attribute_from_element(element=element, attrib_key="CODE"), - value=extract_text_from_element(element=element)) diff --git a/ted_sws/notice_metadata_processor/services/xpath_registry.py b/ted_sws/notice_metadata_processor/services/xpath_registry.py deleted file mode 100644 index 1521a7986..000000000 --- a/ted_sws/notice_metadata_processor/services/xpath_registry.py +++ /dev/null @@ -1,128 +0,0 @@ -class XpathRegistry: - """ - Holds xpath's to the elements necessary to extract metadata from XML manifestation - """ - - @property - def xpath_title_elements(self): - return "manifestation_ns:TRANSLATION_SECTION/manifestation_ns:ML_TITLES/" - - @property - def xpath_title_town(self): - return "manifestation_ns:TI_TOWN" - - @property - def xpath_title_country(self): - return "manifestation_ns:TI_CY" - - @property - def xpath_title_text_first(self): - return "manifestation_ns:TI_TEXT/manifestation_ns:P" - - @property - def xpath_title_text_second(self): - return "manifestation_ns:TI_TEXT" - - @property - def xpath_publication_date(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:REF_OJS/manifestation_ns:DATE_PUB" - - @property - def xpath_ojs_type(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:REF_OJS/manifestation_ns:COLL_OJ" - - @property - def xpath_ojs_issue_number(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:REF_OJS/manifestation_ns:NO_OJ" - - @property - def xpath_name_of_buyer_elements(self): - return "manifestation_ns:TRANSLATION_SECTION/manifestation_ns:ML_AA_NAMES/" - - @property - def xpath_country_of_buyer(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/manifestation_ns:ISO_COUNTRY" - - @property - def xpath_uri_elements(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/manifestation_ns:URI_LIST/" - - @property - def xpath_original_language(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/manifestation_ns:LG_ORIG" - - @property - def xpath_document_sent_date(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:DS_DATE_DISPATCH" - - @property - def xpath_type_of_buyer(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:AA_AUTHORITY_TYPE" - - @property - def xpath_deadline_for_submission(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:DT_DATE_FOR_SUBMISSION" - - @property - def xpath_type_of_contract(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:NC_CONTRACT_NATURE" - - @property - def xpath_type_of_procedure(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:PR_PROC" - - @property - def xpath_document_type(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:TD_DOCUMENT_TYPE" - - @property - def xpath_regulation(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:RP_REGULATION" - - @property - def xpath_type_of_bid(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:TY_TYPE_BID" - - @property - def xpath_award_criteria(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:AC_AWARD_CRIT" - - @property - def xpath_common_procurement_elements(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/manifestation_ns:ORIGINAL_CPV" - - @property - def xpath_place_of_performance_first(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/manifestation_ns:PERFORMANCE_NUTS" - - @property - def xpath_place_of_performance_second(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/nuts:PERFORMANCE_NUTS" - - @property - def xpath_place_of_performance_third(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/manifestation_ns:ORIGINAL_NUTS" - - @property - def xpath_internet_address(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:NOTICE_DATA/manifestation_ns:IA_URL_GENERAL" - - @property - def xpath_legal_basis_directive_first(self): - return "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:CODIF_DATA/manifestation_ns:DIRECTIVE" - - @property - def xpath_legal_basis_directive_second(self): - return "manifestation_ns:FORM_SECTION/*/manifestation_ns:LEGAL_BASIS" - - @property - def xpath_legal_basis_directive_third(self): - return "manifestation_ns:FORM_SECTION/*/manifestation_ns:LEGAL_BASIS_OTHER/manifestation_ns:P/manifestation_ns:FT" - - @property - def xpath_form_number(self): - return ".//*[@FORM]" - - @property - def xpath_notice_type(self): - return "manifestation_ns:FORM_SECTION/*/manifestation_ns:NOTICE" diff --git a/ted_sws/notice_packager/services/notice_packager.py b/ted_sws/notice_packager/services/notice_packager.py index 253e255ab..ed50e33ac 100644 --- a/ted_sws/notice_packager/services/notice_packager.py +++ b/ted_sws/notice_packager/services/notice_packager.py @@ -23,9 +23,6 @@ from ted_sws.core.model.manifestation import METSManifestation from ted_sws.core.model.metadata import NormalisedMetadata from ted_sws.core.model.notice import Notice -from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata -from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \ - XMLManifestationMetadataExtractor from ted_sws.notice_packager import DEFAULT_NOTICE_PACKAGE_EXTENSION from ted_sws.notice_packager.adapters.archiver import ZipArchiver from ted_sws.notice_packager.adapters.template_generator import TemplateGenerator diff --git a/tests/conftest.py b/tests/conftest.py index 603dc9126..4a7001c7b 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -14,7 +14,7 @@ from ted_sws.data_manager.adapters.notice_repository import NoticeRepositoryInFileSystem from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter from ted_sws.notice_fetcher.services.notice_fetcher import NoticeFetcher -from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser_prototype import TITLE_KEY, LONG_TITLE_KEY, \ +from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import TITLE_KEY, LONG_TITLE_KEY, \ NOTICE_NUMBER_KEY, PUBLICATION_DATE_KEY, OJS_NUMBER_KEY, OJS_TYPE_KEY, BUYER_CITY_KEY, BUYER_NAME_KEY, LANGUAGE_KEY, \ BUYER_COUNTRY_KEY, EU_INSTITUTION_KEY, SENT_DATE_KEY, DEADLINE_DATE_KEY, NOTICE_TYPE_KEY, FORM_TYPE_KEY, \ PLACE_OF_PERFORMANCE_KEY, EXTRACTED_LEGAL_BASIS_KEY, FORM_NUMBER_KEY, LEGAL_BASIS_DIRECTIVE_KEY, \ diff --git a/tests/features/notice_metadata_processor/conftest.py b/tests/features/notice_metadata_processor/conftest.py index 7e6d96209..ff69a371d 100644 --- a/tests/features/notice_metadata_processor/conftest.py +++ b/tests/features/notice_metadata_processor/conftest.py @@ -3,6 +3,7 @@ from ted_sws import config from ted_sws.data_manager.adapters.mapping_suite_repository import MappingSuiteRepositoryInFileSystem, \ MappingSuiteRepositoryMongoDB +from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice from tests import TEST_DATA_PATH from tests.fakes.fake_repository import FakeNoticeRepository @@ -30,7 +31,7 @@ def notice_eligibility_repository_path(): @pytest.fixture def normalised_notice(notice_2020): notice = notice_2020.copy() - MetadataNormaliser(notice=notice).normalise_metadata() + normalise_notice(notice=notice) return notice diff --git a/tests/features/notice_metadata_processor/test_metadata_normaliser.py b/tests/features/notice_metadata_processor/test_metadata_normaliser.py index 5a41a239d..3bbef0bb2 100644 --- a/tests/features/notice_metadata_processor/test_metadata_normaliser.py +++ b/tests/features/notice_metadata_processor/test_metadata_normaliser.py @@ -1,7 +1,7 @@ from pytest_bdd import scenario, given, when, then, parsers from ted_sws.core.model.notice import NoticeStatus -from ted_sws.notice_metadata_processor.services.metadata_normalizer import MetadataNormaliser +from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice @scenario('metadata_normaliser.feature', 'Normalizing a notice metadata') @@ -16,7 +16,7 @@ def step_impl(f03_notice_2020): @when("the normalize process is executed") def step_impl(notice): - MetadataNormaliser(notice=notice).normalise_metadata() + normalise_notice(notice=notice) @then(parsers.parse("a normalized notice {metadata} is {possibly} available")) diff --git a/tests/features/notice_metadata_processor/test_notice_eligibility.py b/tests/features/notice_metadata_processor/test_notice_eligibility.py index a616d7aa8..1e458ca99 100644 --- a/tests/features/notice_metadata_processor/test_notice_eligibility.py +++ b/tests/features/notice_metadata_processor/test_notice_eligibility.py @@ -55,6 +55,7 @@ def a_notice(normalised_notice): @given('the notice is with form number F03') def the_notice_is_with_form_number_f03(normalised_notice): """the notice is with form number F03.""" + print(normalised_notice.normalised_metadata) assert normalised_notice.normalised_metadata.form_number == "F03" diff --git a/tests/unit/notice_metadata_processor/test_metadata_extractor.py b/tests/unit/notice_metadata_processor/test_metadata_extractor.py index 8acb3db0b..1b9ed7635 100644 --- a/tests/unit/notice_metadata_processor/test_metadata_extractor.py +++ b/tests/unit/notice_metadata_processor/test_metadata_extractor.py @@ -1,16 +1,15 @@ import xml.etree.ElementTree as ET -from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor_prototype import EformsNoticeMetadataExtractor -from ted_sws.notice_metadata_processor.adapters.xpath_registry import EformsXPathRegistry +from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import EformsNoticeMetadataExtractor, \ + DefaultNoticeMetadataExtractor, extract_text_from_element, extract_attribute_from_element, \ + extract_code_and_value_from_element, parse_xml_manifestation, normalised_namespaces_from_xml_manifestation from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata, EncodedValue -from ted_sws.notice_metadata_processor.services.metadata_normalizer import check_if_xml_manifestation_is_eform -from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import extract_text_from_element, \ - extract_attribute_from_element, XMLManifestationMetadataExtractor, extract_code_and_value_from_element + def test_metadata_extractor(indexed_notice): - metadata_extractor = XMLManifestationMetadataExtractor( - xml_manifestation=indexed_notice.xml_manifestation).to_metadata() + metadata_extractor = DefaultNoticeMetadataExtractor( + xml_manifestation=indexed_notice.xml_manifestation).extract_metadata() extracted_metadata_dict = metadata_extractor.model_dump() assert isinstance(metadata_extractor, ExtractedMetadata) @@ -23,8 +22,8 @@ def test_metadata_extractor(indexed_notice): def test_metadata_extractor_2016(notice_2016): - metadata_extractor = XMLManifestationMetadataExtractor( - xml_manifestation=notice_2016.xml_manifestation).to_metadata() + metadata_extractor = DefaultNoticeMetadataExtractor( + xml_manifestation=notice_2016.xml_manifestation).extract_metadata() extracted_metadata_dict = metadata_extractor.model_dump() assert isinstance(metadata_extractor, ExtractedMetadata) @@ -33,8 +32,8 @@ def test_metadata_extractor_2016(notice_2016): def test_metadata_extractor_2015(notice_2015): - metadata_extractor = XMLManifestationMetadataExtractor( - xml_manifestation=notice_2015.xml_manifestation).to_metadata() + metadata_extractor = DefaultNoticeMetadataExtractor( + xml_manifestation=notice_2015.xml_manifestation).extract_metadata() extracted_metadata_dict = metadata_extractor.model_dump() assert isinstance(metadata_extractor, ExtractedMetadata) @@ -43,8 +42,8 @@ def test_metadata_extractor_2015(notice_2015): def test_metadata_extractor_2018(notice_2018): - metadata_extractor = XMLManifestationMetadataExtractor( - xml_manifestation=notice_2018.xml_manifestation).to_metadata() + metadata_extractor = DefaultNoticeMetadataExtractor( + xml_manifestation=notice_2018.xml_manifestation).extract_metadata() extracted_metadata_dict = metadata_extractor.model_dump() assert isinstance(metadata_extractor, ExtractedMetadata) @@ -94,71 +93,18 @@ def test_extract_code_and_value(indexed_notice): def test_get_root_of_manifestation(indexed_notice): - manifestation_root = XMLManifestationMetadataExtractor( - xml_manifestation=indexed_notice.xml_manifestation)._parse_manifestation() + manifestation_root = parse_xml_manifestation(xml_manifestation=indexed_notice.xml_manifestation) assert isinstance(manifestation_root, ET.Element) def test_get_normalised_namespaces(indexed_notice): - namespaces = XMLManifestationMetadataExtractor( - xml_manifestation=indexed_notice.xml_manifestation)._get_normalised_namespaces() + namespaces = normalised_namespaces_from_xml_manifestation(xml_manifestation=indexed_notice.xml_manifestation) assert isinstance(namespaces, dict) assert "manifestation_ns", "nuts" in namespaces.keys() -def test_check_if_xml_manifestation_is_eform(eform_notice_622690, notice_2018): - is_eform_notice_622690_a_eform = check_if_xml_manifestation_is_eform( - xml_manifestation=eform_notice_622690.xml_manifestation) - is_notice_2018_a_eform = check_if_xml_manifestation_is_eform(xml_manifestation=notice_2018.xml_manifestation) - - assert is_eform_notice_622690_a_eform == True - assert is_notice_2018_a_eform == False - - - -def test_eform_xpath(eform_notice_622690): - namespaces = XMLManifestationMetadataExtractor( - xml_manifestation=eform_notice_622690.xml_manifestation)._get_normalised_namespaces() - # print(namespaces) - doc_root = ET.fromstring(eform_notice_622690.xml_manifestation.object_data) - xpaths = EformsXPathRegistry() - # element = doc_root.find(".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode", namespaces=namespaces).text - # element = doc_root.find(".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:Publication/efbc:PublicationDate", namespaces=namespaces).text - # element = doc_root.find(".//cbc:RegulatoryDomain", namespaces=namespaces).text - # element = doc_root.find(".//cbc:RegulatoryDomain", namespaces=namespaces).text - # element = doc_root.find(".//cac:ProcurementProject/cac:MainCommodityClassification/cbc:ItemClassificationCode[@listName='cpv']", namespaces=namespaces).text - xpath_title = doc_root.find(xpaths.xpath_title, namespaces=namespaces).text - xpath_title_country = doc_root.find(xpaths.xpath_title_country, namespaces=namespaces).text - xpath_publication_date = doc_root.find(xpaths.xpath_publication_date, namespaces=namespaces).text - xpath_ojs_issue_number = doc_root.find(xpaths.xpath_ojs_issue_number, namespaces=namespaces).text - xpath_original_language = doc_root.find(xpaths.xpath_original_language, namespaces=namespaces).text - xpath_document_sent_date = doc_root.find(xpaths.xpath_document_sent_date, namespaces=namespaces).text - xpath_type_of_contract = doc_root.find(xpaths.xpath_type_of_contract, namespaces=namespaces).text - xpath_type_of_procedure = doc_root.find(xpaths.xpath_type_of_procedure, namespaces=namespaces).text - xpath_place_of_performance = doc_root.find(xpaths.xpath_place_of_performance, namespaces=namespaces).text - xpath_internet_address = doc_root.find(xpaths.xpath_internet_address, namespaces=namespaces).text - xpath_legal_basis_directive = doc_root.find(xpaths.xpath_legal_basis_directive, namespaces=namespaces).text - xpath_notice_subtype = doc_root.find(xpaths.xpath_notice_subtype, namespaces=namespaces).text - xpath_form_type = doc_root.find(xpaths.xpath_form_type, namespaces=namespaces).attrib["listName"] - xpath_notice_type = doc_root.find(xpaths.xpath_notice_type, namespaces=namespaces).text - print(xpath_title) - print(xpath_title_country) - print(xpath_publication_date) - print(xpath_ojs_issue_number) - print(xpath_original_language) - print(xpath_document_sent_date) - print(xpath_type_of_contract) - print(xpath_type_of_procedure) - print(xpath_place_of_performance) - print(xpath_internet_address) - print(xpath_legal_basis_directive) - print(xpath_notice_subtype) - print(xpath_form_type) - print(xpath_notice_type) - - def test_metadata_eform_extractor(eform_notice_622690): metadata_extractor = EformsNoticeMetadataExtractor( xml_manifestation=eform_notice_622690.xml_manifestation).extract_metadata() diff --git a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py index b74ef5739..5405c6cc6 100644 --- a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py +++ b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py @@ -1,15 +1,17 @@ import pytest +from ted_sws.core.model.metadata import NormalisedMetadata from ted_sws.core.model.notice import NoticeStatus -from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor_prototype import \ - DefaultNoticeMetadataExtractor -from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser_prototype import \ +from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import \ + DefaultNoticeMetadataExtractor, EformsNoticeMetadataExtractor +from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import \ DefaultNoticeMetadataNormaliser, get_map_value, FORM_NUMBER_KEY, LEGAL_BASIS_KEY, SF_NOTICE_TYPE_KEY, \ - DOCUMENT_CODE_KEY + DOCUMENT_CODE_KEY, EformsNoticeMetadataNormaliser +from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata from ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables -from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice, normalise_notice_by_id -from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \ - XMLManifestationMetadataExtractor +from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice, normalise_notice_by_id, \ + check_if_xml_manifestation_is_eform, find_metadata_extractor_based_on_xml_manifestation, \ + find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata from ted_sws.resources.mapping_files_registry import MappingFilesRegistry @@ -64,10 +66,12 @@ def test_normalise_legal_basis(indexed_notice): assert "32009L0081" == default_notice_normaliser.normalise_legal_basis_value( value="2009/81/EC") + def test_get_map_value(): value = get_map_value(mapping=MappingFilesRegistry().countries, value="DE") assert value == "http://publications.europa.eu/resource/authority/country/DEU" + def test_filter_df_by_variables(): df = MappingFilesRegistry().ef_notice_df filtered_df = filter_df_by_variables(df=df, form_type="planning", @@ -76,6 +80,7 @@ def test_filter_df_by_variables(): assert len(filtered_df.index) == 5 assert "32014L0024" in filtered_df["eform_legal_basis"].values + def test_get_form_type_and_notice_type(): default_notice_metadata_normaliser = DefaultNoticeMetadataNormaliser() form_type, notice_type, legal_basis, eforms_subtype = default_notice_metadata_normaliser.get_form_type_and_notice_type( @@ -89,6 +94,7 @@ def test_get_form_type_and_notice_type(): assert "32014L0024" == legal_basis assert "16" == eforms_subtype + def test_get_form_type_and_notice_type_F07(): default_notice_metadata_normaliser = DefaultNoticeMetadataNormaliser() form_type, notice_type, legal_basis, eforms_subtype = default_notice_metadata_normaliser.get_form_type_and_notice_type( @@ -102,14 +108,15 @@ def test_get_form_type_and_notice_type_F07(): assert "32014L0025" == legal_basis assert "15.1" == eforms_subtype + def test_get_filter_values(indexed_notice): default_notice_metadata_normaliser = DefaultNoticeMetadataNormaliser() filter_map = MappingFilesRegistry().filter_map_df filter_variables_dict = default_notice_metadata_normaliser.get_filter_variables_values(form_number="F07", - filter_map=filter_map, - extracted_notice_type=None, - document_type_code="7", - legal_basis="legal") + filter_map=filter_map, + extracted_notice_type=None, + document_type_code="7", + legal_basis="legal") assert isinstance(filter_variables_dict, dict) assert filter_variables_dict[FORM_NUMBER_KEY] == "F07" assert filter_variables_dict[LEGAL_BASIS_KEY] is None @@ -118,10 +125,11 @@ def test_get_filter_values(indexed_notice): with pytest.raises(Exception): default_notice_metadata_normaliser.get_filter_variables_values(form_number="F073", - filter_map=filter_map, - extracted_notice_type=None, - document_type_code="7", - legal_basis="legal") + filter_map=filter_map, + extracted_notice_type=None, + document_type_code="7", + legal_basis="legal") + def test_normalising_process_on_failed_notice_in_dag(notice_2021): extracted_metadata = DefaultNoticeMetadataExtractor(xml_manifestation=notice_2021.xml_manifestation) @@ -153,3 +161,59 @@ def test_normalising_process_on_failed_notice_in_dag(notice_2021): assert notice_type == "can-social" assert legal_basis == "32014L0024" assert eforms_subtype == "33" + + +def test_check_if_xml_manifestation_is_eform(eform_notice_622690, notice_2018): + is_eform_notice_622690_a_eform = check_if_xml_manifestation_is_eform( + xml_manifestation=eform_notice_622690.xml_manifestation) + is_notice_2018_a_eform = check_if_xml_manifestation_is_eform(xml_manifestation=notice_2018.xml_manifestation) + + assert is_eform_notice_622690_a_eform == True + assert is_notice_2018_a_eform == False + + +def test_find_metadata_extractor_based_on_xml_manifestation(eform_notice_622690, notice_2018): + assert isinstance( + find_metadata_extractor_based_on_xml_manifestation(xml_manifestation=eform_notice_622690.xml_manifestation), + EformsNoticeMetadataExtractor) + assert isinstance( + find_metadata_extractor_based_on_xml_manifestation(xml_manifestation=notice_2018.xml_manifestation), + DefaultNoticeMetadataExtractor) + + +def test_find_metadata_normaliser_based_on_xml_manifestation(eform_notice_622690, notice_2018): + assert isinstance( + find_metadata_normaliser_based_on_xml_manifestation(xml_manifestation=eform_notice_622690.xml_manifestation), + EformsNoticeMetadataNormaliser) + assert isinstance( + find_metadata_normaliser_based_on_xml_manifestation(xml_manifestation=notice_2018.xml_manifestation), + DefaultNoticeMetadataNormaliser) + + +def test_extract_notice_metadata(eform_notice_622690, notice_2018): + extractors = [EformsNoticeMetadataExtractor(xml_manifestation=eform_notice_622690.xml_manifestation), + DefaultNoticeMetadataExtractor(notice_2018.xml_manifestation)] + for extractor in extractors: + assert isinstance(extract_notice_metadata(metadata_extractor=extractor), ExtractedMetadata) + + +def test_normalise_notice_metadata(eform_notice_622690, notice_2018): + extracted_metadata = extract_notice_metadata( + metadata_extractor=EformsNoticeMetadataExtractor(xml_manifestation=eform_notice_622690.xml_manifestation)) + assert isinstance(normalise_notice_metadata(extracted_metadata=extracted_metadata, + metadata_normaliser=EformsNoticeMetadataNormaliser()), + NormalisedMetadata) + + extracted_metadata = extract_notice_metadata( + metadata_extractor=DefaultNoticeMetadataExtractor(xml_manifestation=notice_2018.xml_manifestation)) + assert isinstance(normalise_notice_metadata(extracted_metadata=extracted_metadata, + metadata_normaliser=DefaultNoticeMetadataNormaliser()), + NormalisedMetadata) + + +def test_get_form_type_notice_type_and_legal_basis(): + form_type, notice_type, legal_basis = EformsNoticeMetadataNormaliser().get_form_type_notice_type_and_legal_basis( + extracted_notice_subtype='20') + assert form_type == 'competition' + assert notice_type == 'cn-social' + assert legal_basis == '32014L0024' diff --git a/tests/unit/notice_metadata_processor/test_xpath_registry.py b/tests/unit/notice_metadata_processor/test_xpath_registry.py index 23c568399..32aeb150a 100644 --- a/tests/unit/notice_metadata_processor/test_xpath_registry.py +++ b/tests/unit/notice_metadata_processor/test_xpath_registry.py @@ -1,8 +1,15 @@ -from ted_sws.notice_metadata_processor.services.xpath_registry import XpathRegistry +from ted_sws.notice_metadata_processor.adapters.xpath_registry import DefaultXPathRegistry, EformsXPathRegistry -def test_xpath_registry(): - xpath_registry = XpathRegistry() +def test_default_xpath_registry(): + xpath_registry = DefaultXPathRegistry() assert isinstance(xpath_registry.xpath_regulation, str) assert xpath_registry.xpath_ojs_type == "manifestation_ns:CODED_DATA_SECTION/manifestation_ns:REF_OJS/manifestation_ns:COLL_OJ" assert xpath_registry.xpath_name_of_buyer_elements == "manifestation_ns:TRANSLATION_SECTION/manifestation_ns:ML_AA_NAMES/" + + +def test_eforms_xpath_registry(): + xpath_registry = EformsXPathRegistry() + assert isinstance(xpath_registry.xpath_notice_type, str) + assert xpath_registry.xpath_notice_subtype == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']" + assert xpath_registry.xpath_notice_type == ".//cbc:NoticeTypeCode" \ No newline at end of file diff --git a/tests/unit/notice_packager/conftest.py b/tests/unit/notice_packager/conftest.py index 99271c336..6ad49bb87 100644 --- a/tests/unit/notice_packager/conftest.py +++ b/tests/unit/notice_packager/conftest.py @@ -13,13 +13,10 @@ import pytest from ted_sws.core.model.metadata import NormalisedMetadata -from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor_prototype import \ +from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import \ DefaultNoticeMetadataExtractor -from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser_prototype import \ +from ted_sws.notice_metadata_processor.adapters.notice_metadata_normaliser import \ DefaultNoticeMetadataNormaliser -from ted_sws.notice_metadata_processor.model.metadata import ExtractedMetadata -from ted_sws.notice_metadata_processor.services.xml_manifestation_metadata_extractor import \ - XMLManifestationMetadataExtractor from ted_sws.notice_packager.model.metadata import PackagerMetadata, NoticeMetadata, WorkMetadata, ExpressionMetadata, \ ManifestationMetadata from tests import TEST_DATA_PATH