Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixed error for out of index in normalisation process #522

Merged
merged 3 commits into from
Mar 13, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -363,8 +363,10 @@ def legal_basis_directive(self):
@property
def extracted_notice_subtype(self):
return extract_text_from_element(
element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype, namespaces=self.namespaces))

element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_first,
namespaces=self.namespaces)) or extract_text_from_element(
element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_second,
namespaces=self.namespaces))
@property
def extracted_eform_type(self):
return extract_attribute_from_element(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -284,7 +284,11 @@ def get_form_type_notice_type_and_legal_basis(cls, extracted_notice_subtype: str
Get the values for form type, notice type and legal basis from the eForm mapping files
"""
ef_map: pd.DataFrame = mapping_registry.ef_notice_df
filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0]
try:
filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0]
except:
raise Exception(
f'No mapping available for {extracted_notice_subtype} notice subtype. Please check that the field exists in the XML content if the notice subtype is not specified in this message')
try:
form_type = filtered_df[FORM_TYPE_KEY]
notice_type = filtered_df[E_FORM_NOTICE_TYPE_COLUMN]
Expand Down
6 changes: 5 additions & 1 deletion ted_sws/notice_metadata_processor/adapters/xpath_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,9 +203,13 @@ def xpath_legal_basis_directive(self):
return ".//cbc:RegulatoryDomain"

@property
def xpath_notice_subtype(self):
def xpath_notice_subtype_first(self):
return ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']"

@property
def xpath_notice_subtype_second(self):
    """XPath to ``cbc:SubTypeCode`` under ``efac:NoticeSubType``, without any attribute predicate."""
    subtype_code_xpath = ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode"
    return subtype_code_xpath

@property
def xpath_form_type(self):
    """XPath selecting any ``cbc:NoticeTypeCode`` descendant."""
    notice_type_code_xpath = ".//cbc:NoticeTypeCode"
    return notice_type_code_xpath
Expand Down
335 changes: 335 additions & 0 deletions tests/test_data/notice_normalisation/2023-OJS153-00486429.xml

Large diffs are not rendered by default.

335 changes: 335 additions & 0 deletions tests/test_data/notice_normalisation/no_eform_subtype_notice.xml

Large diffs are not rendered by default.

5 changes: 5 additions & 0 deletions tests/unit/notice_metadata_processor/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ def file_system_repository_path():
return TEST_DATA_PATH / "notice_transformer" / "mapping_suite_processor_repository"


@pytest.fixture
def notice_normalisation_test_data_path():
    """Provide the ``notice_normalisation`` directory located under ``TEST_DATA_PATH``."""
    normalisation_samples_dir = TEST_DATA_PATH / "notice_normalisation"
    return normalisation_samples_dir


@pytest.fixture
def eforms_xml_notice_paths() -> List[pathlib.Path]:
eforms_xml_notices_path = TEST_DATA_PATH / "eforms_samples"
Expand Down
20 changes: 19 additions & 1 deletion tests/unit/notice_metadata_processor/test_metadata_normaliser.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import pytest

from ted_sws.core.model.manifestation import XMLManifestation
from ted_sws.core.model.metadata import NormalisedMetadata
from ted_sws.core.model.notice import NoticeStatus
from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import \
Expand All @@ -11,7 +12,8 @@
from ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice, normalise_notice_by_id, \
check_if_xml_manifestation_is_eform, find_metadata_extractor_based_on_xml_manifestation, \
find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata
find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata, \
extract_and_normalise_notice_metadata
from ted_sws.resources.mapping_files_registry import MappingFilesRegistry


Expand Down Expand Up @@ -217,3 +219,19 @@ def test_get_form_type_notice_type_and_legal_basis():
assert form_type == 'competition'
assert notice_type == 'cn-social'
assert legal_basis == '32014L0024'


def test_normalising_notice_out_of_index(notice_normalisation_test_data_path):
    """Run extraction + normalisation on two sample notices from the fixture directory.

    The first sample must normalise to eForms subtype "16" and publication
    number "00486429-2023". The second sample (its file name suggests it has
    no eForm subtype) must make the same call raise an exception.
    """
    # Sample that is expected to normalise successfully.
    valid_sample = notice_normalisation_test_data_path / "2023-OJS153-00486429.xml"
    valid_xml = valid_sample.read_text(encoding="utf-8")
    valid_manifestation = XMLManifestation(object_data=valid_xml)
    metadata = extract_and_normalise_notice_metadata(xml_manifestation=valid_manifestation)
    assert metadata.eforms_subtype == "16"
    assert metadata.notice_publication_number == "00486429-2023"

    # Sample that is expected to be rejected.
    rejected_sample = notice_normalisation_test_data_path / "no_eform_subtype_notice.xml"
    rejected_xml = rejected_sample.read_text(encoding="utf-8")

    # The manifestation is built inside the context manager so that an error
    # raised during its construction is also accepted by the check.
    with pytest.raises(Exception):
        extract_and_normalise_notice_metadata(
            xml_manifestation=XMLManifestation(object_data=rejected_xml))
Original file line number Diff line number Diff line change
Expand Up @@ -11,5 +11,5 @@ def test_default_xpath_registry():
def test_eforms_xpath_registry():
xpath_registry = EformsXPathRegistry()
assert isinstance(xpath_registry.xpath_notice_type, str)
assert xpath_registry.xpath_notice_subtype == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']"
assert xpath_registry.xpath_notice_subtype_first == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']"
assert xpath_registry.xpath_notice_type == ".//cbc:NoticeTypeCode"
Loading