diff --git a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py index f94caac1..67716ebc 100644 --- a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py +++ b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py @@ -363,8 +363,10 @@ def legal_basis_directive(self): @property def extracted_notice_subtype(self): return extract_text_from_element( - element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype, namespaces=self.namespaces)) - + element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_first, + namespaces=self.namespaces)) or extract_text_from_element( + element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_second, + namespaces=self.namespaces)) @property def extracted_eform_type(self): return extract_attribute_from_element( diff --git a/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py b/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py index 54198ff6..52218c8c 100644 --- a/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py +++ b/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py @@ -284,7 +284,11 @@ def get_form_type_notice_type_and_legal_basis(cls, extracted_notice_subtype: str Get the values for form type, notice type and legal basis from the eForm mapping files """ ef_map: pd.DataFrame = mapping_registry.ef_notice_df - filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0] + try: + filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0] + except: + raise Exception( + f'No mapping available for {extracted_notice_subtype} notice subtype. Please check that the field exists in the XML content if the notice subtype is not specified in this message') try: form_type = filtered_df[FORM_TYPE_KEY] notice_type = filtered_df[E_FORM_NOTICE_TYPE_COLUMN] diff --git a/ted_sws/notice_metadata_processor/adapters/xpath_registry.py b/ted_sws/notice_metadata_processor/adapters/xpath_registry.py index 2d0667c7..d7a4407a 100644 --- a/ted_sws/notice_metadata_processor/adapters/xpath_registry.py +++ b/ted_sws/notice_metadata_processor/adapters/xpath_registry.py @@ -203,9 +203,13 @@ def xpath_legal_basis_directive(self): return ".//cbc:RegulatoryDomain" @property - def xpath_notice_subtype(self): + def xpath_notice_subtype_first(self): return ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']" + @property + def xpath_notice_subtype_second(self): + return ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode" + @property def xpath_form_type(self): return ".//cbc:NoticeTypeCode" diff --git a/tests/test_data/notice_normalisation/2023-OJS153-00486429.xml b/tests/test_data/notice_normalisation/2023-OJS153-00486429.xml new file mode 100644 index 00000000..96629c6f --- /dev/null +++ b/tests/test_data/notice_normalisation/2023-OJS153-00486429.xml @@ -0,0 +1,335 @@ + + + + + + 2023-08-08+03:00 + 19:30:47.935+03:00 + + 16 + + + + + http://www.ria.ee + + ORG-0001 + + + Riigi Infosüsteemi Amet + + + Pärnu mnt 139a + Tallinn + 15169 + + EST + + + + 70006317 + + + EVA KOIT + +372 6630258 + hanketiim@ria.ee + + + + + + + ORG-0002 + + + Riigihangete vaidlustuskomisjon + + + Tartu mnt 85 + Tallinn + 10115 + + EST + + + + 1000123 + + + +3726113713 + vako@fin.ee + + + + + + + ORG-0003 + + + Riigihangete register + + + Tallinn + + EST + + + + TED64 + + + +3726113693 + register@riigihanked.riik.ee + + + + + 00486429-2023153/20232023-08-10Z + + + + 2.3 + eforms-sdk-1.3 + e41f364c-9fba-4353-aedd-09790dcf9e9c + 22cb6d21-6f04-4962-9535-d421fa12ec1e + 2023-08-08+03:00 + 15:05:16.484+03:00 + 01 + 32014L0024 + cn-standard + EST + + http://www.ria.ee + + cga + + + gen-pub + + + + ORG-0001 + + + ted-esen + + + ORG-0003 + + + + + + + + + nati-ground + https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/procurement-passport + + + + + Avatud hankemenetlus + open + + + 263753 + IT- ja andmeteadustööd + Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks. + services + Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise. + + 20000000.00 + + + 72200000 + + + + + EST + + + + + 48 + + + + LOT-0000 + + + + + + + Finantseerimisallikateks võivad olla riigieelarve ja/või Euroopa Liidu struktuurfondide ja/või Euroopa Regionaalarengu Fondi ja/või Euroopa Liidu Taaste- ja vastupidavusrahastu vms vahendid. + + + sui-act + n-used + false + + + ef-stand + n-used + false + + + tp-abil + n-used + false + + + false + + + + + + not-allowed + eu-funds + not-requ + false + + false + + + 7101370 + non-restricted-document + EST + + + https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/general-info + + + + + + none + + + + false + + + no + + + not-allowed + + + required + + + + + + + + + + poi-exa + 100.0 + + + + + + price + + + + + + ORG-0001 + + + + https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/tenders + + ORG-0001 + + + + 5 + + + + + ORG-0002 + + + + + ORG-0002 + + + + + EST + + + ENG + + + false + true + + + + allowed + true + + 2023-09-12+03:00 + 13:00:00.000+03:00 + + + false + + + 2023-09-12+03:00 + 14:00:00.000+03:00 + + https://riigihanked.riik.ee/rhr-web/#/ + + + + false + + + fa-w-rc + + + none + + + + 263753 + IT- ja andmeteadustööd + Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks. + services + Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise. + + n-inc + + + 20000000.00 + + + 72200000 + + + + + EST + + + + + 48 + + + + \ No newline at end of file diff --git a/tests/test_data/notice_normalisation/no_eform_subtype_notice.xml b/tests/test_data/notice_normalisation/no_eform_subtype_notice.xml new file mode 100644 index 00000000..be774c8f --- /dev/null +++ b/tests/test_data/notice_normalisation/no_eform_subtype_notice.xml @@ -0,0 +1,335 @@ + + + + + + 2023-08-08+03:00 + 19:30:47.935+03:00 + + + + + + + http://www.ria.ee + + ORG-0001 + + + Riigi Infosüsteemi Amet + + + Pärnu mnt 139a + Tallinn + 15169 + + EST + + + + 70006317 + + + EVA KOIT + +372 6630258 + hanketiim@ria.ee + + + + + + + ORG-0002 + + + Riigihangete vaidlustuskomisjon + + + Tartu mnt 85 + Tallinn + 10115 + + EST + + + + 1000123 + + + +3726113713 + vako@fin.ee + + + + + + + ORG-0003 + + + Riigihangete register + + + Tallinn + + EST + + + + TED64 + + + +3726113693 + register@riigihanked.riik.ee + + + + + 00486429-2023153/20232023-08-10Z + + + + 2.3 + eforms-sdk-1.3 + e41f364c-9fba-4353-aedd-09790dcf9e9c + 22cb6d21-6f04-4962-9535-d421fa12ec1e + 2023-08-08+03:00 + 15:05:16.484+03:00 + 01 + 32014L0024 + cn-standard + EST + + http://www.ria.ee + + cga + + + gen-pub + + + + ORG-0001 + + + ted-esen + + + ORG-0003 + + + + + + + + + nati-ground + https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/procurement-passport + + + + + Avatud hankemenetlus + open + + + 263753 + IT- ja andmeteadustööd + Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks. + services + Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise. + + 20000000.00 + + + 72200000 + + + + + EST + + + + + 48 + + + + LOT-0000 + + + + + + + Finantseerimisallikateks võivad olla riigieelarve ja/või Euroopa Liidu struktuurfondide ja/või Euroopa Regionaalarengu Fondi ja/või Euroopa Liidu Taaste- ja vastupidavusrahastu vms vahendid. + + + sui-act + n-used + false + + + ef-stand + n-used + false + + + tp-abil + n-used + false + + + false + + + + + + not-allowed + eu-funds + not-requ + false + + false + + + 7101370 + non-restricted-document + EST + + + https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/general-info + + + + + + none + + + + false + + + no + + + not-allowed + + + required + + + + + + + + + + poi-exa + 100.0 + + + + + + price + + + + + + ORG-0001 + + + + https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/tenders + + ORG-0001 + + + + 5 + + + + + ORG-0002 + + + + + ORG-0002 + + + + + EST + + + ENG + + + false + true + + + + allowed + true + + 2023-09-12+03:00 + 13:00:00.000+03:00 + + + false + + + 2023-09-12+03:00 + 14:00:00.000+03:00 + + https://riigihanked.riik.ee/rhr-web/#/ + + + + false + + + fa-w-rc + + + none + + + + 263753 + IT- ja andmeteadustööd + Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks. + services + Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise. + + n-inc + + + 20000000.00 + + + 72200000 + + + + + EST + + + + + 48 + + + + \ No newline at end of file diff --git a/tests/unit/notice_metadata_processor/conftest.py b/tests/unit/notice_metadata_processor/conftest.py index 80eca263..1e1f03e9 100644 --- a/tests/unit/notice_metadata_processor/conftest.py +++ b/tests/unit/notice_metadata_processor/conftest.py @@ -16,6 +16,11 @@ def file_system_repository_path(): return TEST_DATA_PATH / "notice_transformer" / "mapping_suite_processor_repository" +@pytest.fixture +def notice_normalisation_test_data_path(): + return TEST_DATA_PATH / "notice_normalisation" + + @pytest.fixture def eforms_xml_notice_paths() -> List[pathlib.Path]: eforms_xml_notices_path = TEST_DATA_PATH / "eforms_samples" diff --git a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py index 5405c6cc..1c5d9195 100644 --- a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py +++ b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py @@ -1,5 +1,6 @@ import pytest +from ted_sws.core.model.manifestation import XMLManifestation from ted_sws.core.model.metadata import NormalisedMetadata from ted_sws.core.model.notice import NoticeStatus from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import \ @@ -11,7 +12,8 @@ from ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice, normalise_notice_by_id, \ check_if_xml_manifestation_is_eform, find_metadata_extractor_based_on_xml_manifestation, \ - find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata + find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata, \ + extract_and_normalise_notice_metadata from ted_sws.resources.mapping_files_registry import MappingFilesRegistry @@ -217,3 +219,19 @@ def test_get_form_type_notice_type_and_legal_basis(): assert form_type == 'competition' assert notice_type == 'cn-social' assert legal_basis == '32014L0024' + + +def test_normalising_notice_out_of_index(notice_normalisation_test_data_path): + notice_xml_path = notice_normalisation_test_data_path / "2023-OJS153-00486429.xml" + notice_content = notice_xml_path.read_text(encoding="utf-8") + normalised_notice_metadata = extract_and_normalise_notice_metadata( + xml_manifestation=XMLManifestation(object_data=notice_content)) + assert normalised_notice_metadata.eforms_subtype == "16" + assert normalised_notice_metadata.notice_publication_number == "00486429-2023" + + broken_notice_xml_path = notice_normalisation_test_data_path / "no_eform_subtype_notice.xml" + broke_notice_content = broken_notice_xml_path.read_text(encoding="utf-8") + + with pytest.raises(Exception): + extract_and_normalise_notice_metadata( + xml_manifestation=XMLManifestation(object_data=broke_notice_content)) diff --git a/tests/unit/notice_metadata_processor/test_xpath_registry.py b/tests/unit/notice_metadata_processor/test_xpath_registry.py index 32aeb150..1425d5b2 100644 --- a/tests/unit/notice_metadata_processor/test_xpath_registry.py +++ b/tests/unit/notice_metadata_processor/test_xpath_registry.py @@ -11,5 +11,5 @@ def test_default_xpath_registry(): def test_eforms_xpath_registry(): xpath_registry = EformsXPathRegistry() assert isinstance(xpath_registry.xpath_notice_type, str) - assert xpath_registry.xpath_notice_subtype == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']" + assert xpath_registry.xpath_notice_subtype_first == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']" assert xpath_registry.xpath_notice_type == ".//cbc:NoticeTypeCode" \ No newline at end of file