diff --git a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py
index f94caac1..67716ebc 100644
--- a/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py
+++ b/ted_sws/notice_metadata_processor/adapters/notice_metadata_extractor.py
@@ -363,8 +363,12 @@ def legal_basis_directive(self):
@property
def extracted_notice_subtype(self):
+ # Prefer the SubTypeCode tagged listName='notice-subtype'; otherwise fall back to any SubTypeCode.
return extract_text_from_element(
- element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype, namespaces=self.namespaces))
+ element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_first,
+ namespaces=self.namespaces)) or extract_text_from_element(
+ element=self.manifestation_root.find(self.xpath_registry.xpath_notice_subtype_second,
+ namespaces=self.namespaces))

@property
def extracted_eform_type(self):
return extract_attribute_from_element(
diff --git a/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py b/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py
index 54198ff6..52218c8c 100644
--- a/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py
+++ b/ted_sws/notice_metadata_processor/adapters/notice_metadata_normaliser.py
@@ -284,7 +284,12 @@ def get_form_type_notice_type_and_legal_basis(cls, extracted_notice_subtype: str
Get the values for form type, notice type and legal basis from the eForm mapping files
"""
ef_map: pd.DataFrame = mapping_registry.ef_notice_df
- filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0]
+ # An empty query result means the subtype has no row in the eForms mapping; report that explicitly.
+ try:
+ filtered_df = ef_map.query(f"{E_FORMS_SUBTYPE_KEY}=='{extracted_notice_subtype}'").to_dict(orient='records')[0]
+ except IndexError as error:
+ raise Exception(
+ f'No mapping available for {extracted_notice_subtype} notice subtype. Please check that the field exists in the XML content if the notice subtype is not specified in this message') from error
try:
form_type = filtered_df[FORM_TYPE_KEY]
notice_type = filtered_df[E_FORM_NOTICE_TYPE_COLUMN]
diff --git a/ted_sws/notice_metadata_processor/adapters/xpath_registry.py b/ted_sws/notice_metadata_processor/adapters/xpath_registry.py
index 2d0667c7..d7a4407a 100644
--- a/ted_sws/notice_metadata_processor/adapters/xpath_registry.py
+++ b/ted_sws/notice_metadata_processor/adapters/xpath_registry.py
@@ -203,9 +203,13 @@ def xpath_legal_basis_directive(self):
return ".//cbc:RegulatoryDomain"
@property
- def xpath_notice_subtype(self):
+ def xpath_notice_subtype_first(self):
return ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']"
+ @property
+ def xpath_notice_subtype_second(self):
+ return ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode"
+
@property
def xpath_form_type(self):
return ".//cbc:NoticeTypeCode"
diff --git a/tests/test_data/notice_normalisation/2023-OJS153-00486429.xml b/tests/test_data/notice_normalisation/2023-OJS153-00486429.xml
new file mode 100644
index 00000000..96629c6f
--- /dev/null
+++ b/tests/test_data/notice_normalisation/2023-OJS153-00486429.xml
@@ -0,0 +1,335 @@
+
+
+
+
+
+ 2023-08-08+03:00
+ 19:30:47.935+03:00
+
+ 16
+
+
+
+
+ http://www.ria.ee
+
+ ORG-0001
+
+
+ Riigi Infosüsteemi Amet
+
+
+ Pärnu mnt 139a
+ Tallinn
+ 15169
+
+ EST
+
+
+
+ 70006317
+
+
+ EVA KOIT
+ +372 6630258
+ hanketiim@ria.ee
+
+
+
+
+
+
+ ORG-0002
+
+
+ Riigihangete vaidlustuskomisjon
+
+
+ Tartu mnt 85
+ Tallinn
+ 10115
+
+ EST
+
+
+
+ 1000123
+
+
+ +3726113713
+ vako@fin.ee
+
+
+
+
+
+
+ ORG-0003
+
+
+ Riigihangete register
+
+
+ Tallinn
+
+ EST
+
+
+
+ TED64
+
+
+ +3726113693
+ register@riigihanked.riik.ee
+
+
+
+
+ 00486429-2023153/20232023-08-10Z
+
+
+
+ 2.3
+ eforms-sdk-1.3
+ e41f364c-9fba-4353-aedd-09790dcf9e9c
+ 22cb6d21-6f04-4962-9535-d421fa12ec1e
+ 2023-08-08+03:00
+ 15:05:16.484+03:00
+ 01
+ 32014L0024
+ cn-standard
+ EST
+
+ http://www.ria.ee
+
+ cga
+
+
+ gen-pub
+
+
+
+ ORG-0001
+
+
+ ted-esen
+
+
+ ORG-0003
+
+
+
+
+
+
+
+
+ nati-ground
+ https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/procurement-passport
+
+
+
+
+ Avatud hankemenetlus
+ open
+
+
+ 263753
+ IT- ja andmeteadustööd
+ Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks.
+ services
+ Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise.
+
+ 20000000.00
+
+
+ 72200000
+
+
+
+
+ EST
+
+
+
+
+ 48
+
+
+
+ LOT-0000
+
+
+
+
+
+
+ Finantseerimisallikateks võivad olla riigieelarve ja/või Euroopa Liidu struktuurfondide ja/või Euroopa Regionaalarengu Fondi ja/või Euroopa Liidu Taaste- ja vastupidavusrahastu vms vahendid.
+
+
+ sui-act
+ n-used
+ false
+
+
+ ef-stand
+ n-used
+ false
+
+
+ tp-abil
+ n-used
+ false
+
+
+ false
+
+
+
+
+
+ not-allowed
+ eu-funds
+ not-requ
+ false
+
+ false
+
+
+ 7101370
+ non-restricted-document
+ EST
+
+
+ https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/general-info
+
+
+
+
+
+ none
+
+
+
+ false
+
+
+ no
+
+
+ not-allowed
+
+
+ required
+
+
+
+
+
+
+
+
+
+ poi-exa
+ 100.0
+
+
+
+
+
+ price
+
+
+
+
+
+ ORG-0001
+
+
+
+ https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/tenders
+
+ ORG-0001
+
+
+
+ 5
+
+
+
+
+ ORG-0002
+
+
+
+
+ ORG-0002
+
+
+
+
+ EST
+
+
+ ENG
+
+
+ false
+ true
+
+
+
+ allowed
+ true
+
+ 2023-09-12+03:00
+ 13:00:00.000+03:00
+
+
+ false
+
+
+ 2023-09-12+03:00
+ 14:00:00.000+03:00
+
+ https://riigihanked.riik.ee/rhr-web/#/
+
+
+
+ false
+
+
+ fa-w-rc
+
+
+ none
+
+
+
+ 263753
+ IT- ja andmeteadustööd
+ Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks.
+ services
+ Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise.
+
+ n-inc
+
+
+ 20000000.00
+
+
+ 72200000
+
+
+
+
+ EST
+
+
+
+
+ 48
+
+
+
+
\ No newline at end of file
diff --git a/tests/test_data/notice_normalisation/no_eform_subtype_notice.xml b/tests/test_data/notice_normalisation/no_eform_subtype_notice.xml
new file mode 100644
index 00000000..be774c8f
--- /dev/null
+++ b/tests/test_data/notice_normalisation/no_eform_subtype_notice.xml
@@ -0,0 +1,335 @@
+
+
+
+
+
+ 2023-08-08+03:00
+ 19:30:47.935+03:00
+
+
+
+
+
+
+ http://www.ria.ee
+
+ ORG-0001
+
+
+ Riigi Infosüsteemi Amet
+
+
+ Pärnu mnt 139a
+ Tallinn
+ 15169
+
+ EST
+
+
+
+ 70006317
+
+
+ EVA KOIT
+ +372 6630258
+ hanketiim@ria.ee
+
+
+
+
+
+
+ ORG-0002
+
+
+ Riigihangete vaidlustuskomisjon
+
+
+ Tartu mnt 85
+ Tallinn
+ 10115
+
+ EST
+
+
+
+ 1000123
+
+
+ +3726113713
+ vako@fin.ee
+
+
+
+
+
+
+ ORG-0003
+
+
+ Riigihangete register
+
+
+ Tallinn
+
+ EST
+
+
+
+ TED64
+
+
+ +3726113693
+ register@riigihanked.riik.ee
+
+
+
+
+ 00486429-2023153/20232023-08-10Z
+
+
+
+ 2.3
+ eforms-sdk-1.3
+ e41f364c-9fba-4353-aedd-09790dcf9e9c
+ 22cb6d21-6f04-4962-9535-d421fa12ec1e
+ 2023-08-08+03:00
+ 15:05:16.484+03:00
+ 01
+ 32014L0024
+ cn-standard
+ EST
+
+ http://www.ria.ee
+
+ cga
+
+
+ gen-pub
+
+
+
+ ORG-0001
+
+
+ ted-esen
+
+
+ ORG-0003
+
+
+
+
+
+
+
+
+ nati-ground
+ https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/procurement-passport
+
+
+
+
+ Avatud hankemenetlus
+ open
+
+
+ 263753
+ IT- ja andmeteadustööd
+ Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks.
+ services
+ Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise.
+
+ 20000000.00
+
+
+ 72200000
+
+
+
+
+ EST
+
+
+
+
+ 48
+
+
+
+ LOT-0000
+
+
+
+
+
+
+ Finantseerimisallikateks võivad olla riigieelarve ja/või Euroopa Liidu struktuurfondide ja/või Euroopa Regionaalarengu Fondi ja/või Euroopa Liidu Taaste- ja vastupidavusrahastu vms vahendid.
+
+
+ sui-act
+ n-used
+ false
+
+
+ ef-stand
+ n-used
+ false
+
+
+ tp-abil
+ n-used
+ false
+
+
+ false
+
+
+
+
+
+ not-allowed
+ eu-funds
+ not-requ
+ false
+
+ false
+
+
+ 7101370
+ non-restricted-document
+ EST
+
+
+ https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/general-info
+
+
+
+
+
+ none
+
+
+
+ false
+
+
+ no
+
+
+ not-allowed
+
+
+ required
+
+
+
+
+
+
+
+
+
+ poi-exa
+ 100.0
+
+
+
+
+
+ price
+
+
+
+
+
+ ORG-0001
+
+
+
+ https://riigihanked.riik.ee/rhr-web/#/procurement/5825900/tenders
+
+ ORG-0001
+
+
+
+ 5
+
+
+
+
+ ORG-0002
+
+
+
+
+ ORG-0002
+
+
+
+
+ EST
+
+
+ ENG
+
+
+ false
+ true
+
+
+
+ allowed
+ true
+
+ 2023-09-12+03:00
+ 13:00:00.000+03:00
+
+
+ false
+
+
+ 2023-09-12+03:00
+ 14:00:00.000+03:00
+
+ https://riigihanked.riik.ee/rhr-web/#/
+
+
+
+ false
+
+
+ fa-w-rc
+
+
+ none
+
+
+
+ 263753
+ IT- ja andmeteadustööd
+ Raamhanke eesmärk on sõlmida mitme osapoolega raamleping Bürokrati, riikliku mobiiliäpi (mRiik) ja keeletehnoloogia komponentide analüüsi-, arendus-, hooldus-, testimis- ja andmeteadustöödeks.
+ services
+ Hankija ei jaga riigihanget osadeks ja sõlmib raamlepingu kõikide edukaks tunnistatud pakkumuse esitanud pakkujatega. See tagab hankijale eesmärgi tõhusaima tulemuse ning rahaliste vahendite otstarbeka ja säästliku kasutamise.
+
+ n-inc
+
+
+ 20000000.00
+
+
+ 72200000
+
+
+
+
+ EST
+
+
+
+
+ 48
+
+
+
+
\ No newline at end of file
diff --git a/tests/unit/notice_metadata_processor/conftest.py b/tests/unit/notice_metadata_processor/conftest.py
index 80eca263..1e1f03e9 100644
--- a/tests/unit/notice_metadata_processor/conftest.py
+++ b/tests/unit/notice_metadata_processor/conftest.py
@@ -16,6 +16,11 @@ def file_system_repository_path():
return TEST_DATA_PATH / "notice_transformer" / "mapping_suite_processor_repository"
+@pytest.fixture
+def notice_normalisation_test_data_path():
+ return TEST_DATA_PATH / "notice_normalisation"
+
+
@pytest.fixture
def eforms_xml_notice_paths() -> List[pathlib.Path]:
eforms_xml_notices_path = TEST_DATA_PATH / "eforms_samples"
diff --git a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py
index 5405c6cc..1c5d9195 100644
--- a/tests/unit/notice_metadata_processor/test_metadata_normaliser.py
+++ b/tests/unit/notice_metadata_processor/test_metadata_normaliser.py
@@ -1,5 +1,6 @@
import pytest
+from ted_sws.core.model.manifestation import XMLManifestation
from ted_sws.core.model.metadata import NormalisedMetadata
from ted_sws.core.model.notice import NoticeStatus
from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import \
@@ -11,7 +12,8 @@
from ted_sws.notice_metadata_processor.services.metadata_constraints import filter_df_by_variables
from ted_sws.notice_metadata_processor.services.metadata_normalizer import normalise_notice, normalise_notice_by_id, \
check_if_xml_manifestation_is_eform, find_metadata_extractor_based_on_xml_manifestation, \
- find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata
+ find_metadata_normaliser_based_on_xml_manifestation, extract_notice_metadata, normalise_notice_metadata, \
+ extract_and_normalise_notice_metadata
from ted_sws.resources.mapping_files_registry import MappingFilesRegistry
@@ -217,3 +219,20 @@ def test_get_form_type_notice_type_and_legal_basis():
assert form_type == 'competition'
assert notice_type == 'cn-social'
assert legal_basis == '32014L0024'
+
+
+def test_normalising_notice_out_of_index(notice_normalisation_test_data_path):
+ """A notice with subtype 16 normalises; the no-subtype fixture must fail with the mapping error."""
+ notice_xml_path = notice_normalisation_test_data_path / "2023-OJS153-00486429.xml"
+ notice_content = notice_xml_path.read_text(encoding="utf-8")
+ normalised_notice_metadata = extract_and_normalise_notice_metadata(
+ xml_manifestation=XMLManifestation(object_data=notice_content))
+ assert normalised_notice_metadata.eforms_subtype == "16"
+ assert normalised_notice_metadata.notice_publication_number == "00486429-2023"
+
+ broken_notice_xml_path = notice_normalisation_test_data_path / "no_eform_subtype_notice.xml"
+ broken_notice_content = broken_notice_xml_path.read_text(encoding="utf-8")
+
+ with pytest.raises(Exception, match="No mapping available"):
+ extract_and_normalise_notice_metadata(
+ xml_manifestation=XMLManifestation(object_data=broken_notice_content))
diff --git a/tests/unit/notice_metadata_processor/test_xpath_registry.py b/tests/unit/notice_metadata_processor/test_xpath_registry.py
index 32aeb150..1425d5b2 100644
--- a/tests/unit/notice_metadata_processor/test_xpath_registry.py
+++ b/tests/unit/notice_metadata_processor/test_xpath_registry.py
@@ -11,5 +11,7 @@ def test_default_xpath_registry():
def test_eforms_xpath_registry():
xpath_registry = EformsXPathRegistry()
assert isinstance(xpath_registry.xpath_notice_type, str)
- assert xpath_registry.xpath_notice_subtype == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']"
+ assert xpath_registry.xpath_notice_subtype_first == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode[@listName='notice-subtype']"
+ # The fallback XPath (no listName filter) is part of the registry contract too, so pin it as well.
+ assert xpath_registry.xpath_notice_subtype_second == ".//ext:UBLExtensions/ext:UBLExtension/ext:ExtensionContent/efext:EformsExtension/efac:NoticeSubType/cbc:SubTypeCode"
assert xpath_registry.xpath_notice_type == ".//cbc:NoticeTypeCode"
\ No newline at end of file