Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fixes for extractor #519

Merged
merged 2 commits into from
Feb 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -276,16 +276,19 @@ def __init__(self, xml_manifestation: XMLManifestation):

@property
def title(self):
title_country = LanguageTaggedString(text=extract_text_from_element(
element=self.manifestation_root.find(self.xpath_registry.xpath_title_country, namespaces=self.namespaces)),language='')
title_text = LanguageTaggedString(
text=extract_text_from_element(element=self.manifestation_root.find(
self.xpath_registry.xpath_title,
namespaces=self.namespaces)),
language=extract_attribute_from_element(element=self.manifestation_root.find(
self.xpath_registry.xpath_title,
namespaces=self.namespaces), attrib_key="languageID"))
return [CompositeTitle(title=title_text, title_country=title_country)]
title_translations = []
title_elements = self.manifestation_root.findall(
self.xpath_registry.xpath_title,
namespaces=self.namespaces)
for title in title_elements:
language = title.find(".").attrib["languageID"]
title_country = LanguageTaggedString(text=language, language=language)
title_text = LanguageTaggedString(
text=extract_text_from_element(element=title),
language=language)
title_translations.append(
CompositeTitle(title=title_text, title_country=title_country))
return title_translations

@property
def publication_date(self):
Expand Down Expand Up @@ -324,9 +327,21 @@ def type_of_procedure(self):

@property
def place_of_performance(self):
extracted_nuts_code = extract_text_from_element(
element=self.manifestation_root.find(self.xpath_registry.xpath_place_of_performance, namespaces=self.namespaces))
return [EncodedValue(value=extracted_nuts_code,code=extracted_nuts_code)]
extracted_project_nuts_code = extract_text_from_element(
element=self.manifestation_root.find(self.xpath_registry.xpath_place_of_performance,
namespaces=self.namespaces))
place_of_performance_organisation_elements = self.manifestation_root.findall(
self.xpath_registry.xpath_place_of_performance_elements, namespaces=self.namespaces)
nuts_code_from_organisations = [EncodedValue(code=extract_text_from_element(element=element),
value=extract_text_from_element(element=element)) for element in
place_of_performance_organisation_elements]

if extracted_project_nuts_code:
extracted_project_nuts_encoded = EncodedValue(value=extracted_project_nuts_code,
code=extracted_project_nuts_code)
nuts_code_from_organisations.append(extracted_project_nuts_encoded)

return nuts_code_from_organisations

@property
def common_procurement(self):
Expand Down
8 changes: 6 additions & 2 deletions ted_sws/notice_metadata_processor/adapters/xpath_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class EformsXPathRegistry(XPathRegistryABC):

@property
def xpath_title(self):
return ".//cac:ProcurementProject/cbc:Name"
return "./cac:ProcurementProject/cbc:Name"

@property
def xpath_title_country(self):
Expand Down Expand Up @@ -176,7 +176,7 @@ def xpath_document_sent_date(self):

@property
def xpath_type_of_contract(self):
return ".//cac:ProcurementProject/cbc:ProcurementTypeCode[@listName='contract-nature']"
return "./cac:ProcurementProject/cbc:ProcurementTypeCode[@listName='contract-nature']"

@property
def xpath_type_of_procedure(self):
Expand All @@ -186,6 +186,10 @@ def xpath_type_of_procedure(self):
def xpath_place_of_performance(self):
return ".//cac:ProcurementProject/cac:RealizedLocation/cac:Address/cbc:CountrySubentityCode[@listName='nuts']"

@property
def xpath_place_of_performance_elements(self):
return ".//efac:Organizations/efac:Organization/efac:Company/cac:PostalAddress/cbc:CountrySubentityCode[@listName='nuts']"

@property
def xpath_common_procurement_elements(self):
return ".//cac:ProcurementProject/*/cbc:ItemClassificationCode[@listName='cpv']"
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import xml.etree.ElementTree as ET
from importlib.resources import path

from ted_sws.core.model.manifestation import XMLManifestation
from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import EformsNoticeMetadataExtractor, \
Expand Down Expand Up @@ -117,7 +118,7 @@ def test_metadata_eform_extractor(eform_notice_622690):
assert extracted_metadata_dict["extracted_form_number"] == None


def _test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths):
def test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths):
for xml_notice_path in eforms_xml_notice_paths:
notice_id = xml_notice_path.name
eforms_subtype = xml_notice_path.parent.name
Expand Down
Loading