Skip to content

Commit

Permalink
Merge pull request #519 from OP-TED/feature/TED4-106
Browse files Browse the repository at this point in the history
fixes for extractor
  • Loading branch information
Dragos0000 authored Feb 20, 2024
2 parents 8c551e6 + cc4f116 commit 10ec84b
Show file tree
Hide file tree
Showing 3 changed files with 36 additions and 16 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -276,16 +276,19 @@ def __init__(self, xml_manifestation: XMLManifestation):

@property
def title(self):
    """Collect the notice title in every language present in the XML.

    Each element matched by ``xpath_registry.xpath_title`` under the
    manifestation root produces one ``CompositeTitle``. Its ``title`` is the
    element text tagged with the element's ``languageID`` attribute; its
    ``title_country`` carries that same language code as both text and
    language tag.

    :return: list of ``CompositeTitle``, one per matched title element
        (empty when no element matches).
    :raises KeyError: if a matched element has no ``languageID`` attribute.
    """
    title_elements = self.manifestation_root.findall(
        self.xpath_registry.xpath_title,
        namespaces=self.namespaces)
    title_translations = []
    for title_element in title_elements:
        # The attribute is read straight from the element: find(".") only
        # returned the element itself.
        language = title_element.attrib["languageID"]
        title_country = LanguageTaggedString(text=language, language=language)
        title_text = LanguageTaggedString(
            text=extract_text_from_element(element=title_element),
            language=language)
        title_translations.append(
            CompositeTitle(title=title_text, title_country=title_country))
    return title_translations

@property
def publication_date(self):
Expand Down Expand Up @@ -324,9 +327,21 @@ def type_of_procedure(self):

@property
def place_of_performance(self):
    """Collect the NUTS codes describing where the contract is performed.

    Codes come from two places: every element matched by
    ``xpath_registry.xpath_place_of_performance_elements`` (organisation
    addresses) and the single element matched by
    ``xpath_registry.xpath_place_of_performance`` (the procurement project).
    The project code is appended last, and only when it is non-empty.

    :return: list of ``EncodedValue`` whose ``code`` and ``value`` both hold
        the extracted text; organisation codes first, then the project code.
    """
    extracted_project_nuts_code = extract_text_from_element(
        element=self.manifestation_root.find(
            self.xpath_registry.xpath_place_of_performance,
            namespaces=self.namespaces))
    organisation_elements = self.manifestation_root.findall(
        self.xpath_registry.xpath_place_of_performance_elements,
        namespaces=self.namespaces)

    nuts_codes = []
    for element in organisation_elements:
        # Extract the text once and reuse it for both fields.
        organisation_nuts_code = extract_text_from_element(element=element)
        nuts_codes.append(EncodedValue(code=organisation_nuts_code,
                                       value=organisation_nuts_code))

    if extracted_project_nuts_code:
        nuts_codes.append(EncodedValue(value=extracted_project_nuts_code,
                                       code=extracted_project_nuts_code))

    return nuts_codes

@property
def common_procurement(self):
Expand Down
8 changes: 6 additions & 2 deletions ted_sws/notice_metadata_processor/adapters/xpath_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class EformsXPathRegistry(XPathRegistryABC):

@property
def xpath_title(self):
    """XPath of the procurement project name elements (the notice title).

    The path starts with ``./`` so that only a ``cac:ProcurementProject``
    that is a direct child of the context element is matched; a leading
    ``.//`` would also match projects nested deeper in the tree.
    """
    return "./cac:ProcurementProject/cbc:Name"

@property
def xpath_title_country(self):
Expand Down Expand Up @@ -176,7 +176,7 @@ def xpath_document_sent_date(self):

@property
def xpath_type_of_contract(self):
    """XPath of the ``contract-nature`` procurement type code.

    The path starts with ``./`` so that only a ``cac:ProcurementProject``
    that is a direct child of the context element is matched; a leading
    ``.//`` would also match projects nested deeper in the tree.
    """
    return "./cac:ProcurementProject/cbc:ProcurementTypeCode[@listName='contract-nature']"

@property
def xpath_type_of_procedure(self):
Expand All @@ -186,6 +186,10 @@ def xpath_type_of_procedure(self):
def xpath_place_of_performance(self):
    """XPath of the ``nuts`` country-subentity code on the project's realized location."""
    project_nuts_xpath = ".//cac:ProcurementProject/cac:RealizedLocation/cac:Address/cbc:CountrySubentityCode[@listName='nuts']"
    return project_nuts_xpath

@property
def xpath_place_of_performance_elements(self):
    """XPath of the ``nuts`` country-subentity codes on organisation postal addresses."""
    organisation_nuts_xpath = ".//efac:Organizations/efac:Organization/efac:Company/cac:PostalAddress/cbc:CountrySubentityCode[@listName='nuts']"
    return organisation_nuts_xpath

@property
def xpath_common_procurement_elements(self):
    """XPath of the ``cpv`` item classification codes under the procurement project."""
    cpv_codes_xpath = ".//cac:ProcurementProject/*/cbc:ItemClassificationCode[@listName='cpv']"
    return cpv_codes_xpath
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import xml.etree.ElementTree as ET
from importlib.resources import path

from ted_sws.core.model.manifestation import XMLManifestation
from ted_sws.notice_metadata_processor.adapters.notice_metadata_extractor import EformsNoticeMetadataExtractor, \
Expand Down Expand Up @@ -117,7 +118,7 @@ def test_metadata_eform_extractor(eform_notice_622690):
assert extracted_metadata_dict["extracted_form_number"] == None


def _test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths):
def test_metadata_extractor_for_all_eforms_variations(eforms_xml_notice_paths):
for xml_notice_path in eforms_xml_notice_paths:
notice_id = xml_notice_path.name
eforms_subtype = xml_notice_path.parent.name
Expand Down

0 comments on commit 10ec84b

Please sign in to comment.