Skip to content

Commit

Permalink
Merge pull request #532 from OP-TED/feature/TED4-141
Browse files Browse the repository at this point in the history
updates xpath validator
  • Loading branch information
Dragos0000 authored Apr 9, 2024
2 parents 02c3f7f + 0dbac92 commit ea25402
Show file tree
Hide file tree
Showing 42 changed files with 273 additions and 1,530 deletions.
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ ENV_FILE := .env
PROJECT_PATH = $(shell pwd)
AIRFLOW_INFRA_FOLDER ?= ${PROJECT_PATH}/.airflow
RML_MAPPER_PATH = ${PROJECT_PATH}/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH = ${PROJECT_PATH}/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH = ${PROJECT_PATH}/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH = $(PROJECT_PATH)/.limes/limes.jar
HOSTNAME = $(shell hostname)
CAROOT = $(shell pwd)/infra/traefik/certs
Expand Down Expand Up @@ -222,8 +222,8 @@ init-limes:

init-saxon:
@ echo -e "$(BUILD_PRINT)Saxon folder initialization $(END_BUILD_PRINT)"
@ wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/
@ cd .saxon && unzip SaxonHE10-6J.zip && rm -rf SaxonHE10-6J.zip
@ wget -c https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE10-9/SaxonHE10-9J.zip -P .saxon/
@ cd .saxon && unzip SaxonHE10-9J.zip && rm -rf SaxonHE10-9J.zip

start-project-services: | start-airflow start-mongo init-rml-mapper init-limes start-allegro-graph start-metabase
stop-project-services: | stop-airflow stop-mongo stop-allegro-graph stop-metabase
Expand Down
4 changes: 2 additions & 2 deletions infra/airflow-cluster/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ RUN mkdir -p ./.rmlmapper
RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.2.2/rmlmapper-6.2.2-r371-all.jar -O ./.rmlmapper/rmlmapper.jar


RUN wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-6J.zip && rm -rf SaxonHE10-6J.zip
RUN wget -c https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE10-9/SaxonHE10-9J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-9J.zip && rm -rf SaxonHE10-9J.zip


RUN mkdir -p ./.limes
Expand Down
2 changes: 1 addition & 1 deletion infra/airflow-cluster/docker-compose-worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ x-airflow-common:
PYTHONPATH: /opt/airflow/
AIRFLOW_HOME: /opt/airflow
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
volumes:
Expand Down
2 changes: 1 addition & 1 deletion infra/airflow-cluster/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ x-airflow-common:
PYTHONPATH: /opt/airflow/
AIRFLOW_HOME: /opt/airflow
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
volumes:
Expand Down
4 changes: 2 additions & 2 deletions infra/airflow/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ RUN mkdir -p ./.rmlmapper ./dags ./ted_sws ./temp
RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.2.2/rmlmapper-6.2.2-r371-all.jar -O ./.rmlmapper/rmlmapper.jar


RUN wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-6J.zip && rm -rf SaxonHE10-6J.zip
RUN wget -c https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE10-9/SaxonHE10-9J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-9J.zip && rm -rf SaxonHE10-9J.zip

RUN mkdir -p ./.limes
RUN wget -c https://github.com/dice-group/LIMES/releases/download/1.7.9/limes.jar -P ./.limes
Expand Down
2 changes: 1 addition & 1 deletion infra/airflow/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ x-airflow-common:
PYTHONPATH: /opt/airflow/
AIRFLOW_HOME: /opt/airflow
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
volumes:
Expand Down
10 changes: 5 additions & 5 deletions infra/aws/airflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
AIRFLOW_UID: ${AIRFLOW_UID}
image: ${AIRFLOW_IMAGE_URI}
logging:
Expand Down Expand Up @@ -77,7 +77,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
healthcheck:
test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
interval: 10s
Expand Down Expand Up @@ -125,7 +125,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
logging:
driver: awslogs
Expand Down Expand Up @@ -168,7 +168,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
logging:
driver: awslogs
Expand Down Expand Up @@ -215,7 +215,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
labels:
ecs-local.task-definition-input.type: remote
Expand Down
2 changes: 1 addition & 1 deletion infra/aws/worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
# hostname: "worker1"
ports:
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ json2html~=1.3.0
minio~=7.1.1
certifi~=2022.12.7
shortuuid~=1.0.11
pendulum~=2.1.2
pendulum~=2.1.2
saxonche~=12.4
6 changes: 0 additions & 6 deletions ted_sws/core/model/manifestation.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ class ValidationManifestation(Manifestation):

class XPATHCoverageSummaryResult(PropertyBaseModel):
xpath_covered: Optional[int] = 0
xpath_not_covered: Optional[int] = 0


class XPATHCoverageSummaryReport(PropertyBaseModel):
Expand Down Expand Up @@ -154,11 +153,6 @@ class XPATHCoverageValidationResultBase(PropertyBaseModel):
"""
xpath_assertions: Optional[List[XPATHCoverageValidationAssertion]] = []
xpath_covered: Optional[List[str]] = []
xpath_not_covered: Optional[List[str]] = []
xpath_extra: Optional[List[str]] = []
remarked_xpaths: Optional[List[str]] = []
coverage: Optional[float]
conceptual_coverage: Optional[float]


class XPATHCoverageValidationResult(XPATHCoverageValidationResultBase):
Expand Down
113 changes: 1 addition & 112 deletions ted_sws/core/model/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,121 +96,11 @@ class TransformationTestData(MappingSuiteComponent):
test_data: List[FileResource]


class ConceptualMappingXPATH(MappingSuiteComponent):
class MappingXPATH(MappingSuiteComponent):
xpath: str
form_field: Optional[str]


class ConceptualMappingDiffMetadata(MappingSuiteComponent):
""""""
branches: Optional[List[str]]
mapping_suite_ids: Optional[List[str]]
files: Optional[List[Optional[str]]]
defaults: Optional[dict]
metadata: Optional[List[dict]]


class ConceptualMappingDiffData(MappingSuiteComponent):
""""""
html: Optional[str]
transformed: Optional[dict]
original: Optional[dict]


class ConceptualMappingDiff(MappingSuiteComponent):
""""""
created_at: str = datetime.now().isoformat()
metadata: Optional[ConceptualMappingDiffMetadata]
data: Optional[ConceptualMappingDiffData]


class ConceptualMappingMetadataConstraints(PropertyBaseModel):
"""
This class contains Mapping Suite Conceptual Mapping Metadata Constraints Object model structure
"""
eforms_subtype: Optional[List[str]]
start_date: Optional[str]
end_date: Optional[str]
min_xsd_version: Optional[str]
max_xsd_version: Optional[str]


class ConceptualMappingMetadata(MappingSuiteComponent):
"""
"""
identifier: Optional[str]
title: Optional[str]
description: Optional[str]
mapping_version: Optional[str]
epo_version: Optional[str]
base_xpath: Optional[str]
metadata_constraints: Optional[ConceptualMappingMetadataConstraints]


class ConceptualMappingRule(MappingSuiteComponent):
"""
"""
standard_form_field_id: Optional[str]
standard_form_field_name: Optional[str]
eform_bt_id: Optional[str]
eform_bt_name: Optional[str]
field_xpath: Optional[List[str]]
field_xpath_condition: Optional[List[str]]
class_path: Optional[List[str]]
property_path: Optional[List[str]]
triple_fingerprint: Optional[List[str]]
fragment_fingerprint: Optional[List[str]]


class ConceptualMappingResource(MappingSuiteComponent):
"""
"""
file_name: Optional[str]


class ConceptualMappingRMLModule(MappingSuiteComponent):
"""
"""
file_name: Optional[str]


class ConceptualMappingRemark(MappingSuiteComponent):
"""
"""
standard_form_field_id: Optional[str]
standard_form_field_name: Optional[str]
field_xpath: Optional[List[str]]


class ConceptualMappingControlList(MappingSuiteComponent):
"""
"""
field_value: Optional[str]
mapping_reference: Optional[str]
super_type: Optional[str]
xml_path_fragment: Optional[str]


class ConceptualMapping(MappingSuiteComponent):
"""
"""
xpaths: List[ConceptualMappingXPATH] = []
metadata: Optional[ConceptualMappingMetadata]
rules: List[ConceptualMappingRule] = []
mapping_remarks: List[ConceptualMappingRemark] = []
resources: List[ConceptualMappingResource] = []
rml_modules: List[ConceptualMappingRMLModule] = []
cl1_roles: List[ConceptualMappingControlList] = []
cl2_organisations: List[ConceptualMappingControlList] = []


class MappingSuiteType(str, Enum):
STANDARD_FORMS = "standard_forms"
ELECTRONIC_FORMS = "eforms"
Expand All @@ -236,7 +126,6 @@ class MappingSuite(MappingSuiteComponent):
shacl_test_suites: List[SHACLTestSuite]
sparql_test_suites: List[SPARQLTestSuite]
transformation_test_data: TransformationTestData
conceptual_mapping: Optional[ConceptualMapping]

def get_mongodb_id(self) -> str:
return f"{self.identifier}_v{self.version}"
52 changes: 25 additions & 27 deletions ted_sws/data_manager/adapters/mapping_suite_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@

from ted_sws import config
from ted_sws.core.model.transform import MappingSuite, FileResource, TransformationRuleSet, SHACLTestSuite, \
SPARQLTestSuite, MetadataConstraints, TransformationTestData, ConceptualMapping, MappingSuiteType, \
SPARQLTestSuite, MetadataConstraints, TransformationTestData, MappingSuiteType, \
MetadataConstraintsStandardForm, MetadataConstraintsEform
from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
from ted_sws.data_manager.adapters.repository_abc import MappingSuiteRepositoryABC
from ted_sws.mapping_suite_processor.adapters.conceptual_mapping_reader import ConceptualMappingReader

MS_METADATA_FILE_NAME = "metadata.json"
MS_TRANSFORM_FOLDER_NAME = "transformation"
Expand Down Expand Up @@ -184,11 +183,6 @@ def _read_sparql_test_suites(self, package_path: pathlib.Path) -> List[SPARQLTes
sparql_tests=self._read_file_resources(path=sparql_test_suite_path))
for sparql_test_suite_path in sparql_test_suite_paths]

@classmethod
def _read_conceptual_mapping(cls, package_path: pathlib.Path) -> ConceptualMapping:
return ConceptualMappingReader.mapping_suite_read_conceptual_mapping(
package_path / MS_TRANSFORM_FOLDER_NAME / MS_CONCEPTUAL_MAPPING_FILE_NAME)

def _write_package_metadata(self, mapping_suite: MappingSuite):
"""
This method creates the metadata of a package based on the metadata in the mapping_suite.
Expand Down Expand Up @@ -354,27 +348,31 @@ def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional
package_metadata = self._read_package_metadata(package_path)
if MS_MAPPING_TYPE_KEY in package_metadata and package_metadata[
MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS:
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(constraints=MetadataConstraintsEform(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(
constraints=MetadataConstraintsEform(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
else:
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(constraints=MetadataConstraintsStandardForm(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
mapping_suite = MappingSuite(metadata_constraints=package_metadata[MS_METADATA_CONSTRAINTS_KEY],
created_at=package_metadata[MS_CREATED_AT_KEY],
title=package_metadata[MS_TITLE_KEY],
ontology_version=package_metadata[MS_ONTOLOGY_VERSION_KEY],
mapping_suite_hash_digest=package_metadata[MS_HASH_DIGEST_KEY],
mapping_type=package_metadata[MS_MAPPING_TYPE_KEY] if MS_MAPPING_TYPE_KEY in package_metadata else MappingSuiteType.STANDARD_FORMS,
version=package_metadata[
MS_STANDARD_METADATA_VERSION_KEY] if MS_STANDARD_METADATA_VERSION_KEY in package_metadata else \
package_metadata[MS_EFORMS_METADATA_VERSION_KEY],
identifier=package_metadata[
MS_METADATA_IDENTIFIER_KEY] if MS_METADATA_IDENTIFIER_KEY in package_metadata else mapping_suite_identifier,
transformation_rule_set=self._read_transformation_rule_set(package_path),
shacl_test_suites=self._read_shacl_test_suites(package_path),
sparql_test_suites=self._read_sparql_test_suites(package_path),
transformation_test_data=self._read_test_data_package(package_path),
conceptual_mapping=self._read_conceptual_mapping(package_path)) #TODO remove conceptual_mapping value assignment when conceptual mapping reader is removed
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(
constraints=MetadataConstraintsStandardForm(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
mapping_suite = MappingSuite(
metadata_constraints=package_metadata[MS_METADATA_CONSTRAINTS_KEY],
created_at=package_metadata[MS_CREATED_AT_KEY],
title=package_metadata[MS_TITLE_KEY],
ontology_version=package_metadata[MS_ONTOLOGY_VERSION_KEY],
mapping_suite_hash_digest=package_metadata[MS_HASH_DIGEST_KEY],
mapping_type=package_metadata[
MS_MAPPING_TYPE_KEY] if MS_MAPPING_TYPE_KEY in package_metadata else MappingSuiteType.STANDARD_FORMS,
version=package_metadata[
MS_STANDARD_METADATA_VERSION_KEY] if MS_STANDARD_METADATA_VERSION_KEY in package_metadata else \
package_metadata[MS_EFORMS_METADATA_VERSION_KEY],
identifier=package_metadata[
MS_METADATA_IDENTIFIER_KEY] if MS_METADATA_IDENTIFIER_KEY in package_metadata else mapping_suite_identifier,
transformation_rule_set=self._read_transformation_rule_set(package_path),
shacl_test_suites=self._read_shacl_test_suites(package_path),
sparql_test_suites=self._read_sparql_test_suites(package_path),
transformation_test_data=self._read_test_data_package(package_path)
)
return mapping_suite
return None

Expand Down
Loading

0 comments on commit ea25402

Please sign in to comment.