Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

updates xpath validator #532

Merged
merged 13 commits into from
Apr 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ ENV_FILE := .env
PROJECT_PATH = $(shell pwd)
AIRFLOW_INFRA_FOLDER ?= ${PROJECT_PATH}/.airflow
RML_MAPPER_PATH = ${PROJECT_PATH}/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH = ${PROJECT_PATH}/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH = ${PROJECT_PATH}/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH = $(PROJECT_PATH)/.limes/limes.jar
HOSTNAME = $(shell hostname)
CAROOT = $(shell pwd)/infra/traefik/certs
Expand Down Expand Up @@ -222,8 +222,8 @@ init-limes:

init-saxon:
@ echo -e "$(BUILD_PRINT)Saxon folder initialization $(END_BUILD_PRINT)"
@ wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/
@ cd .saxon && unzip SaxonHE10-6J.zip && rm -rf SaxonHE10-6J.zip
@ wget -c https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE10-9/SaxonHE10-9J.zip -P .saxon/
@ cd .saxon && unzip SaxonHE10-9J.zip && rm -rf SaxonHE10-9J.zip

start-project-services: | start-airflow start-mongo init-rml-mapper init-limes start-allegro-graph start-metabase
stop-project-services: | stop-airflow stop-mongo stop-allegro-graph stop-metabase
Expand Down
4 changes: 2 additions & 2 deletions infra/airflow-cluster/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ RUN mkdir -p ./.rmlmapper
RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.2.2/rmlmapper-6.2.2-r371-all.jar -O ./.rmlmapper/rmlmapper.jar


RUN wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-6J.zip && rm -rf SaxonHE10-6J.zip
RUN wget -c https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE10-9/SaxonHE10-9J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-9J.zip && rm -rf SaxonHE10-9J.zip


RUN mkdir -p ./.limes
Expand Down
2 changes: 1 addition & 1 deletion infra/airflow-cluster/docker-compose-worker.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ x-airflow-common:
PYTHONPATH: /opt/airflow/
AIRFLOW_HOME: /opt/airflow
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
volumes:
Expand Down
2 changes: 1 addition & 1 deletion infra/airflow-cluster/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ x-airflow-common:
PYTHONPATH: /opt/airflow/
AIRFLOW_HOME: /opt/airflow
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
volumes:
Expand Down
4 changes: 2 additions & 2 deletions infra/airflow/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@ RUN mkdir -p ./.rmlmapper ./dags ./ted_sws ./temp
RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.2.2/rmlmapper-6.2.2-r371-all.jar -O ./.rmlmapper/rmlmapper.jar


RUN wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-6J.zip && rm -rf SaxonHE10-6J.zip
RUN wget -c https://github.com/Saxonica/Saxon-HE/releases/download/SaxonHE10-9/SaxonHE10-9J.zip -P .saxon/
RUN cd .saxon && unzip SaxonHE10-9J.zip && rm -rf SaxonHE10-9J.zip

RUN mkdir -p ./.limes
RUN wget -c https://github.com/dice-group/LIMES/releases/download/1.7.9/limes.jar -P ./.limes
Expand Down
2 changes: 1 addition & 1 deletion infra/airflow/docker-compose.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ x-airflow-common:
PYTHONPATH: /opt/airflow/
AIRFLOW_HOME: /opt/airflow
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
DAG_LOGGER_CONFIG_HANDLERS: ${DAG_LOGGER_CONFIG_HANDLERS}
volumes:
Expand Down
10 changes: 5 additions & 5 deletions infra/aws/airflow.yml
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
AIRFLOW_UID: ${AIRFLOW_UID}
image: ${AIRFLOW_IMAGE_URI}
logging:
Expand Down Expand Up @@ -77,7 +77,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
healthcheck:
test: ["CMD-SHELL", 'airflow jobs check --job-type SchedulerJob --hostname "$${HOSTNAME}"']
interval: 10s
Expand Down Expand Up @@ -125,7 +125,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
logging:
driver: awslogs
Expand Down Expand Up @@ -168,7 +168,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
logging:
driver: awslogs
Expand Down Expand Up @@ -215,7 +215,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
labels:
ecs-local.task-definition-input.type: remote
Expand Down
2 changes: 1 addition & 1 deletion infra/aws/worker.yml
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ services:
PYTHONPATH: /opt/airflow/
LIMES_ALIGNMENT_PATH: /opt/airflow/.limes/limes.jar
RML_MAPPER_PATH: /opt/airflow/.rmlmapper/rmlmapper.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.6.jar
XML_PROCESSOR_PATH: /opt/airflow/.saxon/saxon-he-10.9.jar
image: ${AIRFLOW_IMAGE_URI}
# hostname: "worker1"
ports:
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,4 +25,5 @@ json2html~=1.3.0
minio~=7.1.1
certifi~=2022.12.7
shortuuid~=1.0.11
pendulum~=2.1.2
pendulum~=2.1.2
saxonche~=12.4
6 changes: 0 additions & 6 deletions ted_sws/core/model/manifestation.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,6 @@ class ValidationManifestation(Manifestation):

class XPATHCoverageSummaryResult(PropertyBaseModel):
xpath_covered: Optional[int] = 0
xpath_not_covered: Optional[int] = 0


class XPATHCoverageSummaryReport(PropertyBaseModel):
Expand Down Expand Up @@ -154,11 +153,6 @@ class XPATHCoverageValidationResultBase(PropertyBaseModel):
"""
xpath_assertions: Optional[List[XPATHCoverageValidationAssertion]] = []
xpath_covered: Optional[List[str]] = []
xpath_not_covered: Optional[List[str]] = []
xpath_extra: Optional[List[str]] = []
remarked_xpaths: Optional[List[str]] = []
coverage: Optional[float]
conceptual_coverage: Optional[float]


class XPATHCoverageValidationResult(XPATHCoverageValidationResultBase):
Expand Down
113 changes: 1 addition & 112 deletions ted_sws/core/model/transform.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,121 +96,11 @@ class TransformationTestData(MappingSuiteComponent):
test_data: List[FileResource]


class ConceptualMappingXPATH(MappingSuiteComponent):
class MappingXPATH(MappingSuiteComponent):
xpath: str
form_field: Optional[str]


class ConceptualMappingDiffMetadata(MappingSuiteComponent):
""""""
branches: Optional[List[str]]
mapping_suite_ids: Optional[List[str]]
files: Optional[List[Optional[str]]]
defaults: Optional[dict]
metadata: Optional[List[dict]]


class ConceptualMappingDiffData(MappingSuiteComponent):
""""""
html: Optional[str]
transformed: Optional[dict]
original: Optional[dict]


class ConceptualMappingDiff(MappingSuiteComponent):
""""""
created_at: str = datetime.now().isoformat()
metadata: Optional[ConceptualMappingDiffMetadata]
data: Optional[ConceptualMappingDiffData]


class ConceptualMappingMetadataConstraints(PropertyBaseModel):
"""
This class contains Mapping Suite Conceptual Mapping Metadata Constraints Object model structure
"""
eforms_subtype: Optional[List[str]]
start_date: Optional[str]
end_date: Optional[str]
min_xsd_version: Optional[str]
max_xsd_version: Optional[str]


class ConceptualMappingMetadata(MappingSuiteComponent):
"""

"""
identifier: Optional[str]
title: Optional[str]
description: Optional[str]
mapping_version: Optional[str]
epo_version: Optional[str]
base_xpath: Optional[str]
metadata_constraints: Optional[ConceptualMappingMetadataConstraints]


class ConceptualMappingRule(MappingSuiteComponent):
"""

"""
standard_form_field_id: Optional[str]
standard_form_field_name: Optional[str]
eform_bt_id: Optional[str]
eform_bt_name: Optional[str]
field_xpath: Optional[List[str]]
field_xpath_condition: Optional[List[str]]
class_path: Optional[List[str]]
property_path: Optional[List[str]]
triple_fingerprint: Optional[List[str]]
fragment_fingerprint: Optional[List[str]]


class ConceptualMappingResource(MappingSuiteComponent):
"""

"""
file_name: Optional[str]


class ConceptualMappingRMLModule(MappingSuiteComponent):
"""

"""
file_name: Optional[str]


class ConceptualMappingRemark(MappingSuiteComponent):
"""

"""
standard_form_field_id: Optional[str]
standard_form_field_name: Optional[str]
field_xpath: Optional[List[str]]


class ConceptualMappingControlList(MappingSuiteComponent):
"""

"""
field_value: Optional[str]
mapping_reference: Optional[str]
super_type: Optional[str]
xml_path_fragment: Optional[str]


class ConceptualMapping(MappingSuiteComponent):
"""

"""
xpaths: List[ConceptualMappingXPATH] = []
metadata: Optional[ConceptualMappingMetadata]
rules: List[ConceptualMappingRule] = []
mapping_remarks: List[ConceptualMappingRemark] = []
resources: List[ConceptualMappingResource] = []
rml_modules: List[ConceptualMappingRMLModule] = []
cl1_roles: List[ConceptualMappingControlList] = []
cl2_organisations: List[ConceptualMappingControlList] = []


class MappingSuiteType(str, Enum):
STANDARD_FORMS = "standard_forms"
ELECTRONIC_FORMS = "eforms"
Expand All @@ -236,7 +126,6 @@ class MappingSuite(MappingSuiteComponent):
shacl_test_suites: List[SHACLTestSuite]
sparql_test_suites: List[SPARQLTestSuite]
transformation_test_data: TransformationTestData
conceptual_mapping: Optional[ConceptualMapping]

def get_mongodb_id(self) -> str:
return f"{self.identifier}_v{self.version}"
52 changes: 25 additions & 27 deletions ted_sws/data_manager/adapters/mapping_suite_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,10 @@

from ted_sws import config
from ted_sws.core.model.transform import MappingSuite, FileResource, TransformationRuleSet, SHACLTestSuite, \
SPARQLTestSuite, MetadataConstraints, TransformationTestData, ConceptualMapping, MappingSuiteType, \
SPARQLTestSuite, MetadataConstraints, TransformationTestData, MappingSuiteType, \
MetadataConstraintsStandardForm, MetadataConstraintsEform
from ted_sws.data_manager.adapters import inject_date_string_fields, remove_date_string_fields
from ted_sws.data_manager.adapters.repository_abc import MappingSuiteRepositoryABC
from ted_sws.mapping_suite_processor.adapters.conceptual_mapping_reader import ConceptualMappingReader

MS_METADATA_FILE_NAME = "metadata.json"
MS_TRANSFORM_FOLDER_NAME = "transformation"
Expand Down Expand Up @@ -184,11 +183,6 @@ def _read_sparql_test_suites(self, package_path: pathlib.Path) -> List[SPARQLTes
sparql_tests=self._read_file_resources(path=sparql_test_suite_path))
for sparql_test_suite_path in sparql_test_suite_paths]

@classmethod
def _read_conceptual_mapping(cls, package_path: pathlib.Path) -> ConceptualMapping:
return ConceptualMappingReader.mapping_suite_read_conceptual_mapping(
package_path / MS_TRANSFORM_FOLDER_NAME / MS_CONCEPTUAL_MAPPING_FILE_NAME)

def _write_package_metadata(self, mapping_suite: MappingSuite):
"""
This method creates the metadata of a package based on the metadata in the mapping_suite.
Expand Down Expand Up @@ -354,27 +348,31 @@ def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional
package_metadata = self._read_package_metadata(package_path)
if MS_MAPPING_TYPE_KEY in package_metadata and package_metadata[
MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS:
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(constraints=MetadataConstraintsEform(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(
constraints=MetadataConstraintsEform(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
else:
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(constraints=MetadataConstraintsStandardForm(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
mapping_suite = MappingSuite(metadata_constraints=package_metadata[MS_METADATA_CONSTRAINTS_KEY],
created_at=package_metadata[MS_CREATED_AT_KEY],
title=package_metadata[MS_TITLE_KEY],
ontology_version=package_metadata[MS_ONTOLOGY_VERSION_KEY],
mapping_suite_hash_digest=package_metadata[MS_HASH_DIGEST_KEY],
mapping_type=package_metadata[MS_MAPPING_TYPE_KEY] if MS_MAPPING_TYPE_KEY in package_metadata else MappingSuiteType.STANDARD_FORMS,
version=package_metadata[
MS_STANDARD_METADATA_VERSION_KEY] if MS_STANDARD_METADATA_VERSION_KEY in package_metadata else \
package_metadata[MS_EFORMS_METADATA_VERSION_KEY],
identifier=package_metadata[
MS_METADATA_IDENTIFIER_KEY] if MS_METADATA_IDENTIFIER_KEY in package_metadata else mapping_suite_identifier,
transformation_rule_set=self._read_transformation_rule_set(package_path),
shacl_test_suites=self._read_shacl_test_suites(package_path),
sparql_test_suites=self._read_sparql_test_suites(package_path),
transformation_test_data=self._read_test_data_package(package_path),
conceptual_mapping=self._read_conceptual_mapping(package_path)) #TODO remove conceptual_mapping value assignment when conceptual mapping reader is removed
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(
constraints=MetadataConstraintsStandardForm(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
mapping_suite = MappingSuite(
metadata_constraints=package_metadata[MS_METADATA_CONSTRAINTS_KEY],
created_at=package_metadata[MS_CREATED_AT_KEY],
title=package_metadata[MS_TITLE_KEY],
ontology_version=package_metadata[MS_ONTOLOGY_VERSION_KEY],
mapping_suite_hash_digest=package_metadata[MS_HASH_DIGEST_KEY],
mapping_type=package_metadata[
MS_MAPPING_TYPE_KEY] if MS_MAPPING_TYPE_KEY in package_metadata else MappingSuiteType.STANDARD_FORMS,
version=package_metadata[
MS_STANDARD_METADATA_VERSION_KEY] if MS_STANDARD_METADATA_VERSION_KEY in package_metadata else \
package_metadata[MS_EFORMS_METADATA_VERSION_KEY],
identifier=package_metadata[
MS_METADATA_IDENTIFIER_KEY] if MS_METADATA_IDENTIFIER_KEY in package_metadata else mapping_suite_identifier,
transformation_rule_set=self._read_transformation_rule_set(package_path),
shacl_test_suites=self._read_shacl_test_suites(package_path),
sparql_test_suites=self._read_sparql_test_suites(package_path),
transformation_test_data=self._read_test_data_package(package_path)
)
return mapping_suite
return None

Expand Down
Loading
Loading