Skip to content

Commit

Permalink
updated mapping_suite_hash_digest generation algo on package export
Browse files Browse the repository at this point in the history
  • Loading branch information
Kolea PLESCO authored and Kolea PLESCO committed Apr 24, 2024
1 parent 45718c7 commit f9cb632
Show file tree
Hide file tree
Showing 99 changed files with 306,058 additions and 30 deletions.
37 changes: 32 additions & 5 deletions ted_sws/data_manager/adapters/mapping_suite_repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@
MS_STANDARD_METADATA_VERSION_KEY = 'version'
MS_EFORMS_METADATA_VERSION_KEY = 'mapping_version'
MS_METADATA_CONSTRAINTS_KEY = 'metadata_constraints'
MS_METADATA_CONSTRAINTS_START_DATE_KEY = 'start_date'
MS_METADATA_CONSTRAINTS_END_DATE_KEY = 'end_date'
MS_CONSTRAINTS_KEY = 'constraints'
MS_TITLE_KEY = 'title'
MS_HASH_DIGEST_KEY = 'mapping_suite_hash_digest'
Expand Down Expand Up @@ -134,6 +136,26 @@ def __init__(self, repository_path: pathlib.Path):
self.repository_path = repository_path
self.repository_path.mkdir(parents=True, exist_ok=True)

def _preprocess_package_metadata(self, package_metadata: dict) -> None:
    """
    Normalise the date constraints of a package metadata dictionary, in place.

    A truthy, non-list value stored under the ``start_date`` or ``end_date``
    constraint is wrapped in a single-element list. Values that are already
    lists, and falsy values (e.g. ``None``), are left untouched.

    The two keys are handled independently of each other, so metadata that
    carries only one of them (or neither) is processed without raising
    ``KeyError``.

    :param package_metadata: parsed content of the package metadata file;
        modified in place
    :return: None
    """
    metadata_constraints = package_metadata.get(MS_METADATA_CONSTRAINTS_KEY)
    if not isinstance(metadata_constraints, dict):
        # Key absent or explicitly null: nothing to normalise.
        return

    constraints = metadata_constraints.get(MS_CONSTRAINTS_KEY)
    if not isinstance(constraints, dict):
        return

    # `constraints` is the very object nested inside `package_metadata`,
    # so assigning into it updates the caller's dictionary.
    for date_key in (MS_METADATA_CONSTRAINTS_START_DATE_KEY, MS_METADATA_CONSTRAINTS_END_DATE_KEY):
        date_value = constraints.get(date_key)
        if date_value and not isinstance(date_value, list):
            constraints[date_key] = [date_value]

def _read_package_metadata(self, package_path: pathlib.Path) -> dict:
"""
This method allows reading the metadata of a packet.
Expand All @@ -143,6 +165,7 @@ def _read_package_metadata(self, package_path: pathlib.Path) -> dict:
package_metadata_path = package_path / MS_METADATA_FILE_NAME
package_metadata_content = package_metadata_path.read_text(encoding="utf-8")
package_metadata = json.loads(package_metadata_content)
self._preprocess_package_metadata(package_metadata)
return package_metadata

def _read_transformation_rule_set(self, package_path: pathlib.Path) -> TransformationRuleSet:
Expand Down Expand Up @@ -346,8 +369,8 @@ def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional
package_path = self.repository_path / mapping_suite_identifier
if package_path.is_dir():
package_metadata = self._read_package_metadata(package_path)
if MS_MAPPING_TYPE_KEY in package_metadata and package_metadata[
MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS:
if (MS_MAPPING_TYPE_KEY in package_metadata and
package_metadata[MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS):
package_metadata[MS_METADATA_CONSTRAINTS_KEY] = MetadataConstraints(
constraints=MetadataConstraintsEform(
**package_metadata[MS_METADATA_CONSTRAINTS_KEY][MS_CONSTRAINTS_KEY]))
Expand All @@ -363,9 +386,7 @@ def _read_mapping_suite_package(self, mapping_suite_identifier: str) -> Optional
mapping_suite_hash_digest=package_metadata[MS_HASH_DIGEST_KEY],
mapping_type=package_metadata[
MS_MAPPING_TYPE_KEY] if MS_MAPPING_TYPE_KEY in package_metadata else MappingSuiteType.STANDARD_FORMS,
version=package_metadata[
MS_STANDARD_METADATA_VERSION_KEY] if MS_STANDARD_METADATA_VERSION_KEY in package_metadata else \
package_metadata[MS_EFORMS_METADATA_VERSION_KEY],
version=mapping_suite_read_version_from_metadata(package_metadata),
identifier=package_metadata[
MS_METADATA_IDENTIFIER_KEY] if MS_METADATA_IDENTIFIER_KEY in package_metadata else mapping_suite_identifier,
transformation_rule_set=self._read_transformation_rule_set(package_path),
Expand Down Expand Up @@ -421,3 +442,9 @@ def clear_repository(self):
:return:
"""
shutil.rmtree(self.repository_path)


def mapping_suite_read_version_from_metadata(metadata: dict) -> str:
    """
    Read the mapping suite version out of a metadata dictionary.

    When the metadata declares the electronic-forms mapping type, the version
    is looked up under the eForms version key; in every other case (mapping
    type missing or different) it is looked up under the standard version key.

    :param metadata: parsed mapping suite metadata
    :return: the value stored under the selected key, or None when that key
        is absent
    """
    declares_eforms = (
        MS_MAPPING_TYPE_KEY in metadata
        and metadata[MS_MAPPING_TYPE_KEY] == MappingSuiteType.ELECTRONIC_FORMS
    )
    if declares_eforms:
        return metadata.get(MS_EFORMS_METADATA_VERSION_KEY)
    return metadata.get(MS_STANDARD_METADATA_VERSION_KEY)
46 changes: 36 additions & 10 deletions ted_sws/mapping_suite_processor/adapters/mapping_suite_hasher.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,35 @@

""" """
import hashlib
import json
import pathlib
import re
from typing import Tuple, List, Union

from ted_sws.core.model.transform import MappingSuiteType
from ted_sws.data_manager.adapters.mapping_suite_repository import MS_TRANSFORM_FOLDER_NAME, \
MS_CONCEPTUAL_MAPPING_FILE_NAME, MS_MAPPINGS_FOLDER_NAME, MS_RESOURCES_FOLDER_NAME
MS_MAPPINGS_FOLDER_NAME, MS_RESOURCES_FOLDER_NAME, MS_CONCEPTUAL_MAPPING_FILE_NAME, MS_MAPPING_TYPE_KEY
from ted_sws.mapping_suite_processor.model.mapping_suite_metadata import EFormsPackageMetadataBase


class MappingSuiteHasher:
"""
"""

def __init__(self, mapping_suite_path: Union[pathlib.Path, str]):
self.mapping_suite_path = pathlib.Path(mapping_suite_path)
def __init__(self, mapping_suite_path: Union[pathlib.Path, str], mapping_suite_metadata: dict = None):
    """
    Set up a hasher for one mapping suite package.

    :param mapping_suite_path: location of the mapping suite package, given
        either as a ``pathlib.Path`` or as a plain string
    :param mapping_suite_metadata: optional parsed metadata of the package;
        when it declares the eForms mapping type it is passed through
        ``EFormsPackageMetadataBase`` and replaced by that model's ``dict()``
        output
    """
    # Coerce to Path: the hashing code builds sub-paths with the "/" operator,
    # which a plain str does not support.
    self.mapping_suite_path = pathlib.Path(mapping_suite_path)
    self.mapping_suite_metadata = mapping_suite_metadata

    if self.is_for_eforms():
        self.mapping_suite_metadata = EFormsPackageMetadataBase(**mapping_suite_metadata).dict()

def is_for_eforms(self) -> bool:
    """
    Tell whether the metadata held by this hasher describes an eForms package.

    :return: True only when metadata is present and its mapping type entry
        equals ``MappingSuiteType.ELECTRONIC_FORMS``; False otherwise,
        including when no metadata (or empty metadata) was supplied
    """
    metadata = self.mapping_suite_metadata
    if not metadata:
        # Return a real boolean instead of leaking None / {} to the caller.
        return False
    return bool(
        MS_MAPPING_TYPE_KEY in metadata
        and metadata.get(MS_MAPPING_TYPE_KEY) == MappingSuiteType.ELECTRONIC_FORMS
    )

def hash_critical_mapping_files(self) -> List[Tuple[str, str]]:
"""
Expand All @@ -43,17 +57,19 @@ def _hash_a_file(file_path: pathlib.Path) -> Tuple[str, str]:
relative_path = str(file_path).replace(str(self.mapping_suite_path), "")
return relative_path, hashed_line

files_to_hash = [
files_to_hash = [] if self.is_for_eforms() else [
self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME / MS_CONCEPTUAL_MAPPING_FILE_NAME,
]

mapping_files = filter(lambda item: item.is_file(),
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME /
MS_MAPPINGS_FOLDER_NAME).iterdir())
mapping_files = filter(
lambda item: item.is_file(),
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME / MS_MAPPINGS_FOLDER_NAME).iterdir()
)

mapping_resource_files = filter(lambda item: item.is_file(),
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME /
MS_RESOURCES_FOLDER_NAME).iterdir())
mapping_resource_files = filter(
lambda item: item.is_file(),
(self.mapping_suite_path / MS_TRANSFORM_FOLDER_NAME / MS_RESOURCES_FOLDER_NAME).iterdir()
)

files_to_hash += mapping_files
files_to_hash += mapping_resource_files
Expand All @@ -62,6 +78,11 @@ def _hash_a_file(file_path: pathlib.Path) -> Tuple[str, str]:
result.sort(key=lambda x: x[0])
return result

def hash_mapping_metadata(self) -> str:
    """
    Compute the SHA-256 digest of the mapping suite metadata.

    The metadata is serialised with ``json.dumps`` (default options), encoded
    as UTF-8 and hashed.

    :return: the digest as a hexadecimal string
    """
    serialized_metadata = json.dumps(self.mapping_suite_metadata)
    digest = hashlib.sha256()
    digest.update(serialized_metadata.encode('utf-8'))
    return digest.hexdigest()

def hash_mapping_suite(self, with_version: str = "") -> str:
"""
Returns a hash of the mapping suite.
Expand All @@ -74,6 +95,11 @@ def hash_mapping_suite(self, with_version: str = "") -> str:
"""
list_of_hashes = self.hash_critical_mapping_files()
signatures = [signature[1] for signature in list_of_hashes]

if self.is_for_eforms():
signatures.append(self.hash_mapping_metadata())

if with_version:
signatures += with_version

return hashlib.sha256(str.encode(",".join(signatures))).hexdigest()
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import json
import pathlib
from typing import List, Union

from ted_sws.core.model.transform import MetadataConstraints
from ted_sws.data_manager.adapters.mapping_suite_repository import MS_TRANSFORM_FOLDER_NAME, MS_TEST_DATA_FOLDER_NAME, \
MS_CONCEPTUAL_MAPPING_FILE_NAME, MS_RESOURCES_FOLDER_NAME, MS_MAPPINGS_FOLDER_NAME, MS_METADATA_FILE_NAME, \
MS_VALIDATE_FOLDER_NAME, MS_SPARQL_FOLDER_NAME, MS_SHACL_FOLDER_NAME, MS_OUTPUT_FOLDER_NAME, MS_TEST_SUITE_REPORT
MS_VALIDATE_FOLDER_NAME, MS_SPARQL_FOLDER_NAME, MS_SHACL_FOLDER_NAME, MS_OUTPUT_FOLDER_NAME, MS_TEST_SUITE_REPORT, \
mapping_suite_read_version_from_metadata
from ted_sws.event_manager.model.event_message import EventMessage, EventMessageLogSettings
from ted_sws.event_manager.services.logger_from_context import get_console_logger
from ted_sws.mapping_suite_processor.adapters.mapping_suite_hasher import MappingSuiteHasher
from ted_sws.mapping_suite_processor.services.mapping_suite_reader import mapping_suite_read_metadata, \
MAPPING_SUITE_HASH, VERSION_KEY
MAPPING_SUITE_HASH

SHACL_KEYWORD = "shacl"
SPARQL_KEYWORD = "sparql"
Expand Down Expand Up @@ -143,19 +142,20 @@ def check_for_changes_by_version(self) -> bool:
settings=self.log_settings)
success = True

metadata = mapping_suite_read_metadata(mapping_suite_path=self.mapping_suite_path)
mapping_suite_metadata = mapping_suite_read_metadata(mapping_suite_path=self.mapping_suite_path)
version = mapping_suite_read_version_from_metadata(mapping_suite_metadata)

version = metadata.get(VERSION_KEY)
mapping_suite_versioned_hash = MappingSuiteHasher(
mapping_suite_path=self.mapping_suite_path,
mapping_suite_metadata=mapping_suite_metadata
).hash_mapping_suite(with_version=version)

mapping_suite_versioned_hash = MappingSuiteHasher(self.mapping_suite_path).hash_mapping_suite(
with_version=version)

if mapping_suite_versioned_hash != metadata.get(MAPPING_SUITE_HASH):
if mapping_suite_versioned_hash != mapping_suite_metadata.get(MAPPING_SUITE_HASH):
self.logger.error(
event_message=EventMessage(
message=f'The Mapping Suite hash digest ({mapping_suite_versioned_hash}) '
f'does not correspond to the one in the metadata.json file '
f'({metadata.get(MAPPING_SUITE_HASH)}.'
f'({mapping_suite_metadata.get(MAPPING_SUITE_HASH)}.'
),
settings=self.log_settings
)
Expand Down
Empty file.
19 changes: 19 additions & 0 deletions ted_sws/mapping_suite_processor/model/mapping_suite_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from typing import Optional

from pydantic import BaseModel

from ted_sws.core.model.transform import MappingSuiteType, MetadataConstraints


class EFormsPackageMetadataBase(BaseModel):
    """
    Pydantic model listing the metadata fields of an eForms mapping suite package.

    NOTE(review): the mapping suite hasher builds this model from the raw
    metadata and hashes the JSON dump of its ``dict()`` output, so the set of
    fields declared here — and presumably their declaration order — feeds the
    resulting hash digest. Confirm before adding, removing or reordering fields.
    NOTE(review): ``mapping_suite_hash_digest`` is not declared here; presumably
    it is dropped on construction so the digest does not hash itself — confirm
    against the pydantic "extra" setting in use.
    """
    identifier: str
    title: str
    created_at: str
    description: str
    mapping_version: str
    ontology_version: str
    # Defaults to the eForms mapping type when the metadata does not state one.
    mapping_type: Optional[MappingSuiteType] = MappingSuiteType.ELECTRONIC_FORMS
    metadata_constraints: MetadataConstraints

    class Config:
        # Pydantic option: populate enum-typed fields with the enum's value
        # rather than the enum member.
        use_enum_values = True
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
from pathlib import Path
from typing import Dict

from ted_sws.core.model.transform import MappingSuiteType
from ted_sws.mapping_suite_processor.adapters.mapping_suite_reader import MappingSuiteReader

VERSION_KEY = "version"
STANDARD_FORM_VERSION_KEY = "version"
EFORM_VERSION_KEY = "mapping_version"
MAPPING_TYPE_KEY = "mapping_type"
MAPPING_SUITE_HASH = "mapping_suite_hash_digest"


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
{
"identifier": "package_eforms_10-24_v1.9",
"title": "Package EF10-EF24, SDK v1.9",
"created_at": "2024-04-24 17:07:38.786702",
"description": "This is the conceptual mapping for eForms subtype 10-24 SDK version 1.9",
"mapping_version": "3.0.0-alpha.3",
"ontology_version": "4.0.0",
"mapping_type": "eforms",
"metadata_constraints": {
"constraints": {
"eforms_subtype": [
"10",
"11",
"12",
"13",
"14",
"15",
"16",
"17",
"18",
"19",
"20",
"21",
"22",
"23",
"24"
],
"start_date": null,
"end_date": null,
"eforms_sdk_versions": [
"1.9"
]
}
},
"mapping_suite_hash_digest": "8940944c0f7e7f5761ab52d09a4e9ad51bee5cd150736f528c8d743e27ea2aaa"
}
Loading

0 comments on commit f9cb632

Please sign in to comment.