From e41e97c08bded90dd0de143708fc75527b1658e6 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 12 Sep 2023 10:59:55 +0300 Subject: [PATCH 1/7] Update Airflow version to 2.7.1 --- infra/airflow-cluster/Dockerfile | 6 +++--- infra/airflow/Dockerfile | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/infra/airflow-cluster/Dockerfile b/infra/airflow-cluster/Dockerfile index 954e9550..2bdd42a4 100644 --- a/infra/airflow-cluster/Dockerfile +++ b/infra/airflow-cluster/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/apache/airflow:2.5.1-python3.8 +FROM docker.io/apache/airflow:2.7.1-python3.8 # quick sudo USER root @@ -14,7 +14,7 @@ COPY requirements.txt /opt/airflow # working in the /opt/airflow WORKDIR /opt/airflow RUN mkdir -p ./.rmlmapper -RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.1.3/rmlmapper-6.1.3-r367-all.jar -O ./.rmlmapper/rmlmapper.jar +RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.2.1/rmlmapper-6.2.1-r368-all.jar -O ./.rmlmapper/rmlmapper.jar RUN wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/ @@ -26,4 +26,4 @@ RUN wget -c https://github.com/dice-group/LIMES/releases/download/1.7.9/limes.ja RUN pip install --upgrade pip -RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.1/constraints-no-providers-3.8.txt" +RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.7.1/constraints-no-providers-3.8.txt" diff --git a/infra/airflow/Dockerfile b/infra/airflow/Dockerfile index 27d682c0..f1170d7e 100644 --- a/infra/airflow/Dockerfile +++ b/infra/airflow/Dockerfile @@ -1,4 +1,4 @@ -FROM docker.io/apache/airflow:2.5.1-python3.8 +FROM docker.io/apache/airflow:2.7.1-python3.8 # quick sudo USER root @@ -14,7 +14,7 @@ COPY requirements.txt /opt/airflow # working in the /opt/airflow WORKDIR /opt/airflow RUN mkdir -p ./.rmlmapper ./dags ./ted_sws ./temp -RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.1.3/rmlmapper-6.1.3-r367-all.jar -O ./.rmlmapper/rmlmapper.jar +RUN wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.2.1/rmlmapper-6.2.1-r368-all.jar -O ./.rmlmapper/rmlmapper.jar RUN wget -c https://kumisystems.dl.sourceforge.net/project/saxon/Saxon-HE/10/Java/SaxonHE10-6J.zip -P .saxon/ @@ -25,4 +25,4 @@ RUN wget -c https://github.com/dice-group/LIMES/releases/download/1.7.9/limes.ja RUN pip install --upgrade pip -RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.1/constraints-no-providers-3.8.txt" +RUN pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.7.1/constraints-no-providers-3.8.txt" From 9c3922a149c89f89e08b5e80aaf419f775804dff Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 12 Sep 2023 11:00:21 +0300 Subject: [PATCH 2/7] Update rmlmapper to 6.2.1 --- Makefile | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 02eab246..8113b31d 100644 --- a/Makefile +++ b/Makefile @@ -24,12 +24,12 @@ CAROOT = $(shell pwd)/infra/traefik/certs install: @ echo -e "$(BUILD_PRINT)Installing the requirements$(END_BUILD_PRINT)" @ pip install --upgrade pip - @ pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.1/constraints-no-providers-3.8.txt" + @ pip install --no-cache-dir -r requirements.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.7.1/constraints-no-providers-3.8.txt" install-dev: @ echo -e "$(BUILD_PRINT)Installing the dev requirements$(END_BUILD_PRINT)" @ pip install --upgrade pip - @ pip install --no-cache-dir -r requirements.dev.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.5.1/constraints-no-providers-3.8.txt" + @ pip install --no-cache-dir -r requirements.dev.txt --constraint "https://raw.githubusercontent.com/apache/airflow/constraints-2.7.1/constraints-no-providers-3.8.txt" test: test-unit @@ -213,7 +213,7 @@ stop-metabase: init-rml-mapper: @ echo -e "RMLMapper folder initialisation!" @ mkdir -p ./.rmlmapper - @ wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.1.3/rmlmapper-6.1.3-r367-all.jar -O ./.rmlmapper/rmlmapper.jar + @ wget -c https://github.com/RMLio/rmlmapper-java/releases/download/v6.2.1/rmlmapper-6.2.1-r368-all.jar -O ./.rmlmapper/rmlmapper.jar init-limes: @ echo -e "Limes folder initialisation!" From f0d8ebde399d4b377c938bffc311a8c500d0e0a7 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 12 Sep 2023 11:01:20 +0300 Subject: [PATCH 3/7] Update project requirements --- requirements.dev.txt | 20 +++++++++--------- requirements.txt | 50 ++++++++++++++++++++++---------------------- 2 files changed, 35 insertions(+), 35 deletions(-) diff --git a/requirements.dev.txt b/requirements.dev.txt index 7e45e5f0..2690b85e 100644 --- a/requirements.dev.txt +++ b/requirements.dev.txt @@ -1,12 +1,12 @@ # Development libraries -coverage~=6.3.1 -pytest~=7.0.0 -pytest-bdd~=5.0.0 -pytest-cov~=3.0.0 -pytest-subtests~=0.6.0 -tox~=3.24.5 +allure-pytest-bdd~=2.13.2 +coverage~=7.3.1 +mongomock~=4.1.2 +pycurl~=7.45.2 +pytest~=7.4.2 +pytest-bdd~=6.1.1 +pytest-cov~=4.1.0 +pytest-subtests~=0.11.0 +tox~=4.11.3 tox-pytest-summary~=0.1.2 -mongomock==4.1.2 -uvicorn[standard] -allure-pytest-bdd==2.10.0 -pycurl~=7.45.2 \ No newline at end of file +uvicorn[standard]~=0.23.2 diff --git a/requirements.txt b/requirements.txt index aa4cd4bc..bf4a3b8d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,28 +1,28 @@ # Project dependecies -pydantic~=1.9.0 -requests~=2.28.2 -deepdiff~=5.7.0 -jinja2~=3.1.2 -python-dotenv~=0.19.2 -pymongo~=4.0.1 -apache-airflow~=2.5.1 -hvac==0.11.2 -SPARQLWrapper==1.8.5 -pandas==1.5.2 -click~=8.1.0 -openpyxl==3.0.9 -colorama~=0.4 -fastapi~=0.77 -python-dateutil~=2.8.2 -rdflib~=6.1.1 -pyshacl~=0.19.0 -agraph-python==101.0.10 +agraph-python~=102.0.0 +apache-airflow~=2.7.1 +certifi~=2023.7.22 +click~=8.1.7 +colorama~=0.4.6 decorator~=5.1.1 -urllib3[secure] -semantic-version==2.10.0 -paramiko~=3.0.0 -ordered-set~=4.0.2 +deepdiff~=6.5.0 +fastapi~=0.103.0 +hvac~=1.2.1 +jinja2~=3.1.2 json2html~=1.3.0 -minio~=7.1.1 -certifi~=2022.12.7 -shortuuid~=1.0.11 \ No newline at end of file +minio~=7.1.16 +openpyxl~=3.1.2 +ordered-set~=4.1.0 +paramiko~=3.3.1 +pandas~=2.0.3 +pydantic~=2.3.0 +pymongo~=4.5.0 +pyshacl~=0.23.0 +python-dateutil~=2.8.2 +python-dotenv~=1.0.0 +rdflib~=6.3.2 +requests~=2.31.0 +semantic-version~=2.10.0 +shortuuid~=1.0.11 +SPARQLWrapper~=2.0.0 +urllib3[secure]~=1.26.16 From 4ac8b8bd7e4410b9e9946db0ce1437f324fe5cb3 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 12 Sep 2023 16:52:36 +0300 Subject: [PATCH 4/7] Adjust tests and model logic to suport pydantic 2 version --- ted_sws/core/model/__init__.py | 26 ++--- ted_sws/core/model/manifestation.py | 62 ++++++------ ted_sws/core/model/metadata.py | 99 +++++++++---------- ted_sws/core/model/notice.py | 23 +++-- ted_sws/core/model/supra_notice.py | 8 +- ted_sws/core/model/transform.py | 90 ++++++++--------- ted_sws/core/model/validation_report.py | 12 +-- ted_sws/notice_packager/model/metadata.py | 38 +++---- tests/conftest.py | 2 +- tests/features/conftest.py | 4 +- tests/unit/core/model/conftest.py | 2 +- tests/unit/core/model/test_pydantic.py | 6 +- .../test_mapping_suite_repository.py | 2 +- 13 files changed, 191 insertions(+), 183 deletions(-) diff --git a/ted_sws/core/model/__init__.py b/ted_sws/core/model/__init__.py index 4d0a91e5..c38d67bd 100644 --- a/ted_sws/core/model/__init__.py +++ b/ted_sws/core/model/__init__.py @@ -22,24 +22,24 @@ class PropertyBaseModel(BaseModel): https://github.com/samuelcolvin/pydantic/issues/935 is solved """ - - @classmethod - def get_properties(cls): - return [ - prop for prop in dir(cls) - if isinstance(getattr(cls, prop), property) and prop not in ("__values__", "fields") - ] - - def dict(self, *args, **kwargs) -> 'DictStrAny': - self.__dict__.update({prop: getattr(self, prop) for prop in self.get_properties()}) - - return super().dict(*args, **kwargs) + #TODO : remove this code, because this implementation is not necessary anymore in pydantic 2 + # @classmethod + # def get_properties(cls): + # return [ + # prop for prop in dir(cls) + # if isinstance(getattr(cls, prop), property) and prop not in ("__values__", "fields") + # ] + # + #TODO : remove this code, because this implementation is not necessary anymore in pydantic 2 + # def dict(self, *args, **kwargs) -> 'DictStrAny': + # self.__dict__.update({prop: getattr(self, prop) for prop in self.get_properties()}) + # return super().dict(*args, **kwargs) def __eq__(self, other) -> bool: if not isinstance(other, self.__class__) or not other: return False # raise ValueError(f"Must compare objects of the same class {self.__class__}") - difference = DeepDiff(self.dict(), other.dict()) + difference = DeepDiff(self.model_dump(), other.model_dump()) return not difference def __ne__(self, other): diff --git a/ted_sws/core/model/manifestation.py b/ted_sws/core/model/manifestation.py index 20897f5d..9c965cee 100644 --- a/ted_sws/core/model/manifestation.py +++ b/ted_sws/core/model/manifestation.py @@ -69,7 +69,7 @@ class XPATHCoverageSummaryResult(PropertyBaseModel): class XPATHCoverageSummaryReport(PropertyBaseModel): - mapping_suite_identifier: Optional[str] + mapping_suite_identifier: Optional[str] = None validation_result: Optional[XPATHCoverageSummaryResult] = XPATHCoverageSummaryResult() @@ -86,8 +86,8 @@ class SPARQLSummaryCountReport(PropertyBaseModel): class SPARQLSummaryResult(PropertyBaseModel): - test_suite_identifier: Optional[str] - mapping_suite_identifier: Optional[str] + test_suite_identifier: Optional[str] = None + mapping_suite_identifier: Optional[str] = None aggregate: Optional[SPARQLSummaryCountReport] = SPARQLSummaryCountReport() @@ -107,8 +107,8 @@ class SHACLSummaryResultSeverityReport(PropertyBaseModel): class SHACLSummaryResult(PropertyBaseModel): - test_suite_identifier: Optional[str] - mapping_suite_identifier: Optional[str] + test_suite_identifier: Optional[str] = None + mapping_suite_identifier: Optional[str] = None result_severity: Optional[SHACLSummaryResultSeverityReport] = SHACLSummaryResultSeverityReport() @@ -141,11 +141,11 @@ class XPATHCoverageValidationAssertion(PropertyBaseModel): """ """ - form_field: Optional[str] - xpath: Optional[str] - count: Optional[int] - notice_hit: Optional[Dict[str, int]] - query_result: Optional[bool] + form_field: Optional[str] = None + xpath: Optional[str] = None + count: Optional[int] = None + notice_hit: Optional[Dict[str, int]] = None + query_result: Optional[bool] = None class XPATHCoverageValidationResultBase(PropertyBaseModel): @@ -157,15 +157,15 @@ class XPATHCoverageValidationResultBase(PropertyBaseModel): xpath_not_covered: Optional[List[str]] = [] xpath_extra: Optional[List[str]] = [] remarked_xpaths: Optional[List[str]] = [] - coverage: Optional[float] - conceptual_coverage: Optional[float] + coverage: Optional[float] = None + conceptual_coverage: Optional[float] = None class XPATHCoverageValidationResult(XPATHCoverageValidationResultBase): """ XPATHCoverageValidationResult for Notice """ - notices: Optional[List[ReportNoticeData]] + notices: Optional[List[ReportNoticeData]] = None class XPATHCoverageValidationReport(XMLValidationManifestation): @@ -173,14 +173,14 @@ class XPATHCoverageValidationReport(XMLValidationManifestation): This is the model structure for Notice(s) XPATHs Coverage Report """ - validation_result: Optional[XPATHCoverageValidationResult] + validation_result: Optional[XPATHCoverageValidationResult] = None class XMLManifestation(Manifestation): """ Original XML Notice manifestation as published on the TED website. """ - xpath_coverage_validation: XPATHCoverageValidationReport = None + xpath_coverage_validation: Optional[XPATHCoverageValidationReport] = None def add_validation(self, validation: Union[XPATHCoverageValidationReport]): if type(validation) == XPATHCoverageValidationReport: @@ -197,7 +197,7 @@ class METSManifestation(Manifestation): """ type: str = METS_TYPE_CREATE - package_name: str = None + package_name: Optional[str] = None class RDFValidationManifestation(ValidationManifestation): @@ -206,15 +206,15 @@ class RDFValidationManifestation(ValidationManifestation): """ mapping_suite_identifier: str - test_suite_identifier: Optional[str] + test_suite_identifier: Optional[str] = None class SPARQLQuery(PropertyBaseModel): """ Stores SPARQL query details """ - title: Optional[str] - description: Optional[str] + title: Optional[str] = None + description: Optional[str] = None xpath: Optional[List[str]] = [] query: str @@ -224,13 +224,13 @@ class SPARQLQueryResult(PropertyBaseModel): Stores SPARQL query execution result """ query: SPARQLQuery - result: Optional[SPARQLQueryRefinedResultType] - query_result: Optional[str] + result: Optional[SPARQLQueryRefinedResultType] = None + query_result: Optional[str] = None fields_covered: Optional[bool] = True missing_fields: Optional[List[str]] = [] - error: Optional[str] - message: Optional[str] - identifier: Optional[str] + error: Optional[str] = None + message: Optional[str] = None + identifier: Optional[str] = None class Config: use_enum_values = True @@ -248,10 +248,10 @@ class QueriedSHACLShapeValidationResult(PropertyBaseModel): Queried SHACL Validation Report which contains the following variables ?focusNode ?message ?resultPath ?resultSeverity ?sourceConstraintComponent ?sourceShape ?value """ - conforms: Optional[str] - results_dict: Optional[dict] - error: Optional[str] - identifier: Optional[str] + conforms: Optional[str] = None + results_dict: Optional[dict] = None + error: Optional[str] = None + identifier: Optional[str] = None class SHACLTestSuiteValidationReport(RDFValidationManifestation): @@ -262,7 +262,7 @@ class SHACLTestSuiteValidationReport(RDFValidationManifestation): class EntityDeduplicationReport(Manifestation): - object_data: Optional[str] + object_data: Optional[str] = None number_of_duplicates: int number_of_cets: int uries: List[str] @@ -272,10 +272,10 @@ class RDFManifestation(Manifestation): """ Transformed manifestation in RDF format """ - mapping_suite_id = "unknown_mapping_suite_id" + mapping_suite_id: str = "unknown_mapping_suite_id" shacl_validations: List[SHACLTestSuiteValidationReport] = [] sparql_validations: List[SPARQLTestSuiteValidationReport] = [] - deduplication_report: Optional[EntityDeduplicationReport] + deduplication_report: Optional[EntityDeduplicationReport] = None def validation_exists(self, validation, validations): """ diff --git a/ted_sws/core/model/metadata.py b/ted_sws/core/model/metadata.py index 8f4ab4fb..9a18a106 100644 --- a/ted_sws/core/model/metadata.py +++ b/ted_sws/core/model/metadata.py @@ -6,10 +6,9 @@ # Email: costezki.eugen@gmail.com """ """ -from typing import List, Optional +from typing import List, Optional, NamedTuple from pydantic import Field -from pydantic.annotated_types import NamedTuple from ted_sws.core.model import PropertyBaseModel @@ -65,17 +64,17 @@ class NormalisedMetadata(Metadata): publication_date: str ojs_issue_number: str ojs_type: str - city_of_buyer: Optional[List[LanguageTaggedString]] - name_of_buyer: Optional[List[LanguageTaggedString]] - original_language: Optional[str] - country_of_buyer: Optional[str] - eu_institution: Optional[bool] - document_sent_date: Optional[str] - deadline_for_submission: Optional[str] + city_of_buyer: Optional[List[LanguageTaggedString]] = None + name_of_buyer: Optional[List[LanguageTaggedString]] = None + original_language: Optional[str] = None + country_of_buyer: Optional[str] = None + eu_institution: Optional[bool] = None + document_sent_date: Optional[str] = None + deadline_for_submission: Optional[str] = None notice_type: str form_type: str - place_of_performance: Optional[List[str]] - extracted_legal_basis_directive: Optional[str] + place_of_performance: Optional[List[str]] = None + extracted_legal_basis_directive: Optional[str] = None legal_basis_directive: str form_number: str eforms_subtype: str @@ -90,17 +89,17 @@ class NormalisedMetadataView(Metadata): publication_date: str ojs_issue_number: str ojs_type: str - city_of_buyer: Optional[str] - name_of_buyer: Optional[str] - original_language: Optional[str] - country_of_buyer: Optional[str] - eu_institution: Optional[bool] - document_sent_date: Optional[str] - deadline_for_submission: Optional[str] + city_of_buyer: Optional[str] = None + name_of_buyer: Optional[str] = None + original_language: Optional[str] = None + country_of_buyer: Optional[str] = None + eu_institution: Optional[bool] = None + document_sent_date: Optional[str] = None + deadline_for_submission: Optional[str] = None notice_type: str form_type: str - place_of_performance: Optional[List[str]] - extracted_legal_basis_directive: Optional[str] + place_of_performance: Optional[List[str]] = None + extracted_legal_basis_directive: Optional[str] = None legal_basis_directive: str form_number: str eforms_subtype: str @@ -113,34 +112,34 @@ class TEDMetadata(Metadata): """ Stores notice original metadata """ - AA: List[str] = None - AC: str = None - CY: List[str] = None - DD: str = None - DI: str = None - DS: str = None - DT: List[str] = None - MA: List[str] = None - NC: List[str] = None - ND: str = None - NL: str = None - OC: List[str] = None - OJ: str = None - OL: str = None - OY: List[str] = None - PC: List[str] = None - PD: str = None - PR: str = None - RC: List[str] = None - RN: List[str] = None - RP: str = None - TD: str = None - TVH: str = None - TVL: str = None - TY: str = None - award_criterion_type: str = Field(default=None, alias='award-criterion-type') + AA: Optional[List[str]] = None + AC: Optional[str] = None + CY: Optional[List[str]] = None + DD: Optional[str] = None + DI: Optional[str] = None + DS: Optional[str] = None + DT: Optional[List[str]] = None + MA: Optional[List[str]] = None + NC: Optional[List[str]] = None + ND: Optional[str] = None + NL: Optional[str] = None + OC: Optional[List[str]] = None + OJ: Optional[str] = None + OL: Optional[str] = None + OY: Optional[List[str]] = None + PC: Optional[List[str]] = None + PD: Optional[str] = None + PR: Optional[str] = None + RC: Optional[List[str]] = None + RN: Optional[List[int]] = None #TODO check if this is a list of strings, now we change to list of ints to fix errors + RP: Optional[str] = None + TD: Optional[str] = None + TVH: Optional[str] = None + TVL: Optional[str] = None + TY: Optional[str] = None + award_criterion_type: Optional[str] = Field(default=None, alias='award-criterion-type') corporate_body: List[str] = Field(default=None, alias='corporate-body') - funding: List[str] = None - notice_identifier: str = Field(default=None, alias='notice-identifier') - notice_type: str = Field(default=None, alias='notice-type') - notice_version: str = Field(default=None, alias='notice-version') + funding: Optional[List[str]] = None + notice_identifier: Optional[str] = Field(default=None, alias='notice-identifier') + notice_type: Optional[str] = Field(default=None, alias='notice-type') + notice_version: Optional[str] = Field(default=None, alias='notice-version') diff --git a/ted_sws/core/model/notice.py b/ted_sws/core/model/notice.py index 86046d8c..2a85a920 100644 --- a/ted_sws/core/model/notice.py +++ b/ted_sws/core/model/notice.py @@ -19,7 +19,7 @@ from functools import total_ordering from typing import Optional, List, Union -from pydantic import Field +from pydantic import Field, computed_field from ted_sws.core.model import PropertyBaseModel from ted_sws.core.model.lazy_object import LazyObjectABC, LazyObjectFieldsLoaderABC @@ -128,8 +128,9 @@ class Config: created_at: str = datetime.now().replace(microsecond=0).isoformat() version_number: int = 0 + @computed_field @property - def status(self): + def status(self) -> NoticeStatus: return self._status @abc.abstractmethod @@ -192,14 +193,16 @@ class Notice(LazyWorkExpression): _rdf_manifestation: Optional[RDFManifestation] = None _mets_manifestation: Optional[METSManifestation] = None _xml_metadata: Optional[XMLMetadata] = None - validation_summary: Optional[ValidationSummaryReport] + validation_summary: Optional[ValidationSummaryReport] = None + @computed_field @property def original_metadata(self) -> Optional[TEDMetadata]: if self._original_metadata is None: self.load_lazy_field(property_field=Notice.original_metadata) return self._original_metadata + @computed_field @property def xml_manifestation(self) -> XMLManifestation: if self._xml_manifestation is None: @@ -212,36 +215,42 @@ def set_original_metadata(self, ted_metadata: TEDMetadata): def set_xml_manifestation(self, xml_manifestation: XMLManifestation): self._xml_manifestation = xml_manifestation + @computed_field @property def xml_metadata(self) -> XMLMetadata: if self._xml_metadata is None: self.load_lazy_field(property_field=Notice.xml_metadata) return self._xml_metadata + @computed_field @property def preprocessed_xml_manifestation(self) -> XMLManifestation: if self._preprocessed_xml_manifestation is None: self.load_lazy_field(property_field=Notice.preprocessed_xml_manifestation) return self._preprocessed_xml_manifestation + @computed_field @property def distilled_rdf_manifestation(self) -> RDFManifestation: if self._distilled_rdf_manifestation is None: self.load_lazy_field(property_field=Notice.distilled_rdf_manifestation) return self._distilled_rdf_manifestation + @computed_field @property def normalised_metadata(self) -> NormalisedMetadata: if self._normalised_metadata is None: self.load_lazy_field(property_field=Notice.normalised_metadata) return self._normalised_metadata + @computed_field @property def rdf_manifestation(self) -> RDFManifestation: if self._rdf_manifestation is None: self.load_lazy_field(property_field=Notice.rdf_manifestation) return self._rdf_manifestation + @computed_field @property def mets_manifestation(self) -> METSManifestation: if self._mets_manifestation is None: @@ -343,8 +352,8 @@ def _check_status_is_validated(self) -> bool: return True return False - def set_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteValidationReport, - SHACLTestSuiteValidationReport]): + def set_rdf_validation(self, + rdf_validation: Union[SPARQLTestSuiteValidationReport, SHACLTestSuiteValidationReport]): """ Add an RDF validation result to the notice. If METS package data are available, erase them and reset the state. @@ -357,7 +366,7 @@ def set_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteValidationRepo self.rdf_manifestation.add_validation(validation=rdf_validation) def set_distilled_rdf_validation(self, rdf_validation: Union[SPARQLTestSuiteValidationReport, - SHACLTestSuiteValidationReport]): + SHACLTestSuiteValidationReport]): """ :param rdf_validation: @@ -486,7 +495,7 @@ def update_status_to(self, new_status: NoticeStatus): if new_status < NoticeStatus.NORMALISED_METADATA: self.remove_lazy_field(Notice.normalised_metadata) self._normalised_metadata = None - #TODO: preprocessed_xml_manifestation is the same as xml_manifestation + # TODO: preprocessed_xml_manifestation is the same as xml_manifestation # if delete preprocessed xml manifestation will delete xml_manifestation # in future remove _preprocessed_xml_manifestation field from model self._preprocessed_xml_manifestation = None diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index 6faf607a..6b381b84 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -33,8 +33,8 @@ class SupraNoticeValidationReport(Manifestation): """ Result of checking whether all the notices published in TED are present in the internal database. """ - missing_notice_ids: Optional[List[str]] - not_published_notice_ids: Optional[List[str]] + missing_notice_ids: Optional[List[str]] = None + not_published_notice_ids: Optional[List[str]] = None def is_valid(self): if not self.missing_notice_ids and not self.not_published_notice_ids: @@ -47,5 +47,5 @@ class DailySupraNotice(SupraNotice): This is an aggregate over the notices published in TED in a specific day. """ ted_publication_date: date - validation_report: Optional[SupraNoticeValidationReport] - validation_summary: Optional[ValidationSummaryReport] + validation_report: Optional[SupraNoticeValidationReport] = None + validation_summary: Optional[ValidationSummaryReport] = None diff --git a/ted_sws/core/model/transform.py b/ted_sws/core/model/transform.py index 6c1db252..2ab2cc7b 100644 --- a/ted_sws/core/model/transform.py +++ b/ted_sws/core/model/transform.py @@ -24,7 +24,7 @@ class FileResource(MappingSuiteComponent): """ file_name: str file_content: str - original_name: Optional[str] + original_name: Optional[str] = None parents: Optional[List[str]] = [] @@ -75,103 +75,103 @@ class TransformationTestData(MappingSuiteComponent): class ConceptualMappingXPATH(MappingSuiteComponent): xpath: str - form_field: Optional[str] + form_field: Optional[str] = None class ConceptualMappingDiffMetadata(MappingSuiteComponent): """""" - branches: Optional[List[str]] - mapping_suite_ids: Optional[List[str]] - files: Optional[List[Optional[str]]] - defaults: Optional[dict] - metadata: Optional[List[dict]] + branches: Optional[List[str]] = None + mapping_suite_ids: Optional[List[str]] = None + files: Optional[List[Optional[str]]] = None + defaults: Optional[dict] = None + metadata: Optional[List[dict]] = None class ConceptualMappingDiffData(MappingSuiteComponent): """""" - html: Optional[str] - transformed: Optional[dict] - original: Optional[dict] + html: Optional[str] = None + transformed: Optional[dict] = None + original: Optional[dict] = None class ConceptualMappingDiff(MappingSuiteComponent): """""" created_at: str = datetime.now().isoformat() - metadata: Optional[ConceptualMappingDiffMetadata] - data: Optional[ConceptualMappingDiffData] + metadata: Optional[ConceptualMappingDiffMetadata] = None + data: Optional[ConceptualMappingDiffData] = None class ConceptualMappingMetadataConstraints(PropertyBaseModel): """ This class contains Mapping Suite Conceptual Mapping Metadata Constraints Object model structure """ - eforms_subtype: Optional[List[str]] - start_date: Optional[str] - end_date: Optional[str] - min_xsd_version: Optional[str] - max_xsd_version: Optional[str] + eforms_subtype: Optional[List[str]] = None + start_date: Optional[str] = None + end_date: Optional[str] = None + min_xsd_version: Optional[str] = None + max_xsd_version: Optional[str] = None class ConceptualMappingMetadata(MappingSuiteComponent): """ """ - identifier: Optional[str] - title: Optional[str] - description: Optional[str] - mapping_version: Optional[str] - epo_version: Optional[str] - base_xpath: Optional[str] - metadata_constraints: Optional[ConceptualMappingMetadataConstraints] + identifier: Optional[str] = None + title: Optional[str] = None + description: Optional[str] = None + mapping_version: Optional[str] = None + epo_version: Optional[str] = None + base_xpath: Optional[str] = None + metadata_constraints: Optional[ConceptualMappingMetadataConstraints] = None class ConceptualMappingRule(MappingSuiteComponent): """ """ - standard_form_field_id: Optional[str] - standard_form_field_name: Optional[str] - eform_bt_id: Optional[str] - eform_bt_name: Optional[str] - field_xpath: Optional[List[str]] - field_xpath_condition: Optional[List[str]] - class_path: Optional[List[str]] - property_path: Optional[List[str]] - triple_fingerprint: Optional[List[str]] - fragment_fingerprint: Optional[List[str]] + standard_form_field_id: Optional[str] = None + standard_form_field_name: Optional[str] = None + eform_bt_id: Optional[str] = None + eform_bt_name: Optional[str] = None + field_xpath: Optional[List[str]] = None + field_xpath_condition: Optional[List[str]] = None + class_path: Optional[List[str]] = None + property_path: Optional[List[str]] = None + triple_fingerprint: Optional[List[str]] = None + fragment_fingerprint: Optional[List[str]] = None class ConceptualMappingResource(MappingSuiteComponent): """ """ - file_name: Optional[str] + file_name: Optional[str] = None class ConceptualMappingRMLModule(MappingSuiteComponent): """ """ - file_name: Optional[str] + file_name: Optional[str] = None class ConceptualMappingRemark(MappingSuiteComponent): """ """ - standard_form_field_id: Optional[str] - standard_form_field_name: Optional[str] - field_xpath: Optional[List[str]] + standard_form_field_id: Optional[str] = None + standard_form_field_name: Optional[str] = None + field_xpath: Optional[List[str]] = None class ConceptualMappingControlList(MappingSuiteComponent): """ """ - field_value: Optional[str] - mapping_reference: Optional[str] - super_type: Optional[str] - xml_path_fragment: Optional[str] + field_value: Optional[str] = None + mapping_reference: Optional[str] = None + super_type: Optional[str] = None + xml_path_fragment: Optional[str] = None class ConceptualMapping(MappingSuiteComponent): @@ -179,7 +179,7 @@ class ConceptualMapping(MappingSuiteComponent): """ xpaths: List[ConceptualMappingXPATH] = [] - metadata: Optional[ConceptualMappingMetadata] + metadata: Optional[ConceptualMappingMetadata] = None rules: List[ConceptualMappingRule] = [] mapping_remarks: List[ConceptualMappingRemark] = [] resources: List[ConceptualMappingResource] = [] @@ -204,7 +204,7 @@ class MappingSuite(MappingSuiteComponent): shacl_test_suites: List[SHACLTestSuite] sparql_test_suites: List[SPARQLTestSuite] transformation_test_data: TransformationTestData - conceptual_mapping: Optional[ConceptualMapping] + conceptual_mapping: Optional[ConceptualMapping] = None def get_mongodb_id(self) -> str: return f"{self.identifier}_v{self.version}" diff --git a/ted_sws/core/model/validation_report.py b/ted_sws/core/model/validation_report.py index 217e761e..ee278cb5 100644 --- a/ted_sws/core/model/validation_report.py +++ b/ted_sws/core/model/validation_report.py @@ -8,7 +8,7 @@ class ReportNoticeMetadata(PropertyBaseModel): - path: Optional[Path] + path: Optional[Path] = None class ReportNotice(PropertyBaseModel): @@ -37,7 +37,7 @@ class SPARQLSummaryQuery(PropertyBaseModel): """ Stores SPARQL query details """ - title: Optional[str] + title: Optional[str] = None query: str @@ -46,9 +46,9 @@ class SPARQLValidationSummaryQueryResult(PropertyBaseModel): """ query: SPARQLSummaryQuery - identifier: Optional[str] + identifier: Optional[str] = None aggregate: Optional[SPARQLValidationSummaryCountReport] = SPARQLValidationSummaryCountReport() - test_suite_identifier: Optional[str] + test_suite_identifier: Optional[str] = None class SPARQLValidationSummaryReport(ValidationManifestation): @@ -62,7 +62,7 @@ class SHACLSummaryQuery(PropertyBaseModel): """ Stores SPARQL query details """ - result_path: Optional[str] + result_path: Optional[str] = None class SHACLValidationSummarySeverityCountResult(PropertyBaseModel): @@ -75,7 +75,7 @@ class SHACLValidationSummaryResult(PropertyBaseModel): query: Optional[SHACLSummaryQuery] = SHACLSummaryQuery() result_severity: Optional[SHACLValidationSummarySeverityCountResult] = SHACLValidationSummarySeverityCountResult() conforms: Optional[QueryValidationSummaryCountReportStatus] = QueryValidationSummaryCountReportStatus() - test_suite_identifier: Optional[str] + test_suite_identifier: Optional[str] = None class SHACLValidationSummaryReport(ValidationManifestation): diff --git a/ted_sws/notice_packager/model/metadata.py b/ted_sws/notice_packager/model/metadata.py index a2259155..a6ccd060 100644 --- a/ted_sws/notice_packager/model/metadata.py +++ b/ted_sws/notice_packager/model/metadata.py @@ -65,9 +65,9 @@ class NoticeMetadata(Metadata): """ General notice metadata """ - id: Optional[str] - public_number_document: Optional[str] - public_number_edition: Optional[str] + id: Optional[str] = None + public_number_document: Optional[str] = None + public_number_edition: Optional[str] = None class MetsMetadata(Metadata): @@ -80,19 +80,19 @@ class MetsMetadata(Metadata): type: str = METS_TYPE_CREATE profile: str = METS_PROFILE createdate: str = datetime.datetime.now().isoformat() - document_id: Optional[str] - dmd_id: Optional[str] + document_id: Optional[str] = None + dmd_id: Optional[str] = None dmd_mdtype: str = METS_DMD_MDTYPE dmd_othermdtype: str = METS_DMD_OTHERMDTYPE - dmd_href: Optional[str] - tmd_id: Optional[str] - tmd_href: Optional[str] + dmd_href: Optional[str] = None + tmd_id: Optional[str] = None + tmd_href: Optional[str] = None tmd_mdtype: str = METS_TMD_MDTYPE tmd_othermdtype: str = METS_TMD_OTHERMDTYPE - file_id: Optional[str] - notice_file_href: Optional[str] + file_id: Optional[str] = None + notice_file_href: Optional[str] = None notice_file_mimetype: Optional[str] = METS_NOTICE_FILE_MIMETYPE - notice_file_checksum: Optional[str] + notice_file_checksum: Optional[str] = None notice_file_checksum_type: Optional[str] = METS_NOTICE_FILE_CHECKSUM_TYPE @validator('type') @@ -107,10 +107,10 @@ class WorkMetadata(Metadata): and the rest is a bunch of constants OR generated values (e.g. date, URI, ...) """ - identifier: Optional[str] - oj_identifier: Optional[str] - cdm_rdf_type: Optional[str] - resource_type: Optional[str] + identifier: Optional[str] = None + oj_identifier: Optional[str] = None + cdm_rdf_type: Optional[str] = None + resource_type: Optional[str] = None uri: Optional[str] = None do_not_index: str = WORK_DO_NOT_INDEX date_document: str = datetime.datetime.now().strftime('%Y-%m-%d') @@ -123,18 +123,18 @@ class WorkMetadata(Metadata): dataset_version: Optional[str] = None dataset_keyword: List[str] = DATASET_KEYWORD dataset_has_frequency_publication_frequency: str = PUBLICATION_FREQUENCY - procurement_public_issued_by_country: Optional[str] - procurement_public_url_etendering: Optional[List[str]] + procurement_public_issued_by_country: Optional[str] = None + procurement_public_url_etendering: Optional[List[str]] = None class ExpressionMetadata(Metadata): - identifier: Optional[str] + identifier: Optional[str] = None title: Optional[Dict[str, str]] = None uses_language: str = USES_LANGUAGE class ManifestationMetadata(Metadata): - identifier: Optional[str] + identifier: Optional[str] = None type: str = MANIFESTATION_TYPE date_publication: str = datetime.datetime.now().strftime('%Y-%m-%d') distribution_has_status_distribution_status: str = DISTRIBUTION_STATUS diff --git a/tests/conftest.py b/tests/conftest.py index 81f97ad3..eff7320d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -161,7 +161,7 @@ def normalised_metadata_dict(): EXTRACTED_LEGAL_BASIS_KEY: 'http://publications.europa.eu/resource/authority/legal-basis/32009L0081', FORM_NUMBER_KEY: 'F18', LEGAL_BASIS_DIRECTIVE_KEY: 'http://publications.europa.eu/resource/authority/legal-basis/32009L0081', - E_FORMS_SUBTYPE_KEY: 16, + E_FORMS_SUBTYPE_KEY: '16', XSD_VERSION_KEY: "R2.0.9.S04.E01" } diff --git a/tests/features/conftest.py b/tests/features/conftest.py index 147a0ddb..c4408022 100644 --- a/tests/features/conftest.py +++ b/tests/features/conftest.py @@ -81,11 +81,11 @@ def publicly_available_notice(fetched_notice_data, normalised_metadata_dict) -> sparql_validation = SPARQLTestSuiteValidationReport(object_data="This is validation report!", test_suite_identifier="sparql_test_id", mapping_suite_identifier="mapping_suite_id", - validation_results=[]) + validation_results="") shacl_validation = SHACLTestSuiteValidationReport(object_data="This is validation report!", test_suite_identifier="shacl_test_id", mapping_suite_identifier="mapping_suite_id", - validation_results=[]) + validation_results="") notice = Notice(ted_id=ted_id) notice.set_xml_manifestation(xml_manifestation) notice.set_original_metadata(original_metadata) diff --git a/tests/unit/core/model/conftest.py b/tests/unit/core/model/conftest.py index c0dd1ff4..e0b8ab73 100644 --- a/tests/unit/core/model/conftest.py +++ b/tests/unit/core/model/conftest.py @@ -34,7 +34,7 @@ def publicly_available_notice(fetched_notice_data, normalised_metadata_dict) -> shacl_validation = SHACLTestSuiteValidationReport(object_data="This is validation report!", test_suite_identifier="shacl_test_id", mapping_suite_identifier="mapping_suite_id", - validation_results=[]) + validation_results="") xpath_coverage_validation = XPATHCoverageValidationReport(object_data="This is validation report!", test_suite_identifier="shacl_test_id", mapping_suite_identifier="mapping_suite_id", diff --git a/tests/unit/core/model/test_pydantic.py b/tests/unit/core/model/test_pydantic.py index 5ff5e508..aac9ed65 100644 --- a/tests/unit/core/model/test_pydantic.py +++ b/tests/unit/core/model/test_pydantic.py @@ -18,7 +18,7 @@ class Metadata(BaseModel): class User(BaseModel): id: int - name = 'John Doe' + name: str = 'John Doe' signup_ts: Optional[datetime] = None friends: List[int] = [] @@ -29,8 +29,8 @@ class Foo(BaseModel): class Bar(BaseModel): - apple = 'x' - banana = 'y' + apple: str = 'x' + banana: str = 'y' class Spam(BaseModel): diff --git a/tests/unit/data_manager/test_mapping_suite_repository.py b/tests/unit/data_manager/test_mapping_suite_repository.py index 9010c703..14a126b3 100644 --- a/tests/unit/data_manager/test_mapping_suite_repository.py +++ b/tests/unit/data_manager/test_mapping_suite_repository.py @@ -69,6 +69,6 @@ def test_inter_transactions_mapping_suite_repositories(mongodb_client, file_syst result_mapping_suite = mapping_suite_repository_mongodb.get(reference=fake_mapping_suite_identifier_with_version) mapping_suite_repository_file_system.add(mapping_suite=result_mapping_suite) result_mapping_suite = mapping_suite_repository_file_system.get(reference=fake_mapping_suite.identifier) - assert DeepDiff(result_mapping_suite, fake_mapping_suite) == {} + assert DeepDiff(result_mapping_suite.model_dump(), fake_mapping_suite.model_dump()) == {} mapping_suite_repository_file_system.clear_repository() mongodb_client.drop_database(aggregates_database_name) From bacff55fedca1689fbb61355a7b18257d3212779 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 12 Sep 2023 17:20:54 +0300 Subject: [PATCH 5/7] Update conftest.py --- tests/unit/core/model/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/unit/core/model/conftest.py b/tests/unit/core/model/conftest.py index e0b8ab73..237f4a9d 100644 --- a/tests/unit/core/model/conftest.py +++ b/tests/unit/core/model/conftest.py @@ -30,7 +30,7 @@ def publicly_available_notice(fetched_notice_data, normalised_metadata_dict) -> sparql_validation = SPARQLTestSuiteValidationReport(object_data="This is validation report!", test_suite_identifier="sparql_test_id", mapping_suite_identifier="mapping_suite_id", - validation_results=[]) + validation_results="") shacl_validation = SHACLTestSuiteValidationReport(object_data="This is validation report!", test_suite_identifier="shacl_test_id", mapping_suite_identifier="mapping_suite_id", From c3ff9706cf9fbd58718a365d18289225de8873fb Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 12 Sep 2023 17:26:22 +0300 Subject: [PATCH 6/7] Update conftest.py --- tests/features/model/conftest.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/features/model/conftest.py b/tests/features/model/conftest.py index c90496c6..2d6cbf1a 100644 --- a/tests/features/model/conftest.py +++ b/tests/features/model/conftest.py @@ -20,11 +20,11 @@ def publicly_available_notice(fetched_notice_data, normalised_metadata_dict) -> sparql_validation = SPARQLTestSuiteValidationReport(object_data="This is validation report!", test_suite_identifier="sparql_test_id", mapping_suite_identifier="mapping_suite_id", - validation_results=[]) + validation_results="") shacl_validation = SHACLTestSuiteValidationReport(object_data="This is validation report!", test_suite_identifier="shacl_test_id", mapping_suite_identifier="mapping_suite_id", - validation_results=[]) + validation_results="") notice = Notice(ted_id=ted_id) notice.set_xml_manifestation(xml_manifestation) notice.set_original_metadata(original_metadata) From 0c44bfaf654b00e3b8ba550b16d5709a397fc4b8 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 12 Sep 2023 17:48:14 +0300 Subject: [PATCH 7/7] Update entity_deduplication.py --- ted_sws/master_data_registry/services/entity_deduplication.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ted_sws/master_data_registry/services/entity_deduplication.py b/ted_sws/master_data_registry/services/entity_deduplication.py index 77508484..3c13b1fb 100644 --- a/ted_sws/master_data_registry/services/entity_deduplication.py +++ b/ted_sws/master_data_registry/services/entity_deduplication.py @@ -239,7 +239,7 @@ def deduplicate_procedure_entities(notices: List[Notice], procedure_cet_uri: str notice_families = defaultdict(list) for notice in notices: if notice.original_metadata and notice.original_metadata.RN: - parent_notice_id = notice.original_metadata.RN[0] + parent_notice_id = str(notice.original_metadata.RN[0]) parent_notice_id = f"{parent_notice_id[4:]}-{parent_notice_id[:4]}" notice_families[parent_notice_id].append(notice)