From 4296f0ea6c5979f7ddbe1f69b7c705b1e17921a7 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Mon, 2 Oct 2023 20:37:14 +0300 Subject: [PATCH] create DailyNoticesMetadata model, repository + tests --- ted_sws/core/model/supra_notice.py | 43 +++++++++ .../daily_notices_metadata_repository.py | 92 +++++++++++++++++++ .../data_manager/adapters/repository_abc.py | 39 +++++++- tests/unit/core/model/conftest.py | 11 +++ .../core/model/test_daily_notice_metadata.py | 8 ++ tests/unit/data_manager/conftest.py | 12 ++- .../test_daily_notices_metadata_repository.py | 23 +++++ 7 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 ted_sws/data_manager/adapters/daily_notices_metadata_repository.py create mode 100644 tests/unit/core/model/test_daily_notice_metadata.py create mode 100644 tests/unit/data_manager/test_daily_notices_metadata_repository.py diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index 6b381b84..1b5f7361 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -10,8 +10,11 @@ from datetime import datetime, date from typing import List, Optional +from pydantic import computed_field + from ted_sws.core.model import PropertyBaseModel from ted_sws.core.model.manifestation import Manifestation, ValidationSummaryReport +from ted_sws.core.model.notice import NoticeStatus class SupraNotice(PropertyBaseModel, abc.ABC): @@ -49,3 +52,43 @@ class DailySupraNotice(SupraNotice): ted_publication_date: date validation_report: Optional[SupraNoticeValidationReport] = None validation_summary: Optional[ValidationSummaryReport] = None + + +class DailyNoticesMetadataABC(PropertyBaseModel): + class Config: + underscore_attrs_are_private = True + validate_assignment = True + orm_mode = True + + +NOTICE_STATUSES_DEFAULT_STATS = {str(notice_status).lower(): 0 for notice_status in NoticeStatus} + + +class DailyNoticesMetadata(DailyNoticesMetadataABC): + """ + This is an aggregate over the notices 
published in TED on a specific day. + """ + ted_api_notice_ids: List[str] = [] + fetched_notice_ids: List[str] = [] + aggregation_date: date + + mapping_suite_packages: List[str] = [] # unique list of used mapping_suite_packages + + notice_statuses: dict = NOTICE_STATUSES_DEFAULT_STATS + + @computed_field + @property + def notice_statuses_coverage(self) -> dict: + ted_api_notice_count = self.ted_api_notice_count or 1 + return {f"{notice_status}_coverage": notice_status_count / ted_api_notice_count + for notice_status, notice_status_count in self.notice_statuses.items()} + + @computed_field + @property + def ted_api_notice_count(self) -> int: + return len(self.ted_api_notice_ids) + + @computed_field + @property + def fetched_notices_count(self) -> int: + return len(self.fetched_notice_ids) diff --git a/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py new file mode 100644 index 00000000..6a41d480 --- /dev/null +++ b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py @@ -0,0 +1,92 @@ +from datetime import datetime, time +from typing import Iterator, Optional + +from pymongo import MongoClient, ASCENDING + +from ted_sws import config +from ted_sws.core.model.supra_notice import DailyNoticesMetadata +from ted_sws.data_manager.adapters import inject_date_string_fields +from ted_sws.data_manager.adapters.repository_abc import DailyNoticesMetadataRepositoryABC + +DAILY_NOTICES_METADATA_AGGREGATION_DATE = "aggregation_date" +DAILY_NOTICES_METADATA_ID = "_id" + + +class DailyNoticesMetadataRepository(DailyNoticesMetadataRepositoryABC): + """ + This repository is intended for storing DailyNoticesMetadata objects. 
+ """ + + _collection_name = "daily_notices_metadata_collection" + + def __init__(self, mongodb_client: MongoClient, database_name: str = None): + self._database_name = database_name or config.MONGO_DB_AGGREGATES_DATABASE_NAME + self.mongodb_client = mongodb_client + daily_supra_notice_db = mongodb_client[self._database_name] + self.collection = daily_supra_notice_db[self._collection_name] + self.collection.create_index( + [(DAILY_NOTICES_METADATA_AGGREGATION_DATE, + ASCENDING)]) # TODO: index creation may bring race condition error. + + def _update_daily_notices_metadata(self, daily_notices_metadata: DailyNoticesMetadata, upsert: bool = False): + """ + Updates a DailyNoticesMetadata object in the repository. + :param daily_notices_metadata: + :param upsert: + :return: + """ + daily_notices_metadata_dict = daily_notices_metadata.model_dump() + daily_notices_metadata_dict[DAILY_NOTICES_METADATA_AGGREGATION_DATE] = daily_notices_metadata_dict[ + DAILY_NOTICES_METADATA_AGGREGATION_DATE].isoformat() + self.collection.update_one( + {DAILY_NOTICES_METADATA_ID: daily_notices_metadata_dict[DAILY_NOTICES_METADATA_AGGREGATION_DATE]}, + {"$set": daily_notices_metadata_dict}, upsert=upsert) + + def _create_daily_notices_metadata_from_dict(self, daily_notices_metadata_dict: dict) -> Optional[ + DailyNoticesMetadata]: + """ + Creates a DailyNoticesMetadata object from a dictionary. + :param daily_notices_metadata_dict: + :return: + """ + if not daily_notices_metadata_dict: + return None + daily_notices_metadata_dict[DAILY_NOTICES_METADATA_AGGREGATION_DATE] = datetime.fromisoformat( + daily_notices_metadata_dict[ + DAILY_NOTICES_METADATA_AGGREGATION_DATE]) + daily_notices_metadata_dict.pop(DAILY_NOTICES_METADATA_ID, None) + return DailyNoticesMetadata.model_validate(daily_notices_metadata_dict) + + def add(self, daily_notices_metadata: DailyNoticesMetadata): + """ + Adds a DailyNoticesMetadata object to the repository. 
+ :param daily_notices_metadata: + :return: + """ + self._update_daily_notices_metadata(daily_notices_metadata=daily_notices_metadata, upsert=True) + + def update(self, daily_notices_metadata: DailyNoticesMetadata): + """ + Updates a DailyNoticesMetadata object in the repository. + :param daily_notices_metadata: + :return: + """ + self._update_daily_notices_metadata(daily_notices_metadata=daily_notices_metadata) + + def get(self, reference) -> DailyNoticesMetadata: + """ + Gets a DailyNoticesMetadata object from the repository. + :param reference: + :return: + """ + reference = reference.isoformat() + result_dict = self.collection.find_one({DAILY_NOTICES_METADATA_ID: reference}) + return self._create_daily_notices_metadata_from_dict(daily_notices_metadata_dict=result_dict) + + def list(self) -> Iterator[DailyNoticesMetadata]: + """ + Gets all DailyNoticesMetadata objects from the repository. + :return: + """ + for result_dict in self.collection.find(): + yield self._create_daily_notices_metadata_from_dict(daily_notices_metadata_dict=result_dict) diff --git a/ted_sws/data_manager/adapters/repository_abc.py b/ted_sws/data_manager/adapters/repository_abc.py index 939889d0..bc03c51a 100644 --- a/ted_sws/data_manager/adapters/repository_abc.py +++ b/ted_sws/data_manager/adapters/repository_abc.py @@ -4,7 +4,7 @@ from ted_sws.core.model.manifestation import Manifestation from ted_sws.core.model.metadata import Metadata from ted_sws.core.model.notice import Notice -from ted_sws.core.model.supra_notice import DailySupraNotice +from ted_sws.core.model.supra_notice import DailySupraNotice, DailyNoticesMetadata from ted_sws.core.model.transform import MappingSuite @@ -203,3 +203,40 @@ def list(self) -> Iterator[DailySupraNotice]: This method allows all records to be retrieved from the repository. :return: list of DailySupraNotice """ + + +class DailyNoticesMetadataRepositoryABC(RepositoryABC): + """ + This repository is intended for storing DailyNoticesMetadata objects. 
+ """ + + @abc.abstractmethod + def add(self, daily_notices_metadata: DailyNoticesMetadata): + """ + This method allows you to add DailyNoticesMetadata objects to the repository. + :param daily_notices_metadata: + :return: + """ + + @abc.abstractmethod + def update(self, daily_notices_metadata: DailyNoticesMetadata): + """ + This method allows you to update DailyNoticesMetadata objects in the repository. + :param daily_notices_metadata: + :return: + """ + + @abc.abstractmethod + def get(self, reference) -> DailyNoticesMetadata: + """ + This method allows a DailyNoticesMetadata to be obtained based on an identification reference. + :param reference: + :return: DailyNoticesMetadata + """ + + @abc.abstractmethod + def list(self) -> Iterator[DailyNoticesMetadata]: + """ + This method allows all records to be retrieved from the repository. + :return: list of DailyNoticesMetadata + """ diff --git a/tests/unit/core/model/conftest.py b/tests/unit/core/model/conftest.py index 237f4a9d..35264a03 100644 --- a/tests/unit/core/model/conftest.py +++ b/tests/unit/core/model/conftest.py @@ -6,6 +6,7 @@ # Email: costezki.eugen@gmail.com """ """ +from datetime import datetime, date import pytest @@ -14,6 +15,7 @@ XPATHCoverageValidationReport from ted_sws.core.model.metadata import TEDMetadata, NormalisedMetadata from ted_sws.core.model.notice import Notice, NoticeStatus +from ted_sws.core.model.supra_notice import DailyNoticesMetadata @pytest.fixture @@ -77,3 +79,12 @@ def transformation_eligible_notice(indexed_notice) -> Notice: indexed_notice.set_normalised_metadata(normalised_metadata=NormalisedMetadata(**{"AA": "notice metadata "})) indexed_notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION) return indexed_notice + + +@pytest.fixture() +def notice_aggregation_date_date() -> date: + return datetime.strptime("2021-01-08", "%Y-%m-%d") + +@pytest.fixture() +def daily_notice_metadata(notice_aggregation_date_date) -> DailyNoticesMetadata: + return 
DailyNoticesMetadata(aggregation_date=notice_aggregation_date_date) \ No newline at end of file diff --git a/tests/unit/core/model/test_daily_notice_metadata.py b/tests/unit/core/model/test_daily_notice_metadata.py new file mode 100644 index 00000000..eccc23fb --- /dev/null +++ b/tests/unit/core/model/test_daily_notice_metadata.py @@ -0,0 +1,8 @@ +from ted_sws.core.model.notice import NoticeStatus +from ted_sws.core.model.supra_notice import DailyNoticesMetadata + + +def test_daily_notice_metadata_model(daily_notice_metadata): + daily_notice_metadata_dict = daily_notice_metadata.model_dump() + daily_notice_metadata_from_dict = DailyNoticesMetadata(**daily_notice_metadata_dict) + assert daily_notice_metadata == daily_notice_metadata_from_dict diff --git a/tests/unit/data_manager/conftest.py b/tests/unit/data_manager/conftest.py index 2ed7fcf3..0d14c631 100644 --- a/tests/unit/data_manager/conftest.py +++ b/tests/unit/data_manager/conftest.py @@ -1,6 +1,6 @@ from datetime import date import pytest -from ted_sws.core.model.supra_notice import DailySupraNotice +from ted_sws.core.model.supra_notice import DailySupraNotice, DailyNoticesMetadata from ted_sws.core.model.transform import MetadataConstraints, FileResource, TransformationRuleSet, SHACLTestSuite, \ SPARQLTestSuite, MappingSuite, TransformationTestData from tests import TEST_DATA_PATH @@ -47,3 +47,13 @@ def daily_supra_notice(): @pytest.fixture def fake_mapping_suite_identifier_with_version(fake_mapping_suite): return fake_mapping_suite.get_mongodb_id() + + +@pytest.fixture +def daily_notices_metadata(): + return DailyNoticesMetadata(aggregation_date=date.today(), + ted_api_notice_ids=["1", "2", "3"], + fetched_notice_ids=["1", "2", "3"], + notice_statuses={"published": 3, "raw": 0}, + mapping_suite_packages=["fake_mapping_suite_ver_1", "fake_mapping_suite_ver_2"], + notice_count=3) \ No newline at end of file diff --git a/tests/unit/data_manager/test_daily_notices_metadata_repository.py 
b/tests/unit/data_manager/test_daily_notices_metadata_repository.py new file mode 100644 index 00000000..ec406aee --- /dev/null +++ b/tests/unit/data_manager/test_daily_notices_metadata_repository.py @@ -0,0 +1,23 @@ +from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository + + +def test_daily_notices_metadata_repository(mongodb_client, daily_notices_metadata): + daily_notices_metadata_repository = DailyNoticesMetadataRepository(mongodb_client=mongodb_client) + + # Upsert is False by default, so updating a missing object stores nothing + daily_notices_metadata_repository.update(daily_notices_metadata) + assert daily_notices_metadata_repository.get(daily_notices_metadata.aggregation_date) is None + + # Creates a new object + daily_notices_metadata_repository.add(daily_notices_metadata) + assert daily_notices_metadata == daily_notices_metadata_repository.get(daily_notices_metadata.aggregation_date) + + # Check that there is only one object in the repository + assert len(list(daily_notices_metadata_repository.list())) == 1 + + # The only object in the repository is the one that was added + assert list(daily_notices_metadata_repository.list()) == [daily_notices_metadata] + + # Check that a repeated add updates the existing object instead of duplicating it + daily_notices_metadata_repository.add(daily_notices_metadata) + assert list(daily_notices_metadata_repository.list()) == [daily_notices_metadata]