From 4296f0ea6c5979f7ddbe1f69b7c705b1e17921a7 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Mon, 2 Oct 2023 20:37:14 +0300 Subject: [PATCH 01/13] create DailyNoticesMetadata model, repository + tests --- ted_sws/core/model/supra_notice.py | 43 +++++++++ .../daily_notices_metadata_repository.py | 92 +++++++++++++++++++ .../data_manager/adapters/repository_abc.py | 39 +++++++- tests/unit/core/model/conftest.py | 11 +++ .../core/model/test_daily_notice_metadata.py | 8 ++ tests/unit/data_manager/conftest.py | 12 ++- .../test_daily_notices_metadata_repository.py | 23 +++++ 7 files changed, 226 insertions(+), 2 deletions(-) create mode 100644 ted_sws/data_manager/adapters/daily_notices_metadata_repository.py create mode 100644 tests/unit/core/model/test_daily_notice_metadata.py create mode 100644 tests/unit/data_manager/test_daily_notices_metadata_repository.py diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index 6b381b84..1b5f7361 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -10,8 +10,11 @@ from datetime import datetime, date from typing import List, Optional +from pydantic import computed_field + from ted_sws.core.model import PropertyBaseModel from ted_sws.core.model.manifestation import Manifestation, ValidationSummaryReport +from ted_sws.core.model.notice import NoticeStatus class SupraNotice(PropertyBaseModel, abc.ABC): @@ -49,3 +52,43 @@ class DailySupraNotice(SupraNotice): ted_publication_date: date validation_report: Optional[SupraNoticeValidationReport] = None validation_summary: Optional[ValidationSummaryReport] = None + + +class DailyNoticesMetadataABC(PropertyBaseModel): + class Config: + underscore_attrs_are_private = True + validate_assignment = True + orm_mode = True + + +NOTICE_STATUSES_DEFAULT_STATS = {str(notice_status).lower(): 0 for notice_status in NoticeStatus} + + +class DailyNoticesMetadata(DailyNoticesMetadataABC): + """ + This is an aggregate over the notices published in TED in a specific day. + """ + ted_api_notice_ids: List[str] = [] + fetched_notice_ids: List[str] = [] + aggregation_date: date + + mapping_suite_packages: List[str] = [] # unique list of used mapping_suite_packages + + notice_statuses: dict = NOTICE_STATUSES_DEFAULT_STATS + + @computed_field + @property + def notice_statuses_coverage(self) -> dict: + ted_api_notice_count = self.ted_api_notice_count or 1 + return {f"{notice_status}_coverage": notice_status_count / ted_api_notice_count + for notice_status, notice_status_count in self.notice_statuses.items()} + + @computed_field + @property + def ted_api_notice_count(self) -> int: + return len(self.ted_api_notice_ids) + + @computed_field + @property + def fetched_notices_count(self) -> int: + return len(self.fetched_notice_ids) diff --git a/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py new file mode 100644 index 00000000..6a41d480 --- /dev/null +++ b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py @@ -0,0 +1,92 @@ +from datetime import datetime, time +from typing import Iterator, Optional + +from pymongo import MongoClient, ASCENDING + +from ted_sws import config +from ted_sws.core.model.supra_notice import DailyNoticesMetadata +from ted_sws.data_manager.adapters import inject_date_string_fields +from ted_sws.data_manager.adapters.repository_abc import DailyNoticesMetadataRepositoryABC + +DAILY_NOTICES_METADATA_AGGREGATION_DATE = "aggregation_date" +DAILY_NOTICES_METADATA_ID = "_id" + + +class DailyNoticesMetadataRepository(DailyNoticesMetadataRepositoryABC): + """ + This repository is intended for storing DailyNoticesMetadata objects. + """ + + _collection_name = "daily_notices_metadata_collection" + + def __init__(self, mongodb_client: MongoClient, database_name: str = None): + self._database_name = database_name or config.MONGO_DB_AGGREGATES_DATABASE_NAME + self.mongodb_client = mongodb_client + daily_supra_notice_db = mongodb_client[self._database_name] + self.collection = daily_supra_notice_db[self._collection_name] + self.collection.create_index( + [(DAILY_NOTICES_METADATA_AGGREGATION_DATE, + ASCENDING)]) # TODO: index creation may bring race condition error. + + def _update_daily_notices_metadata(self, daily_notices_metadata: DailyNoticesMetadata, upsert: bool = False): + """ + Updates a DailyNoticesMetadata object in the repository. + :param daily_notices_metadata: + :param upsert: + :return: + """ + daily_notices_metadata_dict = daily_notices_metadata.model_dump() + daily_notices_metadata_dict[DAILY_NOTICES_METADATA_AGGREGATION_DATE] = daily_notices_metadata_dict[ + DAILY_NOTICES_METADATA_AGGREGATION_DATE].isoformat() + self.collection.update_one( + {DAILY_NOTICES_METADATA_ID: daily_notices_metadata_dict[DAILY_NOTICES_METADATA_AGGREGATION_DATE]}, + {"$set": daily_notices_metadata_dict}, upsert=upsert) + + def _create_daily_notices_metadata_from_dict(self, daily_notices_metadata_dict: dict) -> Optional[ + DailyNoticesMetadata]: + """ + Creates a DailyNoticesMetadata object from a dictionary. + :param daily_notices_metadata_dict: + :return: + """ + if not daily_notices_metadata_dict: + return None + daily_notices_metadata_dict[DAILY_NOTICES_METADATA_AGGREGATION_DATE] = datetime.fromisoformat( + daily_notices_metadata_dict[ + DAILY_NOTICES_METADATA_AGGREGATION_DATE]) + daily_notices_metadata_dict.pop(DAILY_NOTICES_METADATA_ID, None) + return DailyNoticesMetadata.model_validate(daily_notices_metadata_dict) + + def add(self, daily_notices_metadata: DailyNoticesMetadata): + """ + Adds a DailyNoticesMetadata object to the repository. + :param daily_notices_metadata: + :return: + """ + self._update_daily_notices_metadata(daily_notices_metadata=daily_notices_metadata, upsert=True) + + def update(self, daily_notices_metadata: DailyNoticesMetadata): + """ + Updates a DailyNoticesMetadata object in the repository. + :param daily_notices_metadata: + :return: + """ + self._update_daily_notices_metadata(daily_notices_metadata=daily_notices_metadata) + + def get(self, reference) -> DailyNoticesMetadata: + """ + Gets a DailyNoticesMetadata object from the repository. + :param reference: + :return: + """ + reference = reference.isoformat() + result_dict = self.collection.find_one({DAILY_NOTICES_METADATA_ID: reference}) + return self._create_daily_notices_metadata_from_dict(daily_notices_metadata_dict=result_dict) + + def list(self) -> Iterator[DailyNoticesMetadata]: + """ + Gets all DailyNoticesMetadata objects from the repository. + :return: + """ + for result_dict in self.collection.find(): + yield self._create_daily_notices_metadata_from_dict(daily_notices_metadata_dict=result_dict) diff --git a/ted_sws/data_manager/adapters/repository_abc.py b/ted_sws/data_manager/adapters/repository_abc.py index 939889d0..bc03c51a 100644 --- a/ted_sws/data_manager/adapters/repository_abc.py +++ b/ted_sws/data_manager/adapters/repository_abc.py @@ -4,7 +4,7 @@ from ted_sws.core.model.manifestation import Manifestation from ted_sws.core.model.metadata import Metadata from ted_sws.core.model.notice import Notice -from ted_sws.core.model.supra_notice import DailySupraNotice +from ted_sws.core.model.supra_notice import DailySupraNotice, DailyNoticesMetadata from ted_sws.core.model.transform import MappingSuite @@ -203,3 +203,40 @@ def list(self) -> Iterator[DailySupraNotice]: This method allows all records to be retrieved from the repository. :return: list of DailySupraNotice """ + + +class DailyNoticesMetadataRepositoryABC(RepositoryABC): + """ + This repository is intended for storing DailyNoticesMetadata objects. + """ + + @abc.abstractmethod + def add(self, daily_notices_metadata: DailyNoticesMetadata): + """ + This method allows you to add DailyNoticesMetadata objects to the repository. + :param daily_notices_metadata: + :return: + """ + + @abc.abstractmethod + def update(self, daily_notices_metadata: DailyNoticesMetadata): + """ + This method allows you to update DailyNoticesMetadata objects to the repository + :param daily_notices_metadata: + :return: + """ + + @abc.abstractmethod + def get(self, reference) -> DailyNoticesMetadata: + """ + This method allows a DailyNoticesMetadata to be obtained based on an identification reference. + :param reference: + :return: DailyNoticesMetadata + """ + + @abc.abstractmethod + def list(self) -> Iterator[DailyNoticesMetadata]: + """ + This method allows all records to be retrieved from the repository. + :return: list of DailyNoticesMetadata + """ diff --git a/tests/unit/core/model/conftest.py b/tests/unit/core/model/conftest.py index 237f4a9d..35264a03 100644 --- a/tests/unit/core/model/conftest.py +++ b/tests/unit/core/model/conftest.py @@ -6,6 +6,7 @@ # Email: costezki.eugen@gmail.com """ """ +from datetime import datetime, date import pytest @@ -14,6 +15,7 @@ XPATHCoverageValidationReport from ted_sws.core.model.metadata import TEDMetadata, NormalisedMetadata from ted_sws.core.model.notice import Notice, NoticeStatus +from ted_sws.core.model.supra_notice import DailyNoticesMetadata @pytest.fixture @@ -77,3 +79,12 @@ def transformation_eligible_notice(indexed_notice) -> Notice: indexed_notice.set_normalised_metadata(normalised_metadata=NormalisedMetadata(**{"AA": "notice metadata "})) indexed_notice.update_status_to(NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION) return indexed_notice + + +@pytest.fixture() +def notice_aggregation_date_date() -> date: + return datetime.strptime("2021-01-08", "%Y-%m-%d") + +@pytest.fixture() +def daily_notice_metadata(notice_aggregation_date_date) -> DailyNoticesMetadata: + return DailyNoticesMetadata(aggregation_date=notice_aggregation_date_date) \ No newline at end of file diff --git a/tests/unit/core/model/test_daily_notice_metadata.py b/tests/unit/core/model/test_daily_notice_metadata.py new file mode 100644 index 00000000..eccc23fb --- /dev/null +++ b/tests/unit/core/model/test_daily_notice_metadata.py @@ -0,0 +1,8 @@ +from ted_sws.core.model.notice import NoticeStatus +from ted_sws.core.model.supra_notice import DailyNoticesMetadata + + +def test_daily_notice_metadata_model(daily_notice_metadata): + daily_notice_metadata_dict = daily_notice_metadata.model_dump() + daily_notice_metadata_from_dict = DailyNoticesMetadata(**daily_notice_metadata_dict) + assert daily_notice_metadata == daily_notice_metadata_from_dict diff --git a/tests/unit/data_manager/conftest.py b/tests/unit/data_manager/conftest.py index 2ed7fcf3..0d14c631 100644 --- a/tests/unit/data_manager/conftest.py +++ b/tests/unit/data_manager/conftest.py @@ -1,6 +1,6 @@ from datetime import date import pytest -from ted_sws.core.model.supra_notice import DailySupraNotice +from ted_sws.core.model.supra_notice import DailySupraNotice, DailyNoticesMetadata from ted_sws.core.model.transform import MetadataConstraints, FileResource, TransformationRuleSet, SHACLTestSuite, \ SPARQLTestSuite, MappingSuite, TransformationTestData from tests import TEST_DATA_PATH @@ -47,3 +47,13 @@ def daily_supra_notice(): @pytest.fixture def fake_mapping_suite_identifier_with_version(fake_mapping_suite): return fake_mapping_suite.get_mongodb_id() + + +@pytest.fixture +def daily_notices_metadata(): + return DailyNoticesMetadata(aggregation_date=date.today(), + ted_api_notice_ids=["1", "2", "3"], + fetched_notice_ids=["1", "2", "3"], + notice_statuses={"published": 3, "raw": 0}, + mapping_suite_packages=["fake_mapping_suite_ver_1", "fake_mapping_suite_ver_2"], + notice_count=3) \ No newline at end of file diff --git a/tests/unit/data_manager/test_daily_notices_metadata_repository.py b/tests/unit/data_manager/test_daily_notices_metadata_repository.py new file mode 100644 index 00000000..ec406aee --- /dev/null +++ b/tests/unit/data_manager/test_daily_notices_metadata_repository.py @@ -0,0 +1,23 @@ +from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository + + +def test_daily_notices_metadata_repository(mongodb_client, daily_notices_metadata): + daily_notices_metadata_repository = DailyNoticesMetadataRepository(mongodb_client=mongodb_client) + + # Upset is False by default + daily_notices_metadata_repository.update(daily_notices_metadata) + assert daily_notices_metadata_repository.get(daily_notices_metadata.aggregation_date) is None + + # Creates a new object + daily_notices_metadata_repository.add(daily_notices_metadata) + assert daily_notices_metadata == daily_notices_metadata_repository.get(daily_notices_metadata.aggregation_date) + + # Check if only on object in the repository + assert len(list(daily_notices_metadata_repository.list())) == 1 + + # Only one object in the repository + assert list(daily_notices_metadata_repository.list()) == [daily_notices_metadata] + + # Check if on add updates the object + daily_notices_metadata_repository.add(daily_notices_metadata) + assert list(daily_notices_metadata_repository.list()) == [daily_notices_metadata] From f5fba8e05a976dbe5a42a5397fd1a31abdb44d6b Mon Sep 17 00:00:00 2001 From: Dumitru Date: Mon, 2 Oct 2023 22:16:35 +0300 Subject: [PATCH 02/13] WIP --- .../daily_notices_metadata_services.py | 55 +++++++++++++++++++ tests/unit/data_manager/services/__init__.py | 0 .../test_notices_metadata_services.py | 17 ++++++ tests/unit/event_manager/__init__.py | 0 tests/unit/event_manager/services/__init__.py | 0 5 files changed, 72 insertions(+) create mode 100644 ted_sws/data_manager/services/daily_notices_metadata_services.py create mode 100644 tests/unit/data_manager/services/__init__.py create mode 100644 tests/unit/data_manager/services/test_notices_metadata_services.py create mode 100644 tests/unit/event_manager/__init__.py create mode 100644 tests/unit/event_manager/services/__init__.py diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py new file mode 100644 index 00000000..a73f04a6 --- /dev/null +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -0,0 +1,55 @@ +from datetime import date, datetime, timedelta +from typing import Optional + +from dateutil import rrule +from pymongo import MongoClient + +from ted_sws import config +from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI + +DEFAULT_TED_API_START_DATE = "2023-09-01" # TODO: Change to 2014-01-01 +DEFAULT_TED_API_START_DATE_FORMAT = "%Y-%m-%d" + + +def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> Optional[list]: + """ + Given a date range returns all daily dates in that range + :param start_date: + :param end_date: + :return: + """ + if start_date > end_date: + return None + return [dt for dt in rrule.rrule(rrule.DAILY, + dtstart=start_date, + until=end_date)] + + +def update_daily_notices_metadata_from_ted(start_date: date = None, + end_date: date = None, + ted_api: TedAPIAdapter = None, + mongo_client: MongoClient = None, + daily_notices_metadata_repo: DailyNoticesMetadataRepository = None): + """ + Updates the daily notices metadata from the TED API. + """ + start_date = start_date or datetime.strptime(DEFAULT_TED_API_START_DATE, DEFAULT_TED_API_START_DATE_FORMAT) + end_date = end_date or datetime.today() - timedelta(days=1) + + if start_date > end_date: + raise Exception("Start date cannot be greater than end date") + + ted_api = ted_api or TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) + mongo_client = mongo_client or MongoClient(config.MONGO_DB_AUTH_URL) + daily_notices_metadata_repo = daily_notices_metadata_repo or DailyNoticesMetadataRepository(mongo_client) + + # Generate list of dates from date range + date_range = generate_list_of_dates_from_date_range(start_date, end_date) + + # Getting from metadata repository dates that are not in the repository from date range + dates_not_in_repository = [day for day in date_range if not daily_notices_metadata_repo.get(day)] # TODO: Lazy evaluation + + # Getting from TED API dates that are not in the repository from date range + #ted_api.get_by_query(query={"q": ""}) + diff --git a/tests/unit/data_manager/services/__init__.py b/tests/unit/data_manager/services/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/data_manager/services/test_notices_metadata_services.py b/tests/unit/data_manager/services/test_notices_metadata_services.py new file mode 100644 index 00000000..22c16535 --- /dev/null +++ b/tests/unit/data_manager/services/test_notices_metadata_services.py @@ -0,0 +1,17 @@ +from ted_sws import config +from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository +from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI + + +def test_update_daily_notices_metadata_from_ted(mongodb_client): + """ + Test update_daily_notices_metadata_from_ted function + """ + + ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) + daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongodb_client) + + update_daily_notices_metadata_from_ted(ted_api=ted_api, + mongo_client=mongodb_client, + daily_notices_metadata_repo=daily_notices_metadata_repo) diff --git a/tests/unit/event_manager/__init__.py b/tests/unit/event_manager/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/unit/event_manager/services/__init__.py b/tests/unit/event_manager/services/__init__.py new file mode 100644 index 00000000..e69de29b From 56cbb7f74864b5f8670cc9cdfb3aa3c9e2bb43dc Mon Sep 17 00:00:00 2001 From: Dumitru Date: Tue, 3 Oct 2023 12:27:12 +0300 Subject: [PATCH 03/13] WIP --- dags/daily_notices_metadata_update.py | 48 +++++++++++++++++++ .../daily_notices_metadata_repository.py | 17 ++++++- .../daily_notices_metadata_services.py | 25 ++++++++-- .../test_notices_metadata_services.py | 12 ++++- 4 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 dags/daily_notices_metadata_update.py diff --git a/dags/daily_notices_metadata_update.py b/dags/daily_notices_metadata_update.py new file mode 100644 index 00000000..eefc94cf --- /dev/null +++ b/dags/daily_notices_metadata_update.py @@ -0,0 +1,48 @@ +""" +DAG to update daily notices metadata from TED. +""" + +from datetime import date + +from airflow.models import Param +from airflow.decorators import dag, task + +from dags import DEFAULT_DAG_ARGUMENTS +from dags.dags_utils import get_dag_param + +START_DATE_PARAM_KEY = "start_date" +END_DATE_PARAM_KEY = "end_date" + +@dag(default_args=DEFAULT_DAG_ARGUMENTS, + schedule_interval=None, + tags=['daily', "dashboards", "metadata", "ted", "notices"], + description=__doc__[0: __doc__.find(".")], + doc_md=__doc__, + params={ + START_DATE_PARAM_KEY: Param( + default=f"{date.today()}", + type="string", + format="date", + title="Start Date", + description="""This field is required. + Start date of the date range to fetch notices from TED.""" + ), + END_DATE_PARAM_KEY: Param( + default=f"{date.today()}", + type="string", + format="date", + title="End Date", + description="""This field is required. + End date of the date range to fetch notices from TED.""" + ) + } + ) +def daily_notices_metadata_update(): + @task + def update_daily_notices_metadata_from_ted(): + start_date = get_dag_param(key=START_DATE_PARAM_KEY, raise_error=True) + end_date = get_dag_param(key=END_DATE_PARAM_KEY, raise_error=True) + + update_daily_notices_metadata_from_ted(start_date=start_date, end_date=end_date) + + update_daily_notices_metadata_from_ted() \ No newline at end of file diff --git a/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py index 6a41d480..ff6d8562 100644 --- a/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py +++ b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py @@ -1,5 +1,5 @@ -from datetime import datetime, time -from typing import Iterator, Optional +from datetime import datetime, time, date +from typing import Iterator, Optional, List from pymongo import MongoClient, ASCENDING @@ -90,3 +90,16 @@ def list(self) -> Iterator[DailyNoticesMetadata]: """ for result_dict in self.collection.find(): yield self._create_daily_notices_metadata_from_dict(daily_notices_metadata_dict=result_dict) + + def list_daily_notices_metadata_aggregation_date(self) -> List[date]: + """ + Gets all DailyNoticesMetadata ids from the repository. + :return: + """ + daily_notices_metadata_list = list(self.collection.find({}, + {DAILY_NOTICES_METADATA_AGGREGATION_DATE: 1, + DAILY_NOTICES_METADATA_ID: 0})) + if not daily_notices_metadata_list: + return [] + return [datetime.fromisoformat(aggregation_date[DAILY_NOTICES_METADATA_AGGREGATION_DATE]) for aggregation_date + in daily_notices_metadata_list] diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index a73f04a6..426e57d0 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -5,11 +5,19 @@ from pymongo import MongoClient from ted_sws import config +from ted_sws.core.model.supra_notice import DailyNoticesMetadata from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI -DEFAULT_TED_API_START_DATE = "2023-09-01" # TODO: Change to 2014-01-01 +DEFAULT_TED_API_START_DATE = "2023-09-29" # TODO: Change to 2014-01-01 DEFAULT_TED_API_START_DATE_FORMAT = "%Y-%m-%d" +TED_API_NOTICE_ID_FIELD = "ND" +TED_API_WILDCARD_DATE_FORMAT = "%Y%m%d*" +DAILY_NOTICES_METADATA_TED_API_QUERY_RESULT_FIELDS = {"fields": ["ND"]} +TED_API_QUERY_FIELD = "q" +DAILY_NOTICES_METADATA_TED_API_QUERY = { + TED_API_QUERY_FIELD: "PD=[{aggregation_date}]" +} def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> Optional[list]: @@ -48,8 +56,17 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, date_range = generate_list_of_dates_from_date_range(start_date, end_date) # Getting from metadata repository dates that are not in the repository from date range - dates_not_in_repository = [day for day in date_range if not daily_notices_metadata_repo.get(day)] # TODO: Lazy evaluation + dates_not_in_repository = [day for day in date_range if + not daily_notices_metadata_repo.list_daily_notices_metadata_aggregation_date()] # Getting from TED API dates that are not in the repository from date range - #ted_api.get_by_query(query={"q": ""}) - + # TODO: If in ted are 0 notices, coverage is 1 to all + for day in dates_not_in_repository: + ted_api_query = DAILY_NOTICES_METADATA_TED_API_QUERY + ted_api_query[TED_API_QUERY_FIELD] = ted_api_query[TED_API_QUERY_FIELD].format( + aggregation_date=day.strftime(TED_API_WILDCARD_DATE_FORMAT)) + notice_ids = ted_api.get_by_query(ted_api_query, + result_fields=DAILY_NOTICES_METADATA_TED_API_QUERY_RESULT_FIELDS) + daily_notices_metadata = DailyNoticesMetadata(aggregation_date=day) + daily_notices_metadata.ted_api_notice_ids = [notice[TED_API_NOTICE_ID_FIELD] for notice in notice_ids] + daily_notices_metadata_repo.add(daily_notices_metadata) diff --git a/tests/unit/data_manager/services/test_notices_metadata_services.py b/tests/unit/data_manager/services/test_notices_metadata_services.py index 22c16535..1ece797f 100644 --- a/tests/unit/data_manager/services/test_notices_metadata_services.py +++ b/tests/unit/data_manager/services/test_notices_metadata_services.py @@ -1,3 +1,5 @@ +from datetime import date + from ted_sws import config from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted @@ -12,6 +14,14 @@ def test_update_daily_notices_metadata_from_ted(mongodb_client): ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongodb_client) - update_daily_notices_metadata_from_ted(ted_api=ted_api, + update_daily_notices_metadata_from_ted(start_date=date(2021, 1, 7), + end_date=date(2021, 1, 7), + ted_api=ted_api, mongo_client=mongodb_client, daily_notices_metadata_repo=daily_notices_metadata_repo) + + # update_daily_notices_metadata_from_ted(start_date=date(2021, 1, 7), + # end_date=date(2021, 1, 7), + # ted_api=ted_api, + # mongo_client=mongodb_client, + # daily_notices_metadata_repo=daily_notices_metadata_repo) \ No newline at end of file From b7d4389c17bef4505f2dd5824cb941294edc4fd3 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Tue, 3 Oct 2023 15:03:37 +0300 Subject: [PATCH 04/13] WIP --- dags/daily_notices_metadata_update.py | 23 ++++++-- .../daily_notices_metadata_services.py | 52 ++++++++++++++++++- 2 files changed, 70 insertions(+), 5 deletions(-) diff --git a/dags/daily_notices_metadata_update.py b/dags/daily_notices_metadata_update.py index eefc94cf..9acc1bd7 100644 --- a/dags/daily_notices_metadata_update.py +++ b/dags/daily_notices_metadata_update.py @@ -2,17 +2,20 @@ DAG to update daily notices metadata from TED. """ -from datetime import date +from datetime import date, datetime from airflow.models import Param from airflow.decorators import dag, task from dags import DEFAULT_DAG_ARGUMENTS from dags.dags_utils import get_dag_param +from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ + update_daily_notices_metadata_with_fetched_data START_DATE_PARAM_KEY = "start_date" END_DATE_PARAM_KEY = "end_date" + @dag(default_args=DEFAULT_DAG_ARGUMENTS, schedule_interval=None, tags=['daily', "dashboards", "metadata", "ted", "notices"], @@ -39,10 +42,22 @@ ) def daily_notices_metadata_update(): @task - def update_daily_notices_metadata_from_ted(): + def update_daily_notices_metadata_from_ted_api(): + start_date = get_dag_param(key=START_DATE_PARAM_KEY, raise_error=True) + end_date = get_dag_param(key=END_DATE_PARAM_KEY, raise_error=True) + + update_daily_notices_metadata_from_ted(start_date=datetime.strptime(start_date, "%Y-%m-%d"), + end_date=datetime.strptime(end_date, "%Y-%m-%d")) + + @task + def update_daily_notices_metadata_with_fetched_data_from_repo(): start_date = get_dag_param(key=START_DATE_PARAM_KEY, raise_error=True) end_date = get_dag_param(key=END_DATE_PARAM_KEY, raise_error=True) - update_daily_notices_metadata_from_ted(start_date=start_date, end_date=end_date) + update_daily_notices_metadata_with_fetched_data(start_date=datetime.strptime(start_date, "%Y-%m-%d"), + end_date=datetime.strptime(end_date, "%Y-%m-%d")) + + update_daily_notices_metadata_from_ted_api() >> update_daily_notices_metadata_with_fetched_data_from_repo() + - update_daily_notices_metadata_from_ted() \ No newline at end of file +dag = daily_notices_metadata_update() diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index 426e57d0..3c00feb7 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -5,8 +5,10 @@ from pymongo import MongoClient from ted_sws import config +from ted_sws.core.model.notice import Notice, NoticeStatus from ted_sws.core.model.supra_notice import DailyNoticesMetadata from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository +from ted_sws.data_manager.adapters.notice_repository import NoticeRepository from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI DEFAULT_TED_API_START_DATE = "2023-09-29" # TODO: Change to 2014-01-01 @@ -57,7 +59,7 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, # Getting from metadata repository dates that are not in the repository from date range dates_not_in_repository = [day for day in date_range if - not daily_notices_metadata_repo.list_daily_notices_metadata_aggregation_date()] + day not in daily_notices_metadata_repo.list_daily_notices_metadata_aggregation_date()] # Getting from TED API dates that are not in the repository from date range # TODO: If in ted are 0 notices, coverage is 1 to all @@ -70,3 +72,51 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, daily_notices_metadata = DailyNoticesMetadata(aggregation_date=day) daily_notices_metadata.ted_api_notice_ids = [notice[TED_API_NOTICE_ID_FIELD] for notice in notice_ids] daily_notices_metadata_repo.add(daily_notices_metadata) + + +def update_daily_notices_metadata_with_fetched_data(start_date: date = None, + end_date: date = None, + ted_api: TedAPIAdapter = None, + mongo_client: MongoClient = None, + daily_notices_metadata_repo: DailyNoticesMetadataRepository = None): + """ + Updates the daily notices metadata witch fetched data. + """ + + start_date = start_date or datetime.strptime(DEFAULT_TED_API_START_DATE, DEFAULT_TED_API_START_DATE_FORMAT) + end_date = end_date or datetime.today() - timedelta(days=1) + + if start_date > end_date: + raise Exception("Start date cannot be greater than end date") + + ted_api = ted_api or TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) + mongo_client = mongo_client or MongoClient(config.MONGO_DB_AUTH_URL) + daily_notices_metadata_repo = daily_notices_metadata_repo or DailyNoticesMetadataRepository(mongo_client) + notice_repo = NoticeRepository(mongo_client) + + # Generate list of dates from date range + date_range = generate_list_of_dates_from_date_range(start_date, end_date) + + for day in date_range: + daily_notices_metadata = daily_notices_metadata_repo.get(day) + for notice_id in daily_notices_metadata.ted_api_notice_ids: + notice: Notice = notice_repo.get(notice_id) + + notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses} + mapping_suite_packages = [] + fetched_notice_ids = [] + + if notice: + fetched_notice_ids.append(notice_id) + notice_status = notice.status + notice_statuses[notice_status] += 1 + if notice_status >= NoticeStatus.TRANSFORMED: # Having distilled_rdf_manifestation + mapping_suite_id = notice.rdf_manifestation.mapping_suite_id + mapping_suite_packages.append(mapping_suite_id) + + daily_notices_metadata.notice_statuses= notice_statuses + daily_notices_metadata.mapping_suite_packages = mapping_suite_packages + daily_notices_metadata.fetched_notice_ids = fetched_notice_ids + + + daily_notices_metadata_repo.update(daily_notices_metadata) From 6c4f6145b4e7ee29c6e854b788d8fd6a7a96aa05 Mon Sep 17 00:00:00 2001 From: CaptainOfHacks <39195263+CaptainOfHacks@users.noreply.github.com> Date: Tue, 3 Oct 2023 15:27:31 +0300 Subject: [PATCH 05/13] add cumulative frequency --- ted_sws/core/model/supra_notice.py | 2 +- .../daily_notices_metadata_services.py | 47 +++++++++++++++---- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index 1b5f7361..fa6be44e 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -61,7 +61,7 @@ class Config: orm_mode = True -NOTICE_STATUSES_DEFAULT_STATS = {str(notice_status).lower(): 0 for notice_status in NoticeStatus} +NOTICE_STATUSES_DEFAULT_STATS = {notice_status: 0 for notice_status in NoticeStatus} class DailyNoticesMetadata(DailyNoticesMetadataABC): diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index 3c00feb7..3c43fd3e 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -21,6 +21,32 @@ TED_API_QUERY_FIELD: "PD=[{aggregation_date}]" } +NOTICE_SUCCESS_TRANSITION_DOWNSTREAM = [NoticeStatus.PUBLISHED, NoticeStatus.ELIGIBLE_FOR_PUBLISHING, + NoticeStatus.PACKAGED, NoticeStatus.ELIGIBLE_FOR_PACKAGING, + NoticeStatus.VALIDATED, NoticeStatus.DISTILLED, + NoticeStatus.TRANSFORMED, NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION, + NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION, NoticeStatus.NORMALISED_METADATA, + NoticeStatus.INDEXED, NoticeStatus.RAW] + +NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION = { + NoticeStatus.PUBLICLY_AVAILABLE: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM, + NoticeStatus.PUBLICLY_UNAVAILABLE: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM, + NoticeStatus.PUBLISHED: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[1:], + NoticeStatus.ELIGIBLE_FOR_PUBLISHING: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[2:], + NoticeStatus.INELIGIBLE_FOR_PUBLISHING: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[2:], + NoticeStatus.PACKAGED: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[3:], + NoticeStatus.ELIGIBLE_FOR_PACKAGING: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[4:], + NoticeStatus.INELIGIBLE_FOR_PACKAGING: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[4:], + NoticeStatus.VALIDATED: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[5:], + NoticeStatus.DISTILLED: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[6:], + NoticeStatus.TRANSFORMED: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[7:], + NoticeStatus.PREPROCESSED_FOR_TRANSFORMATION: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[8:], + NoticeStatus.ELIGIBLE_FOR_TRANSFORMATION: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[9:], + NoticeStatus.INELIGIBLE_FOR_TRANSFORMATION: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[9:], + NoticeStatus.NORMALISED_METADATA: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[10:], + NoticeStatus.INDEXED: NOTICE_SUCCESS_TRANSITION_DOWNSTREAM[11:], +} + def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> Optional[list]: """ @@ -101,22 +127,25 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, daily_notices_metadata = daily_notices_metadata_repo.get(day) for notice_id in daily_notices_metadata.ted_api_notice_ids: notice: Notice = notice_repo.get(notice_id) - - notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses} - mapping_suite_packages = [] - fetched_notice_ids = [] - if notice: + notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses} + mapping_suite_packages = [] + fetched_notice_ids = [] fetched_notice_ids.append(notice_id) notice_status = notice.status notice_statuses[notice_status] += 1 - if notice_status >= NoticeStatus.TRANSFORMED: # Having distilled_rdf_manifestation + if notice_status >= NoticeStatus.TRANSFORMED: # Having distilled_rdf_manifestation mapping_suite_id = notice.rdf_manifestation.mapping_suite_id mapping_suite_packages.append(mapping_suite_id) - - daily_notices_metadata.notice_statuses= notice_statuses + for current_notice_status, linked_notice_statuses in NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION.items(): + current_notice_status = current_notice_status + if notice_statuses[current_notice_status] > 0: + for linked_notice_status in linked_notice_statuses: + linked_notice_status = linked_notice_status + notice_statuses[linked_notice_status] += notice_statuses[current_notice_status] + + daily_notices_metadata.notice_statuses = notice_statuses daily_notices_metadata.mapping_suite_packages = mapping_suite_packages daily_notices_metadata.fetched_notice_ids = fetched_notice_ids - daily_notices_metadata_repo.update(daily_notices_metadata) From 262d3fbb243c4d0a1f47cce73cd68b8d9acd8d9a Mon Sep 17 00:00:00 2001 From: Dumitru Date: Tue, 3 Oct 2023 17:31:06 +0300 Subject: [PATCH 06/13] WIP --- ted_sws/core/model/supra_notice.py | 2 +- .../daily_notices_metadata_repository.py | 2 +- .../daily_notices_metadata_services.py | 43 ++++++++++--------- .../test_notices_metadata_services.py | 21 ++++++++- 4 files changed, 44 insertions(+), 24 deletions(-) diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index fa6be44e..6277b2e2 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -61,7 +61,7 @@ class Config: orm_mode = True -NOTICE_STATUSES_DEFAULT_STATS = {notice_status: 0 for notice_status in NoticeStatus} +NOTICE_STATUSES_DEFAULT_STATS = {str(notice_status): 0 for notice_status in NoticeStatus} class DailyNoticesMetadata(DailyNoticesMetadataABC): diff --git a/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py index ff6d8562..2a81068c 100644 --- a/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py +++ b/ted_sws/data_manager/adapters/daily_notices_metadata_repository.py @@ -101,5 +101,5 @@ def list_daily_notices_metadata_aggregation_date(self) -> List[date]: DAILY_NOTICES_METADATA_ID: 0})) if not daily_notices_metadata_list: return [] - return [datetime.fromisoformat(aggregation_date[DAILY_NOTICES_METADATA_AGGREGATION_DATE]) for aggregation_date + return [datetime.fromisoformat(aggregation_date[DAILY_NOTICES_METADATA_AGGREGATION_DATE]).date() for aggregation_date in daily_notices_metadata_list] diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index 3c43fd3e..ab66163b 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -1,5 +1,5 @@ from datetime import date, datetime, timedelta -from typing import Optional +from typing import Optional, List from dateutil import rrule from pymongo import MongoClient @@ -48,7 +48,7 @@ } -def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> Optional[list]: +def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> Optional[List[date]]: """ Given a date range returns all daily dates in that range :param start_date: @@ -57,7 +57,7 @@ def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> """ if start_date > end_date: return None - return [dt for dt in rrule.rrule(rrule.DAILY, + return [ dt.date() for dt in rrule.rrule(rrule.DAILY, dtstart=start_date, until=end_date)] @@ -102,7 +102,6 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, def update_daily_notices_metadata_with_fetched_data(start_date: date = None, end_date: date = None, - ted_api: TedAPIAdapter = None, mongo_client: MongoClient = None, daily_notices_metadata_repo: DailyNoticesMetadataRepository = None): """ @@ -115,7 +114,6 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, if start_date > end_date: raise Exception("Start date cannot be greater than end date") - ted_api = ted_api or TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) mongo_client = mongo_client or MongoClient(config.MONGO_DB_AUTH_URL) daily_notices_metadata_repo = daily_notices_metadata_repo or DailyNoticesMetadataRepository(mongo_client) notice_repo = NoticeRepository(mongo_client) @@ -125,27 +123,32 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, for day in date_range: daily_notices_metadata = daily_notices_metadata_repo.get(day) + + mapping_suite_packages = [] + fetched_notice_ids = [] + notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} + for notice_id in daily_notices_metadata.ted_api_notice_ids: notice: Notice = notice_repo.get(notice_id) + if notice: - notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses} - mapping_suite_packages = [] - fetched_notice_ids = [] fetched_notice_ids.append(notice_id) notice_status = notice.status - notice_statuses[notice_status] += 1 + notice_statuses[str(notice_status)] += 1 if notice_status >= NoticeStatus.TRANSFORMED: # Having distilled_rdf_manifestation mapping_suite_id = notice.rdf_manifestation.mapping_suite_id - mapping_suite_packages.append(mapping_suite_id) - for current_notice_status, linked_notice_statuses in NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION.items(): - current_notice_status = current_notice_status - if notice_statuses[current_notice_status] > 0: - for linked_notice_status in linked_notice_statuses: - linked_notice_status = linked_notice_status - notice_statuses[linked_notice_status] += notice_statuses[current_notice_status] - - daily_notices_metadata.notice_statuses = notice_statuses - daily_notices_metadata.mapping_suite_packages = mapping_suite_packages - daily_notices_metadata.fetched_notice_ids = fetched_notice_ids + if mapping_suite_id not in mapping_suite_packages: + mapping_suite_packages.append(mapping_suite_id) + + for current_notice_status, linked_notice_statuses in NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION.items(): + current_notice_status = str(current_notice_status) + if notice_statuses[current_notice_status] > 0: + for linked_notice_status in linked_notice_statuses: + linked_notice_status = linked_notice_status + notice_statuses[linked_notice_status] += notice_statuses[current_notice_status] + + daily_notices_metadata.notice_statuses = notice_statuses + daily_notices_metadata.mapping_suite_packages = mapping_suite_packages + daily_notices_metadata.fetched_notice_ids = fetched_notice_ids daily_notices_metadata_repo.update(daily_notices_metadata) diff --git a/tests/unit/data_manager/services/test_notices_metadata_services.py b/tests/unit/data_manager/services/test_notices_metadata_services.py index 1ece797f..17a45b2c 100644 --- a/tests/unit/data_manager/services/test_notices_metadata_services.py +++ b/tests/unit/data_manager/services/test_notices_metadata_services.py @@ -1,8 +1,11 @@ from datetime import date +from pymongo import MongoClient + from ted_sws import config from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository -from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted +from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ + update_daily_notices_metadata_with_fetched_data from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI @@ -24,4 +27,18 @@ def test_update_daily_notices_metadata_from_ted(mongodb_client): # end_date=date(2021, 1, 7), # ted_api=ted_api, # mongo_client=mongodb_client, - # daily_notices_metadata_repo=daily_notices_metadata_repo) \ No newline at end of file + # daily_notices_metadata_repo=daily_notices_metadata_repo) + + +def test_update_daily_notices_metadata_with_fetched_data(): #mongodb_client + mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL) + + ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) + + + daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongodb_client) + + update_daily_notices_metadata_with_fetched_data(start_date=date(2021, 1, 7), + end_date=date(2021, 1, 7), + mongo_client=mongodb_client, + daily_notices_metadata_repo=daily_notices_metadata_repo) \ No newline at end of file From 272d0a88b49dc126874bd88368ea24ce873bac05 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Tue, 3 Oct 2023 23:08:08 +0300 Subject: [PATCH 07/13] WIP --- .../services/daily_notices_metadata_services.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index ab66163b..3eaafcd3 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -127,6 +127,7 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, mapping_suite_packages = [] fetched_notice_ids = [] notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} + result_notice_statuses = notice_statuses.copy() for notice_id in daily_notices_metadata.ted_api_notice_ids: notice: Notice = notice_repo.get(notice_id) @@ -140,15 +141,16 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, if mapping_suite_id not in mapping_suite_packages: mapping_suite_packages.append(mapping_suite_id) + #result_notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} for current_notice_status, linked_notice_statuses in NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION.items(): current_notice_status = str(current_notice_status) if notice_statuses[current_notice_status] > 0: for linked_notice_status in linked_notice_statuses: - linked_notice_status = linked_notice_status - notice_statuses[linked_notice_status] += notice_statuses[current_notice_status] + #linked_notice_status = linked_notice_status + result_notice_statuses[str(linked_notice_status)] += notice_statuses[current_notice_status] - daily_notices_metadata.notice_statuses = notice_statuses + daily_notices_metadata.notice_statuses = result_notice_statuses daily_notices_metadata.mapping_suite_packages = mapping_suite_packages daily_notices_metadata.fetched_notice_ids = fetched_notice_ids daily_notices_metadata_repo.update(daily_notices_metadata) From 78440f7c5b7f523f055df4848f0a68b0c72705e4 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Wed, 4 Oct 2023 00:23:25 +0300 Subject: [PATCH 08/13] WIP --- ted_sws/core/model/supra_notice.py | 7 +++++++ .../services/daily_notices_metadata_services.py | 7 +++++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index 6277b2e2..bb57dfb4 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -76,6 +76,13 @@ class DailyNoticesMetadata(DailyNoticesMetadataABC): notice_statuses: dict = NOTICE_STATUSES_DEFAULT_STATS + @computed_field + @property + def fetched_notices_coverage(self) -> float: + if self.fetched_notices_count > 0: + return self.ted_api_notice_count / self.fetched_notices_count + return 0 + @computed_field @property def notice_statuses_coverage(self) -> dict: diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index 3eaafcd3..b28f89b2 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -83,7 +83,7 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, # Generate list of dates from date range date_range = generate_list_of_dates_from_date_range(start_date, end_date) - # Getting from metadata repository dates that are not in the repository from date range + # Getting from metadata repository dates that are not in the repository from date rangeasdasd dates_not_in_repository = [day for day in date_range if day not in daily_notices_metadata_repo.list_daily_notices_metadata_aggregation_date()] @@ -127,7 +127,7 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, mapping_suite_packages = [] fetched_notice_ids = [] notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} - result_notice_statuses = notice_statuses.copy() + for notice_id in daily_notices_metadata.ted_api_notice_ids: notice: Notice = notice_repo.get(notice_id) @@ -141,6 +141,7 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, if mapping_suite_id not in mapping_suite_packages: mapping_suite_packages.append(mapping_suite_id) + result_notice_statuses = notice_statuses.copy() #result_notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} for current_notice_status, linked_notice_statuses in NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION.items(): current_notice_status = str(current_notice_status) @@ -154,3 +155,5 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, daily_notices_metadata.mapping_suite_packages = mapping_suite_packages daily_notices_metadata.fetched_notice_ids = fetched_notice_ids daily_notices_metadata_repo.update(daily_notices_metadata) + + From 102ebf19080dd50e6e29e82a44406f98a9336093 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Wed, 4 Oct 2023 12:54:43 +0300 Subject: [PATCH 09/13] prepare tests --- ted_sws/core/model/supra_notice.py | 2 +- .../daily_notices_metadata_services.py | 29 +++---- tests/e2e/data_manager/conftest.py | 7 ++ .../test_notices_metadata_services.py | 84 +++++++++++++++++++ .../core/model/test_daily_notice_metadata.py | 17 +++- tests/unit/data_manager/services/__init__.py | 0 .../test_notices_metadata_services.py | 44 ---------- 7 files changed, 121 insertions(+), 62 deletions(-) create mode 100644 tests/e2e/data_manager/test_notices_metadata_services.py delete mode 100644 tests/unit/data_manager/services/__init__.py delete mode 100644 tests/unit/data_manager/services/test_notices_metadata_services.py diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index bb57dfb4..970351e3 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -80,7 +80,7 @@ class DailyNoticesMetadata(DailyNoticesMetadataABC): @property def fetched_notices_coverage(self) -> float: if self.fetched_notices_count > 0: - return self.ted_api_notice_count / self.fetched_notices_count + return self.fetched_notices_count / self.ted_api_notice_count return 0 @computed_field diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index b28f89b2..04474568 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -11,7 +11,7 @@ from ted_sws.data_manager.adapters.notice_repository import NoticeRepository from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI -DEFAULT_TED_API_START_DATE = "2023-09-29" # TODO: Change to 2014-01-01 +DEFAULT_TED_API_START_DATE = "2014-01-01" DEFAULT_TED_API_START_DATE_FORMAT = "%Y-%m-%d" TED_API_NOTICE_ID_FIELD = "ND" TED_API_WILDCARD_DATE_FORMAT = "%Y%m%d*" @@ -65,7 +65,6 @@ def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> def update_daily_notices_metadata_from_ted(start_date: date = None, end_date: date = None, ted_api: TedAPIAdapter = None, - mongo_client: MongoClient = None, daily_notices_metadata_repo: DailyNoticesMetadataRepository = None): """ Updates the daily notices metadata from the TED API. @@ -77,18 +76,18 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, raise Exception("Start date cannot be greater than end date") ted_api = ted_api or TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) - mongo_client = mongo_client or MongoClient(config.MONGO_DB_AUTH_URL) - daily_notices_metadata_repo = daily_notices_metadata_repo or DailyNoticesMetadataRepository(mongo_client) + if not daily_notices_metadata_repo: + mongo_client = MongoClient(config.MONGO_DB_AUTH_URL) + daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongo_client) # Generate list of dates from date range date_range = generate_list_of_dates_from_date_range(start_date, end_date) - # Getting from metadata repository dates that are not in the repository from date rangeasdasd + # Getting from metadata repository dates that are not in the repository from date range dates_not_in_repository = [day for day in date_range if day not in daily_notices_metadata_repo.list_daily_notices_metadata_aggregation_date()] # Getting from TED API dates that are not in the repository from date range - # TODO: If in ted are 0 notices, coverage is 1 to all for day in dates_not_in_repository: ted_api_query = DAILY_NOTICES_METADATA_TED_API_QUERY ted_api_query[TED_API_QUERY_FIELD] = ted_api_query[TED_API_QUERY_FIELD].format( @@ -102,7 +101,7 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, def update_daily_notices_metadata_with_fetched_data(start_date: date = None, end_date: date = None, - mongo_client: MongoClient = None, + notice_repo: NoticeRepository = None, daily_notices_metadata_repo: DailyNoticesMetadataRepository = None): """ Updates the daily notices metadata witch fetched data. @@ -114,21 +113,23 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, if start_date > end_date: raise Exception("Start date cannot be greater than end date") - mongo_client = mongo_client or MongoClient(config.MONGO_DB_AUTH_URL) - daily_notices_metadata_repo = daily_notices_metadata_repo or DailyNoticesMetadataRepository(mongo_client) - notice_repo = NoticeRepository(mongo_client) + if not daily_notices_metadata_repo: + mongo_client = MongoClient(config.MONGO_DB_AUTH_URL) + daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongo_client) + notice_repo = notice_repo or NoticeRepository(daily_notices_metadata_repo.mongodb_client) # Generate list of dates from date range date_range = generate_list_of_dates_from_date_range(start_date, end_date) for day in date_range: daily_notices_metadata = daily_notices_metadata_repo.get(day) + if not daily_notices_metadata: + continue mapping_suite_packages = [] fetched_notice_ids = [] notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} - for notice_id in daily_notices_metadata.ted_api_notice_ids: notice: Notice = notice_repo.get(notice_id) @@ -136,24 +137,20 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, fetched_notice_ids.append(notice_id) notice_status = notice.status notice_statuses[str(notice_status)] += 1 - if notice_status >= NoticeStatus.TRANSFORMED: # Having distilled_rdf_manifestation + if notice_status >= NoticeStatus.TRANSFORMED: # Having rdf_manifestation mapping_suite_id = notice.rdf_manifestation.mapping_suite_id if mapping_suite_id not in mapping_suite_packages: mapping_suite_packages.append(mapping_suite_id) result_notice_statuses = notice_statuses.copy() - #result_notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} for current_notice_status, linked_notice_statuses in NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION.items(): current_notice_status = str(current_notice_status) if notice_statuses[current_notice_status] > 0: for linked_notice_status in linked_notice_statuses: - #linked_notice_status = linked_notice_status result_notice_statuses[str(linked_notice_status)] += notice_statuses[current_notice_status] - daily_notices_metadata.notice_statuses = result_notice_statuses daily_notices_metadata.mapping_suite_packages = mapping_suite_packages daily_notices_metadata.fetched_notice_ids = fetched_notice_ids daily_notices_metadata_repo.update(daily_notices_metadata) - diff --git a/tests/e2e/data_manager/conftest.py b/tests/e2e/data_manager/conftest.py index c81feed3..594634de 100644 --- a/tests/e2e/data_manager/conftest.py +++ b/tests/e2e/data_manager/conftest.py @@ -1,3 +1,5 @@ +from datetime import date + import pytest from ted_sws.core.model.manifestation import RDFManifestation @@ -225,3 +227,8 @@ def notice_with_distilled_status(notice_2020, rdf_file_content): def packaged_notice(): test_notice_repository = NoticeRepositoryInFileSystem(repository_path=TEST_DATA_PATH / "notices" / "packaged") return test_notice_repository.get("632521-2022") + + +@pytest.fixture +def example_date(): + return date(2021, 1, 7) \ No newline at end of file diff --git a/tests/e2e/data_manager/test_notices_metadata_services.py b/tests/e2e/data_manager/test_notices_metadata_services.py new file mode 100644 index 00000000..84e17273 --- /dev/null +++ b/tests/e2e/data_manager/test_notices_metadata_services.py @@ -0,0 +1,84 @@ +from datetime import date + +from pymongo import MongoClient + +from ted_sws import config +from ted_sws.core.model.notice import Notice +from ted_sws.core.model.supra_notice import DailyNoticesMetadata +from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository +from ted_sws.data_manager.adapters.notice_repository import NoticeRepository +from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ + update_daily_notices_metadata_with_fetched_data +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI + + +def test_update_daily_notices_metadata_from_ted(fake_mongodb_client, example_date): + """ + Test update_daily_notices_metadata_from_ted function + """ + + ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) + daily_notices_metadata_repo = DailyNoticesMetadataRepository(fake_mongodb_client) + + update_daily_notices_metadata_from_ted(start_date=example_date, + end_date=example_date, + ted_api=ted_api, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + daily_notices_metadata: DailyNoticesMetadata = daily_notices_metadata_repo.get(example_date) + assert daily_notices_metadata is not None + assert daily_notices_metadata.ted_api_notice_ids is not None + assert len(daily_notices_metadata.ted_api_notice_ids) == daily_notices_metadata.ted_api_notice_count + + update_daily_notices_metadata_from_ted(start_date=example_date, + end_date=example_date, + ted_api=ted_api, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + assert len(list(daily_notices_metadata_repo.list())) == 1 + + +def test_update_daily_notices_metadata_with_fetched_data(fake_mongodb_client, + fake_notice_repository, + example_date, + notice_with_distilled_status, + notice_2021): + daily_notices_metadata_repo = DailyNoticesMetadataRepository(fake_mongodb_client) + + update_daily_notices_metadata_with_fetched_data(start_date=example_date, + end_date=example_date, + notice_repo=fake_notice_repository, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + assert daily_notices_metadata_repo.get(example_date) is None + + fake_notice_repository.add(notice_with_distilled_status) + fake_notice_repository.add(notice_2021) + + daily_notices_metadata: DailyNoticesMetadata = DailyNoticesMetadata(aggregation_date=example_date) + daily_notices_metadata.ted_api_notice_ids.append(notice_with_distilled_status.ted_id) + daily_notices_metadata.ted_api_notice_ids.append(notice_2021.ted_id) + daily_notices_metadata_repo.add(daily_notices_metadata) + + update_daily_notices_metadata_with_fetched_data(start_date=example_date, + end_date=example_date, + notice_repo=fake_notice_repository, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + daily_notices_metadata: DailyNoticesMetadata = daily_notices_metadata_repo.get(example_date) + + assert daily_notices_metadata is not None + + assert daily_notices_metadata.fetched_notices_count == 2 + assert daily_notices_metadata.ted_api_notice_count == 2 + assert daily_notices_metadata.notice_statuses['RAW'] == 2 + assert daily_notices_metadata.notice_statuses['INDEXED'] > 0 + assert daily_notices_metadata.notice_statuses['NORMALISED_METADATA'] > 0 + assert daily_notices_metadata.notice_statuses['ELIGIBLE_FOR_TRANSFORMATION'] > 0 + assert daily_notices_metadata.notice_statuses['INELIGIBLE_FOR_TRANSFORMATION'] == 0 + + assert daily_notices_metadata.notice_statuses_coverage['RAW_coverage'] == 1.0 + assert daily_notices_metadata.notice_statuses_coverage['INDEXED_coverage'] == 0.5 + assert daily_notices_metadata.notice_statuses_coverage['NORMALISED_METADATA_coverage'] == 0.5 + assert daily_notices_metadata.notice_statuses_coverage['INELIGIBLE_FOR_TRANSFORMATION_coverage'] == 0.0 + assert len(daily_notices_metadata.mapping_suite_packages) > 0 diff --git a/tests/unit/core/model/test_daily_notice_metadata.py b/tests/unit/core/model/test_daily_notice_metadata.py index eccc23fb..011ecbe4 100644 --- a/tests/unit/core/model/test_daily_notice_metadata.py +++ b/tests/unit/core/model/test_daily_notice_metadata.py @@ -2,7 +2,22 @@ from ted_sws.core.model.supra_notice import DailyNoticesMetadata -def test_daily_notice_metadata_model(daily_notice_metadata): +def test_daily_notice_metadata_model(daily_notice_metadata: DailyNoticesMetadata): daily_notice_metadata_dict = daily_notice_metadata.model_dump() daily_notice_metadata_from_dict = DailyNoticesMetadata(**daily_notice_metadata_dict) assert daily_notice_metadata == daily_notice_metadata_from_dict + + assert daily_notice_metadata.fetched_notices_coverage == 0 + assert daily_notice_metadata.ted_api_notice_count == 0 + assert daily_notice_metadata.fetched_notices_count == 0 + + daily_notice_metadata.ted_api_notice_ids = ["1", "2", "3"] + daily_notice_metadata.fetched_notice_ids = ["1", "2"] + + assert daily_notice_metadata.fetched_notices_coverage == 0.6666666666666666 + assert daily_notice_metadata.ted_api_notice_count == 3 + assert daily_notice_metadata.fetched_notices_count == 2 + + daily_notice_metadata.notice_statuses = {str(NoticeStatus.PUBLISHED): 1, str(NoticeStatus.RAW): 2} + assert daily_notice_metadata.notice_statuses_coverage == {"PUBLISHED_coverage": 0.3333333333333333, + "RAW_coverage": 0.6666666666666666} diff --git a/tests/unit/data_manager/services/__init__.py b/tests/unit/data_manager/services/__init__.py deleted file mode 100644 index e69de29b..00000000 diff --git a/tests/unit/data_manager/services/test_notices_metadata_services.py b/tests/unit/data_manager/services/test_notices_metadata_services.py deleted file mode 100644 index 17a45b2c..00000000 --- a/tests/unit/data_manager/services/test_notices_metadata_services.py +++ /dev/null @@ -1,44 +0,0 @@ -from datetime import date - -from pymongo import MongoClient - -from ted_sws import config -from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository -from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ - update_daily_notices_metadata_with_fetched_data -from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI - - -def test_update_daily_notices_metadata_from_ted(mongodb_client): - """ - Test update_daily_notices_metadata_from_ted function - """ - - ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) - daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongodb_client) - - update_daily_notices_metadata_from_ted(start_date=date(2021, 1, 7), - end_date=date(2021, 1, 7), - ted_api=ted_api, - mongo_client=mongodb_client, - daily_notices_metadata_repo=daily_notices_metadata_repo) - - # update_daily_notices_metadata_from_ted(start_date=date(2021, 1, 7), - # end_date=date(2021, 1, 7), - # ted_api=ted_api, - # mongo_client=mongodb_client, - # daily_notices_metadata_repo=daily_notices_metadata_repo) - - -def test_update_daily_notices_metadata_with_fetched_data(): #mongodb_client - mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL) - - ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) - - - daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongodb_client) - - update_daily_notices_metadata_with_fetched_data(start_date=date(2021, 1, 7), - end_date=date(2021, 1, 7), - mongo_client=mongodb_client, - daily_notices_metadata_repo=daily_notices_metadata_repo) \ No newline at end of file From e61ab8567ba12bd9b154097d2df60ed8b678a142 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Wed, 4 Oct 2023 12:59:07 +0300 Subject: [PATCH 10/13] Update conftest.py --- tests/unit/core/model/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/unit/core/model/conftest.py b/tests/unit/core/model/conftest.py index 35264a03..89ddbfe5 100644 --- a/tests/unit/core/model/conftest.py +++ b/tests/unit/core/model/conftest.py @@ -82,9 +82,9 @@ def transformation_eligible_notice(indexed_notice) -> Notice: @pytest.fixture() -def notice_aggregation_date_date() -> date: +def notice_aggregation_date() -> date: return datetime.strptime("2021-01-08", "%Y-%m-%d") @pytest.fixture() -def daily_notice_metadata(notice_aggregation_date_date) -> DailyNoticesMetadata: - return DailyNoticesMetadata(aggregation_date=notice_aggregation_date_date) \ No newline at end of file +def daily_notice_metadata(notice_aggregation_date) -> DailyNoticesMetadata: + return DailyNoticesMetadata(aggregation_date=notice_aggregation_date) \ No newline at end of file From a45c5d36a6d0dc20dfbd9050eca453bd9226fde6 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Wed, 4 Oct 2023 15:23:29 +0300 Subject: [PATCH 11/13] make changes according review --- dags/daily_notices_metadata_update.py | 2 +- ted_sws/core/model/supra_notice.py | 7 +++---- .../services/daily_notices_metadata_services.py | 0 .../test_notices_metadata_services.py | 8 +------- 4 files changed, 5 insertions(+), 12 deletions(-) rename ted_sws/{data_manager => supra_notice_manager}/services/daily_notices_metadata_services.py (100%) rename tests/e2e/{data_manager => supra_notice_manager}/test_notices_metadata_services.py (93%) diff --git a/dags/daily_notices_metadata_update.py b/dags/daily_notices_metadata_update.py index 9acc1bd7..f4c42135 100644 --- a/dags/daily_notices_metadata_update.py +++ b/dags/daily_notices_metadata_update.py @@ -9,7 +9,7 @@ from dags import DEFAULT_DAG_ARGUMENTS from dags.dags_utils import get_dag_param -from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ +from ted_sws.supra_notice_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ update_daily_notices_metadata_with_fetched_data START_DATE_PARAM_KEY = "start_date" diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index 970351e3..7348e0c4 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -16,6 +16,8 @@ from ted_sws.core.model.manifestation import Manifestation, ValidationSummaryReport from ted_sws.core.model.notice import NoticeStatus +NOTICE_STATUSES_DEFAULT_STATS = {str(notice_status): 0 for notice_status in NoticeStatus} + class SupraNotice(PropertyBaseModel, abc.ABC): """ @@ -54,16 +56,13 @@ class DailySupraNotice(SupraNotice): validation_summary: Optional[ValidationSummaryReport] = None -class DailyNoticesMetadataABC(PropertyBaseModel): +class DailyNoticesMetadataABC(PropertyBaseModel, abc.ABC): class Config: underscore_attrs_are_private = True validate_assignment = True orm_mode = True -NOTICE_STATUSES_DEFAULT_STATS = {str(notice_status): 0 for notice_status in NoticeStatus} - - class DailyNoticesMetadata(DailyNoticesMetadataABC): """ This is an aggregate over the notices published in TED in a specific day. diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/supra_notice_manager/services/daily_notices_metadata_services.py similarity index 100% rename from ted_sws/data_manager/services/daily_notices_metadata_services.py rename to ted_sws/supra_notice_manager/services/daily_notices_metadata_services.py diff --git a/tests/e2e/data_manager/test_notices_metadata_services.py b/tests/e2e/supra_notice_manager/test_notices_metadata_services.py similarity index 93% rename from tests/e2e/data_manager/test_notices_metadata_services.py rename to tests/e2e/supra_notice_manager/test_notices_metadata_services.py index 84e17273..08f516c2 100644 --- a/tests/e2e/data_manager/test_notices_metadata_services.py +++ b/tests/e2e/supra_notice_manager/test_notices_metadata_services.py @@ -1,13 +1,7 @@ -from datetime import date - -from pymongo import MongoClient - from ted_sws import config -from ted_sws.core.model.notice import Notice from ted_sws.core.model.supra_notice import DailyNoticesMetadata from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository -from ted_sws.data_manager.adapters.notice_repository import NoticeRepository -from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ +from ted_sws.supra_notice_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ update_daily_notices_metadata_with_fetched_data from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI From 4f63665f7dfeba0a02bc73de33437adf15a32c0b Mon Sep 17 00:00:00 2001 From: Dumitru Date: Wed, 4 Oct 2023 16:08:47 +0300 Subject: [PATCH 12/13] update conftests --- tests/e2e/conftest.py | 7 +++++++ tests/e2e/data_manager/conftest.py | 7 ------- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/tests/e2e/conftest.py b/tests/e2e/conftest.py index 2f4e672a..a0fee080 100644 --- a/tests/e2e/conftest.py +++ b/tests/e2e/conftest.py @@ -1,3 +1,5 @@ +from datetime import date + import mongomock import pymongo import pytest @@ -78,3 +80,8 @@ def path_to_file_system_repository(): @pytest.fixture def fake_notice_repository(fake_mongodb_client): return NoticeRepository(mongodb_client=fake_mongodb_client) + + +@pytest.fixture +def example_date(): + return date(2021, 1, 7) diff --git a/tests/e2e/data_manager/conftest.py b/tests/e2e/data_manager/conftest.py index 594634de..c81feed3 100644 --- a/tests/e2e/data_manager/conftest.py +++ b/tests/e2e/data_manager/conftest.py @@ -1,5 +1,3 @@ -from datetime import date - import pytest from ted_sws.core.model.manifestation import RDFManifestation @@ -227,8 +225,3 @@ def notice_with_distilled_status(notice_2020, rdf_file_content): def packaged_notice(): test_notice_repository = NoticeRepositoryInFileSystem(repository_path=TEST_DATA_PATH / "notices" / "packaged") return test_notice_repository.get("632521-2022") - - -@pytest.fixture -def example_date(): - return date(2021, 1, 7) \ No newline at end of file From 7440de78d074b364b9244c5e380b1455b20a1a6b Mon Sep 17 00:00:00 2001 From: Dumitru Date: Wed, 4 Oct 2023 16:10:48 +0300 Subject: [PATCH 13/13] Update test_notices_metadata_services.py --- .../supra_notice_manager/test_notices_metadata_services.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/e2e/supra_notice_manager/test_notices_metadata_services.py b/tests/e2e/supra_notice_manager/test_notices_metadata_services.py index 08f516c2..f1f92064 100644 --- a/tests/e2e/supra_notice_manager/test_notices_metadata_services.py +++ b/tests/e2e/supra_notice_manager/test_notices_metadata_services.py @@ -35,7 +35,7 @@ def test_update_daily_notices_metadata_from_ted(fake_mongodb_client, example_dat def test_update_daily_notices_metadata_with_fetched_data(fake_mongodb_client, fake_notice_repository, example_date, - notice_with_distilled_status, + notice_with_rdf_manifestation, notice_2021): daily_notices_metadata_repo = DailyNoticesMetadataRepository(fake_mongodb_client) @@ -46,11 +46,11 @@ def test_update_daily_notices_metadata_with_fetched_data(fake_mongodb_client, assert daily_notices_metadata_repo.get(example_date) is None - fake_notice_repository.add(notice_with_distilled_status) + fake_notice_repository.add(notice_with_rdf_manifestation) fake_notice_repository.add(notice_2021) daily_notices_metadata: DailyNoticesMetadata = DailyNoticesMetadata(aggregation_date=example_date) - daily_notices_metadata.ted_api_notice_ids.append(notice_with_distilled_status.ted_id) + daily_notices_metadata.ted_api_notice_ids.append(notice_with_rdf_manifestation.ted_id) daily_notices_metadata.ted_api_notice_ids.append(notice_2021.ted_id) daily_notices_metadata_repo.add(daily_notices_metadata)