From 102ebf19080dd50e6e29e82a44406f98a9336093 Mon Sep 17 00:00:00 2001 From: Dumitru Date: Wed, 4 Oct 2023 12:54:43 +0300 Subject: [PATCH] prepare tests --- ted_sws/core/model/supra_notice.py | 2 +- .../daily_notices_metadata_services.py | 29 +++---- tests/e2e/data_manager/conftest.py | 7 ++ .../test_notices_metadata_services.py | 84 +++++++++++++++++++ .../core/model/test_daily_notice_metadata.py | 17 +++- tests/unit/data_manager/services/__init__.py | 0 .../test_notices_metadata_services.py | 44 ---------- 7 files changed, 121 insertions(+), 62 deletions(-) create mode 100644 tests/e2e/data_manager/test_notices_metadata_services.py delete mode 100644 tests/unit/data_manager/services/__init__.py delete mode 100644 tests/unit/data_manager/services/test_notices_metadata_services.py diff --git a/ted_sws/core/model/supra_notice.py b/ted_sws/core/model/supra_notice.py index bb57dfb4d..970351e39 100644 --- a/ted_sws/core/model/supra_notice.py +++ b/ted_sws/core/model/supra_notice.py @@ -80,7 +80,7 @@ class DailyNoticesMetadata(DailyNoticesMetadataABC): @property def fetched_notices_coverage(self) -> float: if self.fetched_notices_count > 0: - return self.ted_api_notice_count / self.fetched_notices_count + return self.fetched_notices_count / self.ted_api_notice_count return 0 @computed_field diff --git a/ted_sws/data_manager/services/daily_notices_metadata_services.py b/ted_sws/data_manager/services/daily_notices_metadata_services.py index b28f89b2f..04474568a 100644 --- a/ted_sws/data_manager/services/daily_notices_metadata_services.py +++ b/ted_sws/data_manager/services/daily_notices_metadata_services.py @@ -11,7 +11,7 @@ from ted_sws.data_manager.adapters.notice_repository import NoticeRepository from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI -DEFAULT_TED_API_START_DATE = "2023-09-29" # TODO: Change to 2014-01-01 +DEFAULT_TED_API_START_DATE = "2014-01-01" DEFAULT_TED_API_START_DATE_FORMAT = "%Y-%m-%d" TED_API_NOTICE_ID_FIELD = "ND" TED_API_WILDCARD_DATE_FORMAT = "%Y%m%d*" @@ -65,7 +65,6 @@ def generate_list_of_dates_from_date_range(start_date: date, end_date: date) -> def update_daily_notices_metadata_from_ted(start_date: date = None, end_date: date = None, ted_api: TedAPIAdapter = None, - mongo_client: MongoClient = None, daily_notices_metadata_repo: DailyNoticesMetadataRepository = None): """ Updates the daily notices metadata from the TED API. @@ -77,18 +76,18 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, raise Exception("Start date cannot be greater than end date") ted_api = ted_api or TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) - mongo_client = mongo_client or MongoClient(config.MONGO_DB_AUTH_URL) - daily_notices_metadata_repo = daily_notices_metadata_repo or DailyNoticesMetadataRepository(mongo_client) + if not daily_notices_metadata_repo: + mongo_client = MongoClient(config.MONGO_DB_AUTH_URL) + daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongo_client) # Generate list of dates from date range date_range = generate_list_of_dates_from_date_range(start_date, end_date) - # Getting from metadata repository dates that are not in the repository from date rangeasdasd + # Getting from metadata repository dates that are not in the repository from date range dates_not_in_repository = [day for day in date_range if day not in daily_notices_metadata_repo.list_daily_notices_metadata_aggregation_date()] # Getting from TED API dates that are not in the repository from date range - # TODO: If in ted are 0 notices, coverage is 1 to all for day in dates_not_in_repository: ted_api_query = DAILY_NOTICES_METADATA_TED_API_QUERY ted_api_query[TED_API_QUERY_FIELD] = ted_api_query[TED_API_QUERY_FIELD].format( @@ -102,7 +101,7 @@ def update_daily_notices_metadata_from_ted(start_date: date = None, def update_daily_notices_metadata_with_fetched_data(start_date: date = None, end_date: date = None, - mongo_client: MongoClient = None, + notice_repo: NoticeRepository = None, daily_notices_metadata_repo: DailyNoticesMetadataRepository = None): """ Updates the daily notices metadata witch fetched data. @@ -114,21 +113,23 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, if start_date > end_date: raise Exception("Start date cannot be greater than end date") - mongo_client = mongo_client or MongoClient(config.MONGO_DB_AUTH_URL) - daily_notices_metadata_repo = daily_notices_metadata_repo or DailyNoticesMetadataRepository(mongo_client) - notice_repo = NoticeRepository(mongo_client) + if not daily_notices_metadata_repo: + mongo_client = MongoClient(config.MONGO_DB_AUTH_URL) + daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongo_client) + notice_repo = notice_repo or NoticeRepository(daily_notices_metadata_repo.mongodb_client) # Generate list of dates from date range date_range = generate_list_of_dates_from_date_range(start_date, end_date) for day in date_range: daily_notices_metadata = daily_notices_metadata_repo.get(day) + if not daily_notices_metadata: + continue mapping_suite_packages = [] fetched_notice_ids = [] notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} - for notice_id in daily_notices_metadata.ted_api_notice_ids: notice: Notice = notice_repo.get(notice_id) @@ -136,24 +137,20 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None, fetched_notice_ids.append(notice_id) notice_status = notice.status notice_statuses[str(notice_status)] += 1 - if notice_status >= NoticeStatus.TRANSFORMED: # Having distilled_rdf_manifestation + if notice_status >= NoticeStatus.TRANSFORMED: # Having rdf_manifestation mapping_suite_id = notice.rdf_manifestation.mapping_suite_id if mapping_suite_id not in mapping_suite_packages: mapping_suite_packages.append(mapping_suite_id) result_notice_statuses = notice_statuses.copy() - #result_notice_statuses = {notice_status: 0 for notice_status in daily_notices_metadata.notice_statuses.keys()} for current_notice_status, linked_notice_statuses in NOTICE_STATUS_COVERAGE_DOWNSTREAM_TRANSITION.items(): current_notice_status = str(current_notice_status) if notice_statuses[current_notice_status] > 0: for linked_notice_status in linked_notice_statuses: - #linked_notice_status = linked_notice_status result_notice_statuses[str(linked_notice_status)] += notice_statuses[current_notice_status] - daily_notices_metadata.notice_statuses = result_notice_statuses daily_notices_metadata.mapping_suite_packages = mapping_suite_packages daily_notices_metadata.fetched_notice_ids = fetched_notice_ids daily_notices_metadata_repo.update(daily_notices_metadata) - diff --git a/tests/e2e/data_manager/conftest.py b/tests/e2e/data_manager/conftest.py index c81feed30..594634de0 100644 --- a/tests/e2e/data_manager/conftest.py +++ b/tests/e2e/data_manager/conftest.py @@ -1,3 +1,5 @@ +from datetime import date + import pytest from ted_sws.core.model.manifestation import RDFManifestation @@ -225,3 +227,8 @@ def notice_with_distilled_status(notice_2020, rdf_file_content): def packaged_notice(): test_notice_repository = NoticeRepositoryInFileSystem(repository_path=TEST_DATA_PATH / "notices" / "packaged") return test_notice_repository.get("632521-2022") + + +@pytest.fixture +def example_date(): + return date(2021, 1, 7) \ No newline at end of file diff --git a/tests/e2e/data_manager/test_notices_metadata_services.py b/tests/e2e/data_manager/test_notices_metadata_services.py new file mode 100644 index 000000000..84e17273e --- /dev/null +++ b/tests/e2e/data_manager/test_notices_metadata_services.py @@ -0,0 +1,84 @@ +from datetime import date + +from pymongo import MongoClient + +from ted_sws import config +from ted_sws.core.model.notice import Notice +from ted_sws.core.model.supra_notice import DailyNoticesMetadata +from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository +from ted_sws.data_manager.adapters.notice_repository import NoticeRepository +from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ + update_daily_notices_metadata_with_fetched_data +from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI + + +def test_update_daily_notices_metadata_from_ted(fake_mongodb_client, example_date): + """ + Test update_daily_notices_metadata_from_ted function + """ + + ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) + daily_notices_metadata_repo = DailyNoticesMetadataRepository(fake_mongodb_client) + + update_daily_notices_metadata_from_ted(start_date=example_date, + end_date=example_date, + ted_api=ted_api, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + daily_notices_metadata: DailyNoticesMetadata = daily_notices_metadata_repo.get(example_date) + assert daily_notices_metadata is not None + assert daily_notices_metadata.ted_api_notice_ids is not None + assert len(daily_notices_metadata.ted_api_notice_ids) == daily_notices_metadata.ted_api_notice_count + + update_daily_notices_metadata_from_ted(start_date=example_date, + end_date=example_date, + ted_api=ted_api, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + assert len(list(daily_notices_metadata_repo.list())) == 1 + + +def test_update_daily_notices_metadata_with_fetched_data(fake_mongodb_client, + fake_notice_repository, + example_date, + notice_with_distilled_status, + notice_2021): + daily_notices_metadata_repo = DailyNoticesMetadataRepository(fake_mongodb_client) + + update_daily_notices_metadata_with_fetched_data(start_date=example_date, + end_date=example_date, + notice_repo=fake_notice_repository, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + assert daily_notices_metadata_repo.get(example_date) is None + + fake_notice_repository.add(notice_with_distilled_status) + fake_notice_repository.add(notice_2021) + + daily_notices_metadata: DailyNoticesMetadata = DailyNoticesMetadata(aggregation_date=example_date) + daily_notices_metadata.ted_api_notice_ids.append(notice_with_distilled_status.ted_id) + daily_notices_metadata.ted_api_notice_ids.append(notice_2021.ted_id) + daily_notices_metadata_repo.add(daily_notices_metadata) + + update_daily_notices_metadata_with_fetched_data(start_date=example_date, + end_date=example_date, + notice_repo=fake_notice_repository, + daily_notices_metadata_repo=daily_notices_metadata_repo) + + daily_notices_metadata: DailyNoticesMetadata = daily_notices_metadata_repo.get(example_date) + + assert daily_notices_metadata is not None + + assert daily_notices_metadata.fetched_notices_count == 2 + assert daily_notices_metadata.ted_api_notice_count == 2 + assert daily_notices_metadata.notice_statuses['RAW'] == 2 + assert daily_notices_metadata.notice_statuses['INDEXED'] > 0 + assert daily_notices_metadata.notice_statuses['NORMALISED_METADATA'] > 0 + assert daily_notices_metadata.notice_statuses['ELIGIBLE_FOR_TRANSFORMATION'] > 0 + assert daily_notices_metadata.notice_statuses['INELIGIBLE_FOR_TRANSFORMATION'] == 0 + + assert daily_notices_metadata.notice_statuses_coverage['RAW_coverage'] == 1.0 + assert daily_notices_metadata.notice_statuses_coverage['INDEXED_coverage'] == 0.5 + assert daily_notices_metadata.notice_statuses_coverage['NORMALISED_METADATA_coverage'] == 0.5 + assert daily_notices_metadata.notice_statuses_coverage['INELIGIBLE_FOR_TRANSFORMATION_coverage'] == 0.0 + assert len(daily_notices_metadata.mapping_suite_packages) > 0 diff --git a/tests/unit/core/model/test_daily_notice_metadata.py b/tests/unit/core/model/test_daily_notice_metadata.py index eccc23fb9..011ecbe4f 100644 --- a/tests/unit/core/model/test_daily_notice_metadata.py +++ b/tests/unit/core/model/test_daily_notice_metadata.py @@ -2,7 +2,22 @@ from ted_sws.core.model.supra_notice import DailyNoticesMetadata -def test_daily_notice_metadata_model(daily_notice_metadata): +def test_daily_notice_metadata_model(daily_notice_metadata: DailyNoticesMetadata): daily_notice_metadata_dict = daily_notice_metadata.model_dump() daily_notice_metadata_from_dict = DailyNoticesMetadata(**daily_notice_metadata_dict) assert daily_notice_metadata == daily_notice_metadata_from_dict + + assert daily_notice_metadata.fetched_notices_coverage == 0 + assert daily_notice_metadata.ted_api_notice_count == 0 + assert daily_notice_metadata.fetched_notices_count == 0 + + daily_notice_metadata.ted_api_notice_ids = ["1", "2", "3"] + daily_notice_metadata.fetched_notice_ids = ["1", "2"] + + assert daily_notice_metadata.fetched_notices_coverage == 0.6666666666666666 + assert daily_notice_metadata.ted_api_notice_count == 3 + assert daily_notice_metadata.fetched_notices_count == 2 + + daily_notice_metadata.notice_statuses = {str(NoticeStatus.PUBLISHED): 1, str(NoticeStatus.RAW): 2} + assert daily_notice_metadata.notice_statuses_coverage == {"PUBLISHED_coverage": 0.3333333333333333, + "RAW_coverage": 0.6666666666666666} diff --git a/tests/unit/data_manager/services/__init__.py b/tests/unit/data_manager/services/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/tests/unit/data_manager/services/test_notices_metadata_services.py b/tests/unit/data_manager/services/test_notices_metadata_services.py deleted file mode 100644 index 17a45b2c4..000000000 --- a/tests/unit/data_manager/services/test_notices_metadata_services.py +++ /dev/null @@ -1,44 +0,0 @@ -from datetime import date - -from pymongo import MongoClient - -from ted_sws import config -from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository -from ted_sws.data_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \ - update_daily_notices_metadata_with_fetched_data -from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI - - -def test_update_daily_notices_metadata_from_ted(mongodb_client): - """ - Test update_daily_notices_metadata_from_ted function - """ - - ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) - daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongodb_client) - - update_daily_notices_metadata_from_ted(start_date=date(2021, 1, 7), - end_date=date(2021, 1, 7), - ted_api=ted_api, - mongo_client=mongodb_client, - daily_notices_metadata_repo=daily_notices_metadata_repo) - - # update_daily_notices_metadata_from_ted(start_date=date(2021, 1, 7), - # end_date=date(2021, 1, 7), - # ted_api=ted_api, - # mongo_client=mongodb_client, - # daily_notices_metadata_repo=daily_notices_metadata_repo) - - -def test_update_daily_notices_metadata_with_fetched_data(): #mongodb_client - mongodb_client = MongoClient(config.MONGO_DB_AUTH_URL) - - ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL) - - - daily_notices_metadata_repo = DailyNoticesMetadataRepository(mongodb_client) - - update_daily_notices_metadata_with_fetched_data(start_date=date(2021, 1, 7), - end_date=date(2021, 1, 7), - mongo_client=mongodb_client, - daily_notices_metadata_repo=daily_notices_metadata_repo) \ No newline at end of file