Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Resolve bug where daily notices metadata does not store days without notices in TED API #506

Merged
merged 10 commits into from
Oct 6, 2023
32 changes: 21 additions & 11 deletions dags/daily_notices_metadata_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,29 @@
DAG to update daily notices metadata from TED.
"""

from datetime import date, datetime
from datetime import date, datetime, timedelta

from airflow.models import Param
from airflow.decorators import dag, task
from airflow.models import Param
from airflow.timetables.trigger import CronTriggerTimetable

from dags import DEFAULT_DAG_ARGUMENTS
from dags.dags_utils import get_dag_param
from ted_sws.supra_notice_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \
from ted_sws.supra_notice_manager.services.daily_notices_metadata_services import \
update_daily_notices_metadata_from_ted, \
update_daily_notices_metadata_with_fetched_data

START_DATE_PARAM_KEY = "start_date"
END_DATE_PARAM_KEY = "end_date"
DEFAULT_TED_API_START_DATE = "2014-01-01"
DEFAULT_TED_API_START_DATE_FORMAT = "%Y-%m-%d"


@dag(default_args=DEFAULT_DAG_ARGUMENTS,
schedule_interval=None,
tags=['daily', "dashboards", "metadata", "ted", "notices"],
catchup=False,
timetable=CronTriggerTimetable('0 19 * * *', timezone='UTC'),
description=__doc__[0: __doc__.find(".")],
doc_md=__doc__,
params={
Expand All @@ -43,19 +49,23 @@
def daily_notices_metadata_update():
@task
def update_daily_notices_metadata_from_ted_api():
start_date = get_dag_param(key=START_DATE_PARAM_KEY, raise_error=True)
end_date = get_dag_param(key=END_DATE_PARAM_KEY, raise_error=True)
start_date = get_dag_param(key=START_DATE_PARAM_KEY, default_value=DEFAULT_TED_API_START_DATE)
end_date = get_dag_param(key=END_DATE_PARAM_KEY, default_value=(datetime.today() - timedelta(days=1)).strftime(
DEFAULT_TED_API_START_DATE_FORMAT))

update_daily_notices_metadata_from_ted(start_date=datetime.strptime(start_date, "%Y-%m-%d"),
end_date=datetime.strptime(end_date, "%Y-%m-%d"))
update_daily_notices_metadata_from_ted(
start_date=datetime.strptime(start_date, DEFAULT_TED_API_START_DATE_FORMAT),
end_date=datetime.strptime(end_date, DEFAULT_TED_API_START_DATE_FORMAT))

@task
def update_daily_notices_metadata_with_fetched_data_from_repo():
start_date = get_dag_param(key=START_DATE_PARAM_KEY, raise_error=True)
end_date = get_dag_param(key=END_DATE_PARAM_KEY, raise_error=True)
start_date = get_dag_param(key=START_DATE_PARAM_KEY, default_value=DEFAULT_TED_API_START_DATE)
end_date = get_dag_param(key=END_DATE_PARAM_KEY, default_value=(datetime.today() - timedelta(days=1)).strftime(
DEFAULT_TED_API_START_DATE_FORMAT))

update_daily_notices_metadata_with_fetched_data(start_date=datetime.strptime(start_date, "%Y-%m-%d"),
end_date=datetime.strptime(end_date, "%Y-%m-%d"))
update_daily_notices_metadata_with_fetched_data(
start_date=datetime.strptime(start_date, DEFAULT_TED_API_START_DATE_FORMAT),
end_date=datetime.strptime(end_date, DEFAULT_TED_API_START_DATE_FORMAT))

update_daily_notices_metadata_from_ted_api() >> update_daily_notices_metadata_with_fetched_data_from_repo()

Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from datetime import date, datetime, timedelta
from datetime import date
from typing import Optional, List

from dateutil import rrule
Expand All @@ -11,8 +11,6 @@
from ted_sws.data_manager.adapters.notice_repository import NoticeRepository
from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI

DEFAULT_TED_API_START_DATE = "2014-01-01"
DEFAULT_TED_API_START_DATE_FORMAT = "%Y-%m-%d"
TED_API_NOTICE_ID_FIELD = "ND"
TED_API_WILDCARD_DATE_FORMAT = "%Y%m%d*"
DAILY_NOTICES_METADATA_TED_API_QUERY_RESULT_FIELDS = {"fields": ["ND"]}
Expand Down Expand Up @@ -57,20 +55,18 @@ def generate_list_of_dates_from_date_range(start_date: date, end_date: date) ->
"""
if start_date > end_date:
return None
return [ dt.date() for dt in rrule.rrule(rrule.DAILY,
dtstart=start_date,
until=end_date)]
return [dt.date() for dt in rrule.rrule(rrule.DAILY,
dtstart=start_date,
until=end_date)]


def update_daily_notices_metadata_from_ted(start_date: date = None,
end_date: date = None,
def update_daily_notices_metadata_from_ted(start_date: date,
end_date: date,
ted_api: TedAPIAdapter = None,
daily_notices_metadata_repo: DailyNoticesMetadataRepository = None):
"""
Updates the daily notices metadata from the TED API.
"""
start_date = start_date or datetime.strptime(DEFAULT_TED_API_START_DATE, DEFAULT_TED_API_START_DATE_FORMAT)
end_date = end_date or datetime.today() - timedelta(days=1)

if start_date > end_date:
raise Exception("Start date cannot be greater than end date")
Expand All @@ -89,7 +85,7 @@ def update_daily_notices_metadata_from_ted(start_date: date = None,

# Getting from TED API dates that are not in the repository from date range
for day in dates_not_in_repository:
ted_api_query = DAILY_NOTICES_METADATA_TED_API_QUERY
ted_api_query = DAILY_NOTICES_METADATA_TED_API_QUERY.copy()
ted_api_query[TED_API_QUERY_FIELD] = ted_api_query[TED_API_QUERY_FIELD].format(
aggregation_date=day.strftime(TED_API_WILDCARD_DATE_FORMAT))
notice_ids = ted_api.get_by_query(ted_api_query,
Expand All @@ -99,17 +95,14 @@ def update_daily_notices_metadata_from_ted(start_date: date = None,
daily_notices_metadata_repo.add(daily_notices_metadata)


def update_daily_notices_metadata_with_fetched_data(start_date: date = None,
end_date: date = None,
def update_daily_notices_metadata_with_fetched_data(start_date: date,
end_date: date,
notice_repo: NoticeRepository = None,
daily_notices_metadata_repo: DailyNoticesMetadataRepository = None):
"""
Updates the daily notices metadata with fetched data.
"""

start_date = start_date or datetime.strptime(DEFAULT_TED_API_START_DATE, DEFAULT_TED_API_START_DATE_FORMAT)
end_date = end_date or datetime.today() - timedelta(days=1)

if start_date > end_date:
raise Exception("Start date cannot be greater than end date")

Expand Down Expand Up @@ -153,4 +146,3 @@ def update_daily_notices_metadata_with_fetched_data(start_date: date = None,
daily_notices_metadata.mapping_suite_packages = mapping_suite_packages
daily_notices_metadata.fetched_notice_ids = fetched_notice_ids
daily_notices_metadata_repo.update(daily_notices_metadata)

5 changes: 5 additions & 0 deletions tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,3 +85,8 @@ def fake_notice_repository(fake_mongodb_client):
@pytest.fixture
def example_date():
return date(2021, 1, 7)


@pytest.fixture
def example_date_without_notices():
return date(2021, 1, 9)
16 changes: 12 additions & 4 deletions tests/e2e/supra_notice_manager/test_notices_metadata_services.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
from ted_sws import config
from ted_sws.core.model.supra_notice import DailyNoticesMetadata
from ted_sws.data_manager.adapters.daily_notices_metadata_repository import DailyNoticesMetadataRepository
from ted_sws.supra_notice_manager.services.daily_notices_metadata_services import update_daily_notices_metadata_from_ted, \
update_daily_notices_metadata_with_fetched_data
from ted_sws.notice_fetcher.adapters.ted_api import TedAPIAdapter, TedRequestAPI
from ted_sws.supra_notice_manager.services.daily_notices_metadata_services import \
update_daily_notices_metadata_from_ted, \
update_daily_notices_metadata_with_fetched_data


def test_update_daily_notices_metadata_from_ted(fake_mongodb_client, example_date):
def test_update_daily_notices_metadata_from_ted(fake_mongodb_client, example_date, example_date_without_notices):
"""
Test update_daily_notices_metadata_from_ted function
"""

ted_api = TedAPIAdapter(TedRequestAPI(), config.TED_API_URL)
daily_notices_metadata_repo = DailyNoticesMetadataRepository(fake_mongodb_client)

update_daily_notices_metadata_from_ted(start_date=example_date_without_notices,
end_date=example_date_without_notices,
ted_api=ted_api,
daily_notices_metadata_repo=daily_notices_metadata_repo)

assert len(list(daily_notices_metadata_repo.list())) == 1

update_daily_notices_metadata_from_ted(start_date=example_date,
end_date=example_date,
ted_api=ted_api,
Expand All @@ -29,7 +37,7 @@ def test_update_daily_notices_metadata_from_ted(fake_mongodb_client, example_dat
ted_api=ted_api,
daily_notices_metadata_repo=daily_notices_metadata_repo)

assert len(list(daily_notices_metadata_repo.list())) == 1
assert len(list(daily_notices_metadata_repo.list())) == 2


def test_update_daily_notices_metadata_with_fetched_data(fake_mongodb_client,
Expand Down
Loading