From 0f97086d433cb5ff28b13570d66f3497d7c07d47 Mon Sep 17 00:00:00 2001 From: Giovanni M Guidini <99758426+giovanni-guidini@users.noreply.github.com> Date: Mon, 22 Apr 2024 16:43:46 +0200 Subject: [PATCH] feat: retry notify if no apps are currently available (#401) Catches `NoConfiguredAppsAvailable` exception in the notify task and attempts to reschedule it at a later time. The "later time" is in the next full hour, when the rate limits should reset. Notice this only affects gh app installations, so tokenless should not fall unders this category. (meaning we shouldn't have a terrible influx of tokenless notifs at the next hour). Also updating the `get_utc_now` function because `datetime.utcnow` is deprecated. --- helpers/clock.py | 8 ++- helpers/exceptions.py | 7 ++- helpers/tests/unit/test_clock.py | 23 +++++++- services/bots.py | 12 ++--- services/tests/test_bots.py | 2 +- tasks/notify.py | 80 +++++++++++++++++++++------- tasks/tests/unit/test_notify_task.py | 32 ++++++++++- 7 files changed, 132 insertions(+), 32 deletions(-) diff --git a/helpers/clock.py b/helpers/clock.py index eb648dfba..e1dbf69f9 100644 --- a/helpers/clock.py +++ b/helpers/clock.py @@ -2,8 +2,14 @@ def get_utc_now() -> datetime: - return datetime.utcnow().replace(tzinfo=timezone.utc) + return datetime.now(timezone.utc) def get_utc_now_as_iso_format() -> str: return get_utc_now().isoformat() + + +def get_seconds_to_next_hour() -> int: + now = datetime.now(timezone.utc) + current_seconds = (now.minute * 60) + now.second + return 3600 - current_seconds diff --git a/helpers/exceptions.py b/helpers/exceptions.py index d899993ed..666fff6b3 100644 --- a/helpers/exceptions.py +++ b/helpers/exceptions.py @@ -14,8 +14,13 @@ class OwnerWithoutValidBotError(Exception): pass -class CorruptRawReportError(Exception): +class NoConfiguredAppsAvailable(Exception): + def __init__(self, apps_count: int, all_rate_limited: bool) -> None: + self.apps_count = apps_count + self.all_rate_limited = all_rate_limited + +class CorruptRawReportError(Exception): """Error indicated that report is somehow different than it should be Notice that this error should not be used to replace `matches_content` logic on each processor. diff --git a/helpers/tests/unit/test_clock.py b/helpers/tests/unit/test_clock.py index 483918c14..280299f44 100644 --- a/helpers/tests/unit/test_clock.py +++ b/helpers/tests/unit/test_clock.py @@ -1,6 +1,13 @@ from datetime import datetime, timezone -from helpers.clock import get_utc_now, get_utc_now_as_iso_format +import pytest +from freezegun import freeze_time + +from helpers.clock import ( + get_seconds_to_next_hour, + get_utc_now, + get_utc_now_as_iso_format, +) def test_get_utc_now(): @@ -13,3 +20,17 @@ def test_get_utc_now_as_iso_format(): res = get_utc_now_as_iso_format() assert isinstance(res, str) assert isinstance(datetime.fromisoformat(res), datetime) + + +@pytest.mark.parametrize( + "timestamp, expected", + [ + ("2024-04-22T10:22:00", 38 * 60), + ("2024-04-22T10:22:59", 38 * 60 - 59), + ("2024-04-22T10:59:59", 1), + ("2024-04-22T10:59:00", 60), + ], +) +def test_get_seconds_to_next_hour(timestamp, expected): + with freeze_time(timestamp): + assert get_seconds_to_next_hour() == expected diff --git a/services/bots.py b/services/bots.py index 8fb8caa4c..129b32ea3 100644 --- a/services/bots.py +++ b/services/bots.py @@ -14,7 +14,11 @@ GithubAppInstallation, ) from helpers.environment import is_enterprise -from helpers.exceptions import OwnerWithoutValidBotError, RepositoryWithoutValidBotError +from helpers.exceptions import ( + NoConfiguredAppsAvailable, + OwnerWithoutValidBotError, + RepositoryWithoutValidBotError, +) from services.encryption import encryptor from services.github import get_github_integration_token from services.redis import get_redis_connection @@ -90,12 +94,6 @@ def _get_apps_from_weighted_selection( return list_to_return -class NoConfiguredAppsAvailable(Exception): - def __init__(self, apps_count: int, all_rate_limited: bool) -> None: - self.apps_count = apps_count - self.all_rate_limited = all_rate_limited - - def get_owner_installation_id( owner: Owner, deprecated_using_integration: bool, diff --git a/services/tests/test_bots.py b/services/tests/test_bots.py index 1d6ad4978..0494e19bc 100644 --- a/services/tests/test_bots.py +++ b/services/tests/test_bots.py @@ -8,8 +8,8 @@ GithubAppInstallation, ) from database.tests.factories import OwnerFactory, RepositoryFactory +from helpers.exceptions import NoConfiguredAppsAvailable from services.bots import ( - NoConfiguredAppsAvailable, OwnerWithoutValidBotError, RepositoryWithoutValidBotError, TokenType, diff --git a/tasks/notify.py b/tasks/notify.py index 5bfa17a1c..bc9e5415c 100644 --- a/tasks/notify.py +++ b/tasks/notify.py @@ -1,4 +1,5 @@ import logging +from typing import Optional from asgiref.sync import async_to_sync from celery.exceptions import MaxRetriesExceededError, SoftTimeLimitExceeded @@ -21,7 +22,8 @@ from database.models.core import GITHUB_APP_INSTALLATION_DEFAULT_NAME from helpers.checkpoint_logger import from_kwargs as checkpoints_from_kwargs from helpers.checkpoint_logger.flows import UploadFlow -from helpers.exceptions import RepositoryWithoutValidBotError +from helpers.clock import get_seconds_to_next_hour +from helpers.exceptions import NoConfiguredAppsAvailable, RepositoryWithoutValidBotError from helpers.github_installation import get_installation_name_for_owner_for_task from helpers.save_commit_error import save_commit_error from services.activation import activate_user @@ -127,6 +129,35 @@ def log_checkpoint(self, kwargs, checkpoint): if checkpoints.data: checkpoints.log(checkpoint) + def _attempt_retry( + self, + max_retries: int, + countdown: int, + commit: Commit, + current_yaml: Optional[UserYaml], + *args, + **kwargs, + ) -> None: + try: + self.retry(max_retries=max_retries, countdown=countdown) + except MaxRetriesExceededError: + log.warning( + "Not attempting to retry notifications since we already retried too many times", + extra=dict( + repoid=commit.repoid, + commit=commit.commitid, + max_retries=max_retries, + next_countdown_would_be=countdown, + current_yaml=current_yaml.to_dict(), + ), + ) + self.log_checkpoint(kwargs, UploadFlow.NOTIF_TOO_MANY_RETRIES) + return { + "notified": False, + "notifications": None, + "reason": "too_many_retries", + } + def run_impl_within_lock( self, db_session: Session, @@ -175,6 +206,26 @@ def run_impl_within_lock( ) self.log_checkpoint(kwargs, UploadFlow.NOTIF_NO_VALID_INTEGRATION) return {"notified": False, "notifications": None, "reason": "no_valid_bot"} + except NoConfiguredAppsAvailable as exp: + # Min wait time of 1 minute + retry_delay_seconds = max(60, get_seconds_to_next_hour()) + log.warning( + "Unable to start notifications because all ghapps available are rate limited", + extra=dict( + repoid=repoid, + commit=commitid, + apps_available=exp.apps_count, + countdown_seconds=retry_delay_seconds, + ), + ) + return self._attempt_retry( + max_retries=10, + countdown=retry_delay_seconds, + current_yaml=current_yaml, + commit=commit, + **kwargs, + ) + if current_yaml is None: current_yaml = async_to_sync(get_current_yaml)(commit, repository_service) else: @@ -234,25 +285,14 @@ def run_impl_within_lock( else: max_retries = 10 countdown = 15 * 2**self.request.retries - try: - self.retry(max_retries=max_retries, countdown=countdown) - except MaxRetriesExceededError: - log.warning( - "Not attempting to retry notifications since we already retried too many times", - extra=dict( - repoid=commit.repoid, - commit=commit.commitid, - max_retries=max_retries, - next_countdown_would_be=countdown, - current_yaml=current_yaml.to_dict(), - ), - ) - self.log_checkpoint(kwargs, UploadFlow.NOTIF_TOO_MANY_RETRIES) - return { - "notified": False, - "notifications": None, - "reason": "too_many_retries", - } + return self._attempt_retry( + max_retries=max_retries, + countdown=countdown, + current_yaml=current_yaml, + commit=commit, + **kwargs, + ) + report_service = ReportService(current_yaml) head_report = report_service.get_existing_report_for_commit( commit, report_class=ReadOnlyReport diff --git a/tasks/tests/unit/test_notify_task.py b/tasks/tests/unit/test_notify_task.py index 71b7997cc..f915f222c 100644 --- a/tasks/tests/unit/test_notify_task.py +++ b/tasks/tests/unit/test_notify_task.py @@ -3,6 +3,7 @@ import pytest from celery.exceptions import MaxRetriesExceededError, Retry +from freezegun import freeze_time from redis.exceptions import LockError from shared.celery_config import new_user_activated_task_name from shared.reports.resources import Report @@ -21,7 +22,7 @@ ) from helpers.checkpoint_logger import CheckpointLogger, _kwargs_key from helpers.checkpoint_logger.flows import UploadFlow -from helpers.exceptions import RepositoryWithoutValidBotError +from helpers.exceptions import NoConfiguredAppsAvailable, RepositoryWithoutValidBotError from services.decoration import DecorationDetails from services.lock_manager import LockRetry from services.notification import NotificationService @@ -714,6 +715,35 @@ def test_notify_task_no_bot(self, dbsession, mocker): } assert expected_result == res + @freeze_time("2024-04-22T11:15:00") + def test_notify_task_no_ghapp_available(self, dbsession, mocker): + get_repo_provider_service = mocker.patch( + "tasks.notify.get_repo_provider_service" + ) + mock_retry = mocker.patch.object(NotifyTask, "retry", return_value=None) + get_repo_provider_service.side_effect = NoConfiguredAppsAvailable( + apps_count=2, all_rate_limited=True + ) + commit = CommitFactory.create( + message="", + pullid=None, + branch="test-branch-1", + commitid="649eaaf2924e92dc7fd8d370ddb857033231e67a", + repository__using_integration=True, + ) + dbsession.add(commit) + dbsession.flush() + current_yaml = {"codecov": {"require_ci_to_pass": True}} + task = NotifyTask() + res = task.run_impl_within_lock( + dbsession, + repoid=commit.repoid, + commitid=commit.commitid, + current_yaml=current_yaml, + ) + assert res is None + mock_retry.assert_called_with(max_retries=10, countdown=45 * 60) + def test_submit_third_party_notifications_exception(self, mocker, dbsession): current_yaml = {} repository = RepositoryFactory.create()