Skip to content

Commit

Permalink
feat: retry notify if no apps are currently available (#401)
Browse files Browse the repository at this point in the history
Catches `NoConfiguredAppsAvailable` exception in the notify task and
attempts to reschedule it at a later time.

The "later time" is in the next full hour, when the rate limits should reset.
Notice this only affects gh app installations, so tokenless should not fall unders this category.
(meaning we shouldn't have a terrible influx of tokenless notifs at the next hour).

Also updating the `get_utc_now` function because `datetime.utcnow` is deprecated.
  • Loading branch information
giovanni-guidini authored Apr 22, 2024
1 parent e7911ce commit 0f97086
Show file tree
Hide file tree
Showing 7 changed files with 132 additions and 32 deletions.
8 changes: 7 additions & 1 deletion helpers/clock.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,14 @@


def get_utc_now() -> datetime:
return datetime.utcnow().replace(tzinfo=timezone.utc)
return datetime.now(timezone.utc)


def get_utc_now_as_iso_format() -> str:
return get_utc_now().isoformat()


def get_seconds_to_next_hour() -> int:
now = datetime.now(timezone.utc)
current_seconds = (now.minute * 60) + now.second
return 3600 - current_seconds
7 changes: 6 additions & 1 deletion helpers/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,13 @@ class OwnerWithoutValidBotError(Exception):
pass


class CorruptRawReportError(Exception):
class NoConfiguredAppsAvailable(Exception):
def __init__(self, apps_count: int, all_rate_limited: bool) -> None:
self.apps_count = apps_count
self.all_rate_limited = all_rate_limited


class CorruptRawReportError(Exception):
"""Error indicated that report is somehow different than it should be
Notice that this error should not be used to replace `matches_content` logic on each processor.
Expand Down
23 changes: 22 additions & 1 deletion helpers/tests/unit/test_clock.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,13 @@
from datetime import datetime, timezone

from helpers.clock import get_utc_now, get_utc_now_as_iso_format
import pytest
from freezegun import freeze_time

from helpers.clock import (
get_seconds_to_next_hour,
get_utc_now,
get_utc_now_as_iso_format,
)


def test_get_utc_now():
Expand All @@ -13,3 +20,17 @@ def test_get_utc_now_as_iso_format():
res = get_utc_now_as_iso_format()
assert isinstance(res, str)
assert isinstance(datetime.fromisoformat(res), datetime)


@pytest.mark.parametrize(
"timestamp, expected",
[
("2024-04-22T10:22:00", 38 * 60),
("2024-04-22T10:22:59", 38 * 60 - 59),
("2024-04-22T10:59:59", 1),
("2024-04-22T10:59:00", 60),
],
)
def test_get_seconds_to_next_hour(timestamp, expected):
with freeze_time(timestamp):
assert get_seconds_to_next_hour() == expected
12 changes: 5 additions & 7 deletions services/bots.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,11 @@
GithubAppInstallation,
)
from helpers.environment import is_enterprise
from helpers.exceptions import OwnerWithoutValidBotError, RepositoryWithoutValidBotError
from helpers.exceptions import (
NoConfiguredAppsAvailable,
OwnerWithoutValidBotError,
RepositoryWithoutValidBotError,
)
from services.encryption import encryptor
from services.github import get_github_integration_token
from services.redis import get_redis_connection
Expand Down Expand Up @@ -90,12 +94,6 @@ def _get_apps_from_weighted_selection(
return list_to_return


class NoConfiguredAppsAvailable(Exception):
def __init__(self, apps_count: int, all_rate_limited: bool) -> None:
self.apps_count = apps_count
self.all_rate_limited = all_rate_limited


def get_owner_installation_id(
owner: Owner,
deprecated_using_integration: bool,
Expand Down
2 changes: 1 addition & 1 deletion services/tests/test_bots.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
GithubAppInstallation,
)
from database.tests.factories import OwnerFactory, RepositoryFactory
from helpers.exceptions import NoConfiguredAppsAvailable
from services.bots import (
NoConfiguredAppsAvailable,
OwnerWithoutValidBotError,
RepositoryWithoutValidBotError,
TokenType,
Expand Down
80 changes: 60 additions & 20 deletions tasks/notify.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import logging
from typing import Optional

from asgiref.sync import async_to_sync
from celery.exceptions import MaxRetriesExceededError, SoftTimeLimitExceeded
Expand All @@ -21,7 +22,8 @@
from database.models.core import GITHUB_APP_INSTALLATION_DEFAULT_NAME
from helpers.checkpoint_logger import from_kwargs as checkpoints_from_kwargs
from helpers.checkpoint_logger.flows import UploadFlow
from helpers.exceptions import RepositoryWithoutValidBotError
from helpers.clock import get_seconds_to_next_hour
from helpers.exceptions import NoConfiguredAppsAvailable, RepositoryWithoutValidBotError
from helpers.github_installation import get_installation_name_for_owner_for_task
from helpers.save_commit_error import save_commit_error
from services.activation import activate_user
Expand Down Expand Up @@ -127,6 +129,35 @@ def log_checkpoint(self, kwargs, checkpoint):
if checkpoints.data:
checkpoints.log(checkpoint)

def _attempt_retry(
self,
max_retries: int,
countdown: int,
commit: Commit,
current_yaml: Optional[UserYaml],
*args,
**kwargs,
) -> None:
try:
self.retry(max_retries=max_retries, countdown=countdown)
except MaxRetriesExceededError:
log.warning(
"Not attempting to retry notifications since we already retried too many times",
extra=dict(
repoid=commit.repoid,
commit=commit.commitid,
max_retries=max_retries,
next_countdown_would_be=countdown,
current_yaml=current_yaml.to_dict(),
),
)
self.log_checkpoint(kwargs, UploadFlow.NOTIF_TOO_MANY_RETRIES)
return {
"notified": False,
"notifications": None,
"reason": "too_many_retries",
}

def run_impl_within_lock(
self,
db_session: Session,
Expand Down Expand Up @@ -175,6 +206,26 @@ def run_impl_within_lock(
)
self.log_checkpoint(kwargs, UploadFlow.NOTIF_NO_VALID_INTEGRATION)
return {"notified": False, "notifications": None, "reason": "no_valid_bot"}
except NoConfiguredAppsAvailable as exp:
# Min wait time of 1 minute
retry_delay_seconds = max(60, get_seconds_to_next_hour())
log.warning(
"Unable to start notifications because all ghapps available are rate limited",
extra=dict(
repoid=repoid,
commit=commitid,
apps_available=exp.apps_count,
countdown_seconds=retry_delay_seconds,
),
)
return self._attempt_retry(
max_retries=10,
countdown=retry_delay_seconds,
current_yaml=current_yaml,
commit=commit,
**kwargs,
)

if current_yaml is None:
current_yaml = async_to_sync(get_current_yaml)(commit, repository_service)
else:
Expand Down Expand Up @@ -234,25 +285,14 @@ def run_impl_within_lock(
else:
max_retries = 10
countdown = 15 * 2**self.request.retries
try:
self.retry(max_retries=max_retries, countdown=countdown)
except MaxRetriesExceededError:
log.warning(
"Not attempting to retry notifications since we already retried too many times",
extra=dict(
repoid=commit.repoid,
commit=commit.commitid,
max_retries=max_retries,
next_countdown_would_be=countdown,
current_yaml=current_yaml.to_dict(),
),
)
self.log_checkpoint(kwargs, UploadFlow.NOTIF_TOO_MANY_RETRIES)
return {
"notified": False,
"notifications": None,
"reason": "too_many_retries",
}
return self._attempt_retry(
max_retries=max_retries,
countdown=countdown,
current_yaml=current_yaml,
commit=commit,
**kwargs,
)

report_service = ReportService(current_yaml)
head_report = report_service.get_existing_report_for_commit(
commit, report_class=ReadOnlyReport
Expand Down
32 changes: 31 additions & 1 deletion tasks/tests/unit/test_notify_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

import pytest
from celery.exceptions import MaxRetriesExceededError, Retry
from freezegun import freeze_time
from redis.exceptions import LockError
from shared.celery_config import new_user_activated_task_name
from shared.reports.resources import Report
Expand All @@ -21,7 +22,7 @@
)
from helpers.checkpoint_logger import CheckpointLogger, _kwargs_key
from helpers.checkpoint_logger.flows import UploadFlow
from helpers.exceptions import RepositoryWithoutValidBotError
from helpers.exceptions import NoConfiguredAppsAvailable, RepositoryWithoutValidBotError
from services.decoration import DecorationDetails
from services.lock_manager import LockRetry
from services.notification import NotificationService
Expand Down Expand Up @@ -714,6 +715,35 @@ def test_notify_task_no_bot(self, dbsession, mocker):
}
assert expected_result == res

@freeze_time("2024-04-22T11:15:00")
def test_notify_task_no_ghapp_available(self, dbsession, mocker):
get_repo_provider_service = mocker.patch(
"tasks.notify.get_repo_provider_service"
)
mock_retry = mocker.patch.object(NotifyTask, "retry", return_value=None)
get_repo_provider_service.side_effect = NoConfiguredAppsAvailable(
apps_count=2, all_rate_limited=True
)
commit = CommitFactory.create(
message="",
pullid=None,
branch="test-branch-1",
commitid="649eaaf2924e92dc7fd8d370ddb857033231e67a",
repository__using_integration=True,
)
dbsession.add(commit)
dbsession.flush()
current_yaml = {"codecov": {"require_ci_to_pass": True}}
task = NotifyTask()
res = task.run_impl_within_lock(
dbsession,
repoid=commit.repoid,
commitid=commit.commitid,
current_yaml=current_yaml,
)
assert res is None
mock_retry.assert_called_with(max_retries=10, countdown=45 * 60)

def test_submit_third_party_notifications_exception(self, mocker, dbsession):
current_yaml = {}
repository = RepositoryFactory.create()
Expand Down

0 comments on commit 0f97086

Please sign in to comment.