Skip to content

Commit

Permalink
feat: pass fallbakc tokens to Github if available (#385)
Browse files Browse the repository at this point in the history
* feat: pass fallbakc tokens to Github if available

depends on: codecov/shared#186

Passes fallback tokens to GitHub instances if more than 1 app is available (in the case of dedicated apps in particular the default one is the fallback).
Also checks if apps are rate limited before selecting them.
There's a new exception now `NoConfiguredAppsAvailable` due to being rate limited.

I purposefully didn't catch this exception yet.
The idea in a subsequent PR is to do so in the notify task, for example,
so we can reschedule it.

* don't import function if not using in the file
  • Loading branch information
giovanni-guidini authored Apr 22, 2024
1 parent 735b82a commit d8fa620
Show file tree
Hide file tree
Showing 9 changed files with 293 additions and 101 deletions.
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
https://github.com/codecov/shared/archive/bf5fd12a12b8bd35c533298ccee4813a15e20067.tar.gz#egg=shared
https://github.com/codecov/shared/archive/93601e376ee28524b066ebde0298f491ee4a43de.tar.gz#egg=shared
https://github.com/codecov/opentelem-python/archive/refs/tags/v0.0.4a1.tar.gz#egg=codecovopentelem
https://github.com/codecov/test-results-parser/archive/5515e960d5d38881036e9127f86320efca649f13.tar.gz#egg=test-results-parser
boto3
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ sentry-sdk==1.40.0
# via
# -r requirements.in
# shared
shared @ https://github.com/codecov/shared/archive/bf5fd12a12b8bd35c533298ccee4813a15e20067.tar.gz
shared @ https://github.com/codecov/shared/archive/93601e376ee28524b066ebde0298f491ee4a43de.tar.gz
# via -r requirements.in
six==1.15.0
# via
Expand Down
190 changes: 106 additions & 84 deletions services/bots.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
import logging
import random
from datetime import datetime, timezone
from typing import Dict, List, Optional, Tuple
from typing import Dict, List, Optional, Tuple, TypedDict

from shared.config import get_config
from shared.github import is_installation_rate_limited
from shared.torngit.base import TokenType
from shared.typings.oauth_token_types import GithubInstallationInfo

from database.models import Owner, Repository
from database.models.core import (
Expand All @@ -15,6 +17,7 @@
from helpers.exceptions import OwnerWithoutValidBotError, RepositoryWithoutValidBotError
from services.encryption import encryptor
from services.github import get_github_integration_token
from services.redis import get_redis_connection

log = logging.getLogger(__name__)

Expand All @@ -34,6 +37,65 @@ def _get_installation_weight(installation: GithubAppInstallation) -> int:
return age_hours + 2**age.days


def _get_apps_from_weighted_selection(
owner: Owner, installation_name: str, repository: Optional[Repository]
) -> List[GithubAppInstallation]:
ghapp_installations_filter: List[GithubAppInstallation] = list(
filter(
lambda obj: (
obj.name == installation_name
and obj.is_configured()
and (
# If there is a repo we want only the apps that cover said repo
(repository and obj.is_repo_covered_by_integration(repository))
# If there is no repo we still need some true value
or (not repository)
)
),
owner.github_app_installations or [],
)
)
# We assign weights to the apps based on how long ago they were created.
# The idea is that there's a greater chance that a change misconfigured the app,
# So apps recently created are selected less frequently than older apps
weights = [
min(MAX_GITHUB_APP_SELECTION_WEIGHT, _get_installation_weight(obj))
for obj in ghapp_installations_filter
]
# Random selection of size 3.
# If all apps have roughly the same probability of being selected, the array would have different entries.
# If 1 app dominates the probability of selection than it would probably be that app repeated 3 times, BUT
# from time to time the less frequent one would be selected.
apps_to_consider = (
random.choices(ghapp_installations_filter, weights=weights, k=3)
if len(ghapp_installations_filter) > 0
else []
)
if installation_name != GITHUB_APP_INSTALLATION_DEFAULT_NAME:
# Add the default app as the last fallback if the owner is using a different app for the task
default_apps = filter(
lambda obj: obj.name == GITHUB_APP_INSTALLATION_DEFAULT_NAME,
owner.github_app_installations,
)
if default_apps:
apps_to_consider.extend(default_apps)
# Now we de-duplicate the apps_to_consider list before returning
seen_ids = dict()
list_to_return = []
for app in apps_to_consider:
if seen_ids.get(app.id, False):
continue
seen_ids[app.id] = True
list_to_return.append(app)
return list_to_return


class NoConfiguredAppsAvailable(Exception):
def __init__(self, apps_count: int, all_rate_limited: bool) -> None:
self.apps_count = apps_count
self.all_rate_limited = all_rate_limited


def get_owner_installation_id(
owner: Owner,
deprecated_using_integration: bool,
Expand All @@ -59,77 +121,47 @@ def get_owner_installation_id(
)

if not ignore_installation or deprecated_using_integration:
default_app_installation_filter: List[GithubAppInstallation] = list(
redis_connection = get_redis_connection()
apps_to_consider = _get_apps_from_weighted_selection(
owner, installation_name, repository
)

apps_matching_criteria_count = len(apps_to_consider)
apps_to_consider = list(
filter(
lambda obj: (
obj.name == installation_name
and obj.is_configured()
and (
# If there is a repo we want only the apps that cover said repo
(repository and obj.is_repo_covered_by_integration(repository))
# If there is no repo we still need some true value
or (not repository)
)
),
owner.github_app_installations or [],
lambda obj: not is_installation_rate_limited(redis_connection, obj.id),
apps_to_consider,
)
)
# We assign weights to the apps based on how long ago they were updated.
# The idea is that there's a greater chance that a change misconfigured the app,
# So apps recently updated are selected less frequently than older apps
weights = [
min(MAX_GITHUB_APP_SELECTION_WEIGHT, _get_installation_weight(obj))
for obj in default_app_installation_filter
]
# Random selection of size 3.
# If all apps have roughly the same probability of being selected, the array would have different entries.
# If 1 app dominates the probability of selection than it would probably be that app repeated 3 times, BUT
# from time to time the less frequent one would be selected.
apps_to_consider = (
random.choices(default_app_installation_filter, weights=weights, k=3)
if len(default_app_installation_filter) > 0
else []
)
already_checked = dict()
# filter is an Iterator, so we need to scan matches
for app_installation in apps_to_consider:
if already_checked.get(app_installation.installation_id):
continue
already_checked[app_installation.installation_id] = True
if repository:
if app_installation.is_repo_covered_by_integration(repository):
log.info(
"Selected github installation for repo",
extra=dict(
installation=app_installation.external_id,
installation_name=app_installation.name,
ownerid=owner.ownerid,
repoid=repository.repoid,
),
)
return {
"installation_id": app_installation.installation_id,
"app_id": app_installation.app_id,
"pem_path": app_installation.pem_path,
}
# Not returning None here because we found situations where ghapp installations will mark the
# the repo as NOT covered but it is, in fact, covered.
# We need to backfill some things.
else:
# Getting owner installation - not tied to any particular repo
log.info(
"Selected github installation for owner",
extra=dict(
installation=app_installation.external_id,
installation_name=app_installation.name,
ownerid=owner.ownerid,

if apps_to_consider:
main_name = apps_to_consider[0].name
info_to_get_tokens = list(
map(
lambda obj: GithubInstallationInfo(
installation_id=obj.installation_id,
app_id=obj.app_id,
pem_path=obj.pem_path,
),
apps_to_consider,
)
return {
"installation_id": app_installation.installation_id,
"app_id": app_installation.app_id,
"pem_path": app_installation.pem_path,
}
)
main_selected_info = info_to_get_tokens.pop(0)
log.info(
"Selected installation to communicate with github",
extra=dict(
installation_id=main_selected_info["installation_id"],
installation_name=main_name,
fallback_installations=[
obj["installation_id"] for obj in info_to_get_tokens
],
),
)
return {**main_selected_info, "fallback_installations": info_to_get_tokens}
elif apps_matching_criteria_count > 0:
raise NoConfiguredAppsAvailable(
apps_count=apps_matching_criteria_count, all_rate_limited=True
)
# DEPRECATED FLOW - begin
if owner.integration_id and deprecated_using_integration:
log.info("Selected owner integration to communicate with github")
Expand All @@ -140,33 +172,26 @@ def get_owner_installation_id(

def get_repo_appropriate_bot_token(
repo: Repository,
installation_name_to_use: Optional[str] = GITHUB_APP_INSTALLATION_DEFAULT_NAME,
installation_info: Optional[Dict] = None,
) -> Tuple[Dict, Optional[Owner]]:

log.info(
"Get repo appropriate bot token",
extra=dict(
installation_name_to_use=installation_name_to_use,
installation_info=installation_info,
repoid=repo.repoid,
),
)

if is_enterprise() and get_config(repo.service, "bot"):
return get_public_bot_token(repo)

installation_dict = get_owner_installation_id(
repo.owner,
repo.using_integration,
repository=repo,
ignore_installation=False,
installation_name=installation_name_to_use,
)
if installation_dict:
if installation_info:
github_token = get_github_integration_token(
repo.owner.service,
installation_dict["installation_id"],
app_id=installation_dict.get("app_id", None),
pem_path=installation_dict.get("pem_path", None),
installation_info["installation_id"],
app_id=installation_info.get("app_id", None),
pem_path=installation_info.get("pem_path", None),
)
installation_token = dict(key=github_token)
# The token is not owned by an Owner object, so 2nd arg is None
Expand Down Expand Up @@ -259,11 +284,8 @@ def _get_repo_appropriate_bot(repo: Repository) -> Owner:


def get_owner_appropriate_bot_token(
owner, using_integration, ignore_installation: bool = False
owner, installation_dict: Optional[Dict] = None
) -> Dict:
installation_dict = get_owner_installation_id(
owner, using_integration, ignore_installation=ignore_installation
)
if installation_dict:
github_token = get_github_integration_token(
owner.service,
Expand Down
8 changes: 6 additions & 2 deletions services/owner.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from shared.config import get_config, get_verify_ssl

from helpers.token_refresh import get_token_refresh_callback
from services.bots import get_owner_appropriate_bot_token
from services.bots import get_owner_appropriate_bot_token, get_owner_installation_id

log = logging.getLogger(__name__)

Expand All @@ -17,9 +17,10 @@ def get_owner_provider_service(
get_config("setup", "http", "timeouts", "receive", default=30),
]
service = owner.service
token = get_owner_appropriate_bot_token(
installation_info = get_owner_installation_id(
owner, using_integration, ignore_installation=ignore_installation
)
token = get_owner_appropriate_bot_token(owner, installation_dict=installation_info)
adapter_params = dict(
owner=dict(
service_id=owner.service_id, ownerid=owner.ownerid, username=owner.username
Expand All @@ -36,6 +37,9 @@ def get_owner_provider_service(
on_token_refresh=(
get_token_refresh_callback(owner) if not using_integration else None
),
fallback_installations=installation_info.get("fallback_installations")
if installation_info
else None,
)
return _get_owner_provider_service_instance(service, **adapter_params)

Expand Down
18 changes: 15 additions & 3 deletions services/repository.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,11 @@
GithubAppInstallation,
)
from helpers.token_refresh import get_token_refresh_callback
from services.bots import get_repo_appropriate_bot_token, get_token_type_mapping
from services.bots import (
get_owner_installation_id,
get_repo_appropriate_bot_token,
get_token_type_mapping,
)
from services.yaml import read_yaml_field

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -52,9 +56,14 @@ def get_repo_provider_service(
get_config("setup", "http", "timeouts", "receive", default=60),
]
service = repository.owner.service
token, token_owner = get_repo_appropriate_bot_token(
repository, installation_name_to_use=installation_name_to_use
installation_info = get_owner_installation_id(
repository.owner,
repository.using_integration,
repository=repository,
ignore_installation=False,
installation_name=installation_name_to_use,
)
token, token_owner = get_repo_appropriate_bot_token(repository, installation_info)
adapter_params = dict(
repo=dict(
name=repository.name,
Expand All @@ -78,6 +87,9 @@ def get_repo_provider_service(
secret=get_config(service, "client_secret"),
),
on_token_refresh=get_token_refresh_callback(token_owner),
fallback_installations=installation_info.get("fallback_installations")
if installation_info
else None,
)
return _get_repo_provider_service_instance(repository.service, **adapter_params)

Expand Down
9 changes: 7 additions & 2 deletions services/tests/integration/test_bots.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

from database.tests.factories import RepositoryFactory
from helpers.exceptions import RepositoryWithoutValidBotError
from services.bots import get_repo_appropriate_bot_token
from services.bots import get_owner_installation_id, get_repo_appropriate_bot_token

fake_private_key = """-----BEGIN RSA PRIVATE KEY-----
MIICXAIBAAKBgQDCFqq2ygFh9UQU/6PoDJ6L9e4ovLPCHtlBt7vzDwyfwr3XGxln
Expand Down Expand Up @@ -36,7 +36,9 @@ async def test_get_repo_appropriate_bot_token_non_existing_integration(
owner__integration_id=5944641,
name="example-python",
using_integration=True,
private=True,
)
repo.owner.oauth_token = None
dbsession.add(repo)
dbsession.flush()
with pytest.raises(RepositoryWithoutValidBotError):
Expand All @@ -58,4 +60,7 @@ async def test_get_repo_appropriate_bot_token_bad_data(
dbsession.add(repo)
dbsession.flush()
with pytest.raises(requests.exceptions.HTTPError):
get_repo_appropriate_bot_token(repo)
info = get_owner_installation_id(
repo.owner, repo.using_integration, ignore_installation=False
)
get_repo_appropriate_bot_token(repo, info)
Loading

0 comments on commit d8fa620

Please sign in to comment.