diff --git a/requirements.in b/requirements.in index 0a48a7dfc..bfed8d1f6 100644 --- a/requirements.in +++ b/requirements.in @@ -1,4 +1,4 @@ -https://github.com/codecov/shared/archive/bf5fd12a12b8bd35c533298ccee4813a15e20067.tar.gz#egg=shared +https://github.com/codecov/shared/archive/93601e376ee28524b066ebde0298f491ee4a43de.tar.gz#egg=shared https://github.com/codecov/opentelem-python/archive/refs/tags/v0.0.4a1.tar.gz#egg=codecovopentelem https://github.com/codecov/test-results-parser/archive/5515e960d5d38881036e9127f86320efca649f13.tar.gz#egg=test-results-parser boto3 diff --git a/requirements.txt b/requirements.txt index 924140821..db375748c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -376,7 +376,7 @@ sentry-sdk==1.40.0 # via # -r requirements.in # shared -shared @ https://github.com/codecov/shared/archive/bf5fd12a12b8bd35c533298ccee4813a15e20067.tar.gz +shared @ https://github.com/codecov/shared/archive/93601e376ee28524b066ebde0298f491ee4a43de.tar.gz # via -r requirements.in six==1.15.0 # via diff --git a/services/bots.py b/services/bots.py index db6186398..8fb8caa4c 100644 --- a/services/bots.py +++ b/services/bots.py @@ -1,10 +1,12 @@ import logging import random from datetime import datetime, timezone -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional, Tuple, TypedDict from shared.config import get_config +from shared.github import is_installation_rate_limited from shared.torngit.base import TokenType +from shared.typings.oauth_token_types import GithubInstallationInfo from database.models import Owner, Repository from database.models.core import ( @@ -15,6 +17,7 @@ from helpers.exceptions import OwnerWithoutValidBotError, RepositoryWithoutValidBotError from services.encryption import encryptor from services.github import get_github_integration_token +from services.redis import get_redis_connection log = logging.getLogger(__name__) @@ -34,6 +37,65 @@ def _get_installation_weight(installation: GithubAppInstallation) -> int: return age_hours + 2**age.days +def _get_apps_from_weighted_selection( + owner: Owner, installation_name: str, repository: Optional[Repository] +) -> List[GithubAppInstallation]: + ghapp_installations_filter: List[GithubAppInstallation] = list( + filter( + lambda obj: ( + obj.name == installation_name + and obj.is_configured() + and ( + # If there is a repo we want only the apps that cover said repo + (repository and obj.is_repo_covered_by_integration(repository)) + # If there is no repo we still need some true value + or (not repository) + ) + ), + owner.github_app_installations or [], + ) + ) + # We assign weights to the apps based on how long ago they were created. + # The idea is that there's a greater chance that a change misconfigured the app, + # So apps recently created are selected less frequently than older apps + weights = [ + min(MAX_GITHUB_APP_SELECTION_WEIGHT, _get_installation_weight(obj)) + for obj in ghapp_installations_filter + ] + # Random selection of size 3. + # If all apps have roughly the same probability of being selected, the array would have different entries. + # If 1 app dominates the probability of selection than it would probably be that app repeated 3 times, BUT + # from time to time the less frequent one would be selected. + apps_to_consider = ( + random.choices(ghapp_installations_filter, weights=weights, k=3) + if len(ghapp_installations_filter) > 0 + else [] + ) + if installation_name != GITHUB_APP_INSTALLATION_DEFAULT_NAME: + # Add the default app as the last fallback if the owner is using a different app for the task + default_apps = filter( + lambda obj: obj.name == GITHUB_APP_INSTALLATION_DEFAULT_NAME, + owner.github_app_installations, + ) + if default_apps: + apps_to_consider.extend(default_apps) + # Now we de-duplicate the apps_to_consider list before returning + seen_ids = dict() + list_to_return = [] + for app in apps_to_consider: + if seen_ids.get(app.id, False): + continue + seen_ids[app.id] = True + list_to_return.append(app) + return list_to_return + + +class NoConfiguredAppsAvailable(Exception): + def __init__(self, apps_count: int, all_rate_limited: bool) -> None: + self.apps_count = apps_count + self.all_rate_limited = all_rate_limited + + def get_owner_installation_id( owner: Owner, deprecated_using_integration: bool, @@ -59,77 +121,47 @@ def get_owner_installation_id( ) if not ignore_installation or deprecated_using_integration: - default_app_installation_filter: List[GithubAppInstallation] = list( + redis_connection = get_redis_connection() + apps_to_consider = _get_apps_from_weighted_selection( + owner, installation_name, repository + ) + + apps_matching_criteria_count = len(apps_to_consider) + apps_to_consider = list( filter( - lambda obj: ( - obj.name == installation_name - and obj.is_configured() - and ( - # If there is a repo we want only the apps that cover said repo - (repository and obj.is_repo_covered_by_integration(repository)) - # If there is no repo we still need some true value - or (not repository) - ) - ), - owner.github_app_installations or [], + lambda obj: not is_installation_rate_limited(redis_connection, obj.id), + apps_to_consider, ) ) - # We assign weights to the apps based on how long ago they were updated. - # The idea is that there's a greater chance that a change misconfigured the app, - # So apps recently updated are selected less frequently than older apps - weights = [ - min(MAX_GITHUB_APP_SELECTION_WEIGHT, _get_installation_weight(obj)) - for obj in default_app_installation_filter - ] - # Random selection of size 3. - # If all apps have roughly the same probability of being selected, the array would have different entries. - # If 1 app dominates the probability of selection than it would probably be that app repeated 3 times, BUT - # from time to time the less frequent one would be selected. - apps_to_consider = ( - random.choices(default_app_installation_filter, weights=weights, k=3) - if len(default_app_installation_filter) > 0 - else [] - ) - already_checked = dict() - # filter is an Iterator, so we need to scan matches - for app_installation in apps_to_consider: - if already_checked.get(app_installation.installation_id): - continue - already_checked[app_installation.installation_id] = True - if repository: - if app_installation.is_repo_covered_by_integration(repository): - log.info( - "Selected github installation for repo", - extra=dict( - installation=app_installation.external_id, - installation_name=app_installation.name, - ownerid=owner.ownerid, - repoid=repository.repoid, - ), - ) - return { - "installation_id": app_installation.installation_id, - "app_id": app_installation.app_id, - "pem_path": app_installation.pem_path, - } - # Not returning None here because we found situations where ghapp installations will mark the - # the repo as NOT covered but it is, in fact, covered. - # We need to backfill some things. - else: - # Getting owner installation - not tied to any particular repo - log.info( - "Selected github installation for owner", - extra=dict( - installation=app_installation.external_id, - installation_name=app_installation.name, - ownerid=owner.ownerid, + + if apps_to_consider: + main_name = apps_to_consider[0].name + info_to_get_tokens = list( + map( + lambda obj: GithubInstallationInfo( + installation_id=obj.installation_id, + app_id=obj.app_id, + pem_path=obj.pem_path, ), + apps_to_consider, ) - return { - "installation_id": app_installation.installation_id, - "app_id": app_installation.app_id, - "pem_path": app_installation.pem_path, - } + ) + main_selected_info = info_to_get_tokens.pop(0) + log.info( + "Selected installation to communicate with github", + extra=dict( + installation_id=main_selected_info["installation_id"], + installation_name=main_name, + fallback_installations=[ + obj["installation_id"] for obj in info_to_get_tokens + ], + ), + ) + return {**main_selected_info, "fallback_installations": info_to_get_tokens} + elif apps_matching_criteria_count > 0: + raise NoConfiguredAppsAvailable( + apps_count=apps_matching_criteria_count, all_rate_limited=True + ) # DEPRECATED FLOW - begin if owner.integration_id and deprecated_using_integration: log.info("Selected owner integration to communicate with github") @@ -140,13 +172,13 @@ def get_owner_installation_id( def get_repo_appropriate_bot_token( repo: Repository, - installation_name_to_use: Optional[str] = GITHUB_APP_INSTALLATION_DEFAULT_NAME, + installation_info: Optional[Dict] = None, ) -> Tuple[Dict, Optional[Owner]]: log.info( "Get repo appropriate bot token", extra=dict( - installation_name_to_use=installation_name_to_use, + installation_info=installation_info, repoid=repo.repoid, ), ) @@ -154,19 +186,12 @@ def get_repo_appropriate_bot_token( if is_enterprise() and get_config(repo.service, "bot"): return get_public_bot_token(repo) - installation_dict = get_owner_installation_id( - repo.owner, - repo.using_integration, - repository=repo, - ignore_installation=False, - installation_name=installation_name_to_use, - ) - if installation_dict: + if installation_info: github_token = get_github_integration_token( repo.owner.service, - installation_dict["installation_id"], - app_id=installation_dict.get("app_id", None), - pem_path=installation_dict.get("pem_path", None), + installation_info["installation_id"], + app_id=installation_info.get("app_id", None), + pem_path=installation_info.get("pem_path", None), ) installation_token = dict(key=github_token) # The token is not owned by an Owner object, so 2nd arg is None @@ -259,11 +284,8 @@ def _get_repo_appropriate_bot(repo: Repository) -> Owner: def get_owner_appropriate_bot_token( - owner, using_integration, ignore_installation: bool = False + owner, installation_dict: Optional[Dict] = None ) -> Dict: - installation_dict = get_owner_installation_id( - owner, using_integration, ignore_installation=ignore_installation - ) if installation_dict: github_token = get_github_integration_token( owner.service, diff --git a/services/owner.py b/services/owner.py index cbb3ff348..2dbbc6e02 100644 --- a/services/owner.py +++ b/services/owner.py @@ -4,7 +4,7 @@ from shared.config import get_config, get_verify_ssl from helpers.token_refresh import get_token_refresh_callback -from services.bots import get_owner_appropriate_bot_token +from services.bots import get_owner_appropriate_bot_token, get_owner_installation_id log = logging.getLogger(__name__) @@ -17,9 +17,10 @@ def get_owner_provider_service( get_config("setup", "http", "timeouts", "receive", default=30), ] service = owner.service - token = get_owner_appropriate_bot_token( + installation_info = get_owner_installation_id( owner, using_integration, ignore_installation=ignore_installation ) + token = get_owner_appropriate_bot_token(owner, installation_dict=installation_info) adapter_params = dict( owner=dict( service_id=owner.service_id, ownerid=owner.ownerid, username=owner.username @@ -36,6 +37,9 @@ def get_owner_provider_service( on_token_refresh=( get_token_refresh_callback(owner) if not using_integration else None ), + fallback_installations=installation_info.get("fallback_installations") + if installation_info + else None, ) return _get_owner_provider_service_instance(service, **adapter_params) diff --git a/services/repository.py b/services/repository.py index 5e25cea0c..c21d56496 100644 --- a/services/repository.py +++ b/services/repository.py @@ -20,7 +20,11 @@ GithubAppInstallation, ) from helpers.token_refresh import get_token_refresh_callback -from services.bots import get_repo_appropriate_bot_token, get_token_type_mapping +from services.bots import ( + get_owner_installation_id, + get_repo_appropriate_bot_token, + get_token_type_mapping, +) from services.yaml import read_yaml_field log = logging.getLogger(__name__) @@ -52,9 +56,14 @@ def get_repo_provider_service( get_config("setup", "http", "timeouts", "receive", default=60), ] service = repository.owner.service - token, token_owner = get_repo_appropriate_bot_token( - repository, installation_name_to_use=installation_name_to_use + installation_info = get_owner_installation_id( + repository.owner, + repository.using_integration, + repository=repository, + ignore_installation=False, + installation_name=installation_name_to_use, ) + token, token_owner = get_repo_appropriate_bot_token(repository, installation_info) adapter_params = dict( repo=dict( name=repository.name, @@ -78,6 +87,9 @@ def get_repo_provider_service( secret=get_config(service, "client_secret"), ), on_token_refresh=get_token_refresh_callback(token_owner), + fallback_installations=installation_info.get("fallback_installations") + if installation_info + else None, ) return _get_repo_provider_service_instance(repository.service, **adapter_params) diff --git a/services/tests/integration/test_bots.py b/services/tests/integration/test_bots.py index f11df1aa4..025bcc859 100644 --- a/services/tests/integration/test_bots.py +++ b/services/tests/integration/test_bots.py @@ -3,7 +3,7 @@ from database.tests.factories import RepositoryFactory from helpers.exceptions import RepositoryWithoutValidBotError -from services.bots import get_repo_appropriate_bot_token +from services.bots import get_owner_installation_id, get_repo_appropriate_bot_token fake_private_key = """-----BEGIN RSA PRIVATE KEY----- MIICXAIBAAKBgQDCFqq2ygFh9UQU/6PoDJ6L9e4ovLPCHtlBt7vzDwyfwr3XGxln @@ -36,7 +36,9 @@ async def test_get_repo_appropriate_bot_token_non_existing_integration( owner__integration_id=5944641, name="example-python", using_integration=True, + private=True, ) + repo.owner.oauth_token = None dbsession.add(repo) dbsession.flush() with pytest.raises(RepositoryWithoutValidBotError): @@ -58,4 +60,7 @@ async def test_get_repo_appropriate_bot_token_bad_data( dbsession.add(repo) dbsession.flush() with pytest.raises(requests.exceptions.HTTPError): - get_repo_appropriate_bot_token(repo) + info = get_owner_installation_id( + repo.owner, repo.using_integration, ignore_installation=False + ) + get_repo_appropriate_bot_token(repo, info) diff --git a/services/tests/test_bots.py b/services/tests/test_bots.py index bae9835bf..1d6ad4978 100644 --- a/services/tests/test_bots.py +++ b/services/tests/test_bots.py @@ -9,6 +9,7 @@ ) from database.tests.factories import OwnerFactory, RepositoryFactory from services.bots import ( + NoConfiguredAppsAvailable, OwnerWithoutValidBotError, RepositoryWithoutValidBotError, TokenType, @@ -284,7 +285,13 @@ def test_get_repo_appropriate_bot_token_repo_with_user_with_integration_bot_usin ), ) expected_result = ({"key": "v1.test50wm4qyel2pbtpbusklcarg7c2etcbunnswp"}, None) - assert get_repo_appropriate_bot_token(repo) == expected_result + installation_info = get_owner_installation_id( + repo.owner, repo.using_integration, ignore_installation=False + ) + assert installation_info == {"installation_id": 1654873} + assert ( + get_repo_appropriate_bot_token(repo, installation_info) == expected_result + ) def test_get_repo_appropriate_bot_token_via_installation_covered_repo( self, mock_configuration, dbsession, mocker @@ -311,7 +318,16 @@ def test_get_repo_appropriate_bot_token_via_installation_covered_repo( "services.bots.get_github_integration_token", return_value="installation_token", ) - response = get_repo_appropriate_bot_token(repo) + installation_info = get_owner_installation_id( + repo.owner, repo.using_integration, ignore_installation=False + ) + assert installation_info == { + "installation_id": 12341234, + "app_id": None, + "pem_path": None, + "fallback_installations": [], + } + response = get_repo_appropriate_bot_token(repo, installation_info) mock_get_github_integration_token.assert_called_with( "github", 12341234, app_id=None, pem_path=None ) @@ -324,7 +340,7 @@ def test_get_owner_appropriate_bot_token_owner_no_bot_no_integration(self): owner = OwnerFactory.create( unencrypted_oauth_token="owner_token", integration_id=None, bot=None ) - assert get_owner_appropriate_bot_token(owner, using_integration=False) == { + assert get_owner_appropriate_bot_token(owner, None) == { "key": "owner_token", "secret": None, } @@ -335,7 +351,7 @@ def test_get_owner_appropriate_bot_token_owner_has_bot_no_integration(self): integration_id=None, bot=OwnerFactory.create(unencrypted_oauth_token="bot_token"), ) - assert get_owner_appropriate_bot_token(owner, using_integration=False) == { + assert get_owner_appropriate_bot_token(owner, None) == { "key": "bot_token", "secret": None, } @@ -347,7 +363,7 @@ def test_get_owner_appropriate_bot_token_repo_with_no_oauth_token_at_all(self): bot=OwnerFactory.create(unencrypted_oauth_token=None), ) with pytest.raises(OwnerWithoutValidBotError): - get_owner_appropriate_bot_token(owner, using_integration=False) + get_owner_appropriate_bot_token(owner, None) def test_get_owner_appropriate_bot_token_with_user_with_integration_bot_using_it( self, mock_configuration, codecov_vcr @@ -370,9 +386,11 @@ def test_get_owner_appropriate_bot_token_with_user_with_integration_bot_using_it ) expected_result = {"key": "v1.test50wm4qyel2pbtpbusklcarg7c2etcbunnswp"} + integration_dict = get_owner_installation_id( + owner, True, ignore_installation=False + ) assert ( - get_owner_appropriate_bot_token(owner, using_integration=True) - == expected_result + get_owner_appropriate_bot_token(owner, integration_dict) == expected_result ) def test_get_owner_installation_id_no_installation_no_legacy_integration( @@ -406,8 +424,70 @@ def test_get_owner_installation_id_yes_installation_yes_legacy_integration( "installation_id": 123456, "app_id": None, "pem_path": None, + "fallback_installations": [], } + def test_get_owner_installation_id_yes_installation_yes_legacy_integration_yes_fallback( + self, mocker, dbsession + ): + owner = OwnerFactory(service="github", integration_id=12341234) + installation_0 = GithubAppInstallation( + owner=owner, + name=GITHUB_APP_INSTALLATION_DEFAULT_NAME, + repository_service_ids=None, + installation_id=123456, + ) + installation_1 = GithubAppInstallation( + owner=owner, + name="my_app", + repository_service_ids=None, + installation_id=12000, + app_id=1212, + pem_path="path", + ) + dbsession.add(installation_0) + dbsession.flush() + assert owner.github_app_installations == [installation_0, installation_1] + assert get_owner_installation_id(owner, True, installation_name="my_app") == { + "installation_id": 12000, + "app_id": 1212, + "pem_path": "path", + "fallback_installations": [ + {"installation_id": 123456, "app_id": None, "pem_path": None} + ], + } + + def test_get_owner_installation_id_yes_installation_all_rate_limited( + self, mocker, dbsession, mock_redis + ): + mock_redis.exists.return_value = True + owner = OwnerFactory(service="github", integration_id=12341234) + installation_0 = GithubAppInstallation( + owner=owner, + name=GITHUB_APP_INSTALLATION_DEFAULT_NAME, + repository_service_ids=None, + installation_id=123456, + ) + installation_1 = GithubAppInstallation( + owner=owner, + name="my_app", + repository_service_ids=None, + installation_id=12000, + app_id=1212, + pem_path="path", + ) + dbsession.add(installation_0) + dbsession.flush() + assert owner.github_app_installations == [installation_0, installation_1] + with pytest.raises(NoConfiguredAppsAvailable): + get_owner_installation_id(owner, True, installation_name="my_app") + mock_redis.exists.assert_any_call( + f"rate_limited_installations_{installation_0.id}" + ) + mock_redis.exists.assert_any_call( + f"rate_limited_installations_{installation_1.id}" + ) + def test_get_owner_installation_id_yes_installation_yes_legacy_integration_specific_repos( self, mocker, dbsession ): @@ -433,7 +513,12 @@ def test_get_owner_installation_id_yes_installation_yes_legacy_integration_speci owner, repo_covered_by_installation.using_integration, repository=repo_covered_by_installation, - ) == {"installation_id": 123456, "app_id": 123, "pem_path": "some_path"} + ) == { + "installation_id": 123456, + "app_id": 123, + "pem_path": "some_path", + "fallback_installations": [], + } # Notice that the installation object overrides the `Repository.using_integration` column completely # ^ Not true anymore. We decided against it because there are some edge cases in filling up the list assert get_owner_installation_id( diff --git a/services/tests/test_owner_service.py b/services/tests/test_owner_service.py index c5f31f93a..168fb42eb 100644 --- a/services/tests/test_owner_service.py +++ b/services/tests/test_owner_service.py @@ -21,6 +21,7 @@ def test_get_owner_provider_service(self, dbsession): "username": owner.username, }, "repo": {}, + "fallback_installations": None, } assert res.service == "github" assert res.data == expected_data @@ -40,6 +41,7 @@ def test_get_owner_provider_service_other_service(self, dbsession): "username": owner.username, }, "repo": {}, + "fallback_installations": None, } assert res.service == "gitlab" assert res.data == expected_data @@ -61,6 +63,7 @@ def test_get_owner_provider_service_different_bot(self, dbsession): "username": owner.username, }, "repo": {}, + "fallback_installations": None, } assert res.data["repo"] == expected_data["repo"] assert res.data == expected_data diff --git a/services/tests/test_repository_service.py b/services/tests/test_repository_service.py index 7c12e0b16..b54b8cd7b 100644 --- a/services/tests/test_repository_service.py +++ b/services/tests/test_repository_service.py @@ -102,6 +102,7 @@ def test_get_repo_provider_service_github(self, dbsession): "service_id": repo.service_id, "repoid": repo.repoid, }, + "fallback_installations": None, } assert res.data == expected_data assert repo.owner.service == "github" @@ -113,6 +114,60 @@ def test_get_repo_provider_service_github(self, dbsession): "secret": None, } + def test_get_repo_provider_service_github_with_installations( + self, dbsession, mocker + ): + mocker.patch( + "services.bots.get_github_integration_token", + return_value="installation_token", + ) + repo = RepositoryFactory.create( + owner__service="github", + name="example-python", + using_integration=False, + ) + installation_0 = GithubAppInstallation( + name=GITHUB_APP_INSTALLATION_DEFAULT_NAME, + installation_id=1200, + app_id=200, + repository_service_ids=None, + owner=repo.owner, + ) + installation_1 = GithubAppInstallation( + name="my_app", + installation_id=1300, + app_id=300, + pem_path="path", + repository_service_ids=None, + owner=repo.owner, + ) + repo.owner.github_app_installations = [installation_0, installation_1] + dbsession.add_all([repo, installation_0, installation_1]) + dbsession.flush() + res = get_repo_provider_service(repo, installation_name_to_use="my_app") + expected_data = { + "owner": { + "ownerid": repo.owner.ownerid, + "service_id": repo.owner.service_id, + "username": repo.owner.username, + }, + "repo": { + "name": "example-python", + "using_integration": True, + "service_id": repo.service_id, + "repoid": repo.repoid, + }, + "fallback_installations": [ + {"app_id": 200, "installation_id": 1200, "pem_path": None} + ], + } + assert res.data == expected_data + assert repo.owner.service == "github" + assert res._on_token_refresh is None + assert res.token == { + "key": "installation_token", + } + def test_get_repo_provider_service_bitbucket(self, dbsession): repo = RepositoryFactory.create( owner__unencrypted_oauth_token="testyftq3ovzkb3zmt823u3t04lkrt9w", @@ -134,6 +189,7 @@ def test_get_repo_provider_service_bitbucket(self, dbsession): "service_id": repo.service_id, "repoid": repo.repoid, }, + "fallback_installations": None, } assert res.data == expected_data assert repo.owner.service == "bitbucket" @@ -165,6 +221,7 @@ def test_get_repo_provider_service_with_token_refresh_callback(self, dbsession): "service_id": repo.service_id, "repoid": repo.repoid, }, + "fallback_installations": None, } assert res.data == expected_data assert res._on_token_refresh is not None @@ -197,6 +254,7 @@ def test_get_repo_provider_service_repo_bot(self, dbsession, mock_configuration) "service_id": repo.service_id, "repoid": repo.repoid, }, + "fallback_installations": None, } assert res.data == expected_data assert res.token == { @@ -248,6 +306,7 @@ def test_get_repo_provider_service_different_bot(self, dbsession): "service_id": repo.service_id, "repoid": repo.repoid, }, + "fallback_installations": None, } assert res.data["repo"] == expected_data["repo"] assert res.data == expected_data @@ -282,6 +341,7 @@ def test_get_repo_provider_service_no_bot(self, dbsession): "service_id": repo.service_id, "repoid": repo.repoid, }, + "fallback_installations": None, } assert res.data == expected_data assert res.token == { @@ -1020,6 +1080,7 @@ async def test_get_repo_gh_no_integration(self, dbsession, mocker): "service_id": "123456", "repoid": repo.repoid, }, + "fallback_installations": None, } assert res.data["repo"] == expected_data["repo"] assert res.data == expected_data