Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Replace use of boto with boto3 for awsProvisioner.py #4859

Merged
merged 26 commits into from
Apr 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
61b15a6
Take out boto2 from awsProvisioner.py
stxue1 Apr 2, 2024
a95bbb0
Add mypy stub file for s3
stxue1 Apr 2, 2024
8aebb84
Lazy import aws to avoid dependency if extra is not installed yet
stxue1 Apr 2, 2024
209465b
Also lazy import in tests
stxue1 Apr 2, 2024
abd96bf
Separate out wdl kubernetes test to avoid missing dependency
stxue1 Apr 2, 2024
c4ffe89
Add unittest main
stxue1 Apr 2, 2024
65c04de
Fix wdl CI to run separated tests
stxue1 Apr 2, 2024
5a03dfc
Fix typo in lookup
stxue1 Apr 2, 2024
c178dbb
Update moto and remove leftover line in node.py
stxue1 Apr 2, 2024
5452343
Apply suggestions from code review
stxue1 Apr 10, 2024
fa9d277
Apply fixes
stxue1 Apr 10, 2024
3f19826
Merge branch 'issues/4718-boto-python-312' of github.com:DataBiospher…
stxue1 Apr 10, 2024
7dc3663
Merge master into issues/4718-boto-python-312
github-actions[bot] Apr 10, 2024
281da5a
Abstract AWS ErrorCondition server errors into a constant instance
stxue1 Apr 10, 2024
41127b8
Merge branch 'issues/4718-boto-python-312' of github.com:DataBiospher…
stxue1 Apr 10, 2024
8153a8e
Merge master into issues/4718-boto-python-312
github-actions[bot] Apr 10, 2024
77c2fd6
Merge master into issues/4718-boto-python-312
github-actions[bot] Apr 10, 2024
9e74234
Merge master into issues/4718-boto-python-312
github-actions[bot] Apr 11, 2024
f80f590
Move AWSServiceErrors declaration to a better place
stxue1 Apr 11, 2024
47e5f01
Merge branch 'issues/4718-boto-python-312' of github.com:DataBiospher…
stxue1 Apr 11, 2024
9c714f7
Prevent aliasing from confusing sphinx and remove cached autoapi in c…
stxue1 Apr 11, 2024
43db4cf
Update src/toil/lib/aws/__init__.py
stxue1 Apr 16, 2024
62f3a56
Change retry loop
stxue1 Apr 16, 2024
ad05b0d
Merge master into issues/4718-boto-python-312
github-actions[bot] Apr 17, 2024
d6a9992
Merge master into issues/4718-boto-python-312
github-actions[bot] Apr 17, 2024
05fa3b0
Replace assert with raise
adamnovak Apr 18, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitlab-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -318,7 +318,7 @@ wdl:
- pwd
- apt update && apt install -y default-jre
- ${MAIN_PYTHON_PKG} -m virtualenv venv && . venv/bin/activate && pip install -U pip wheel && make prepare && make develop extras=[all]
- make test threads="${TEST_THREADS}" marker="${MARKER}" tests=src/toil/test/wdl/wdltoil_test.py
- make test threads="${TEST_THREADS}" marker="${MARKER}" tests=src/toil/test/wdl/

jobstore:
rules:
Expand Down
1 change: 0 additions & 1 deletion contrib/admin/mypy-with-ignore.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def main():
'src/toil/provisioners/__init__.py',
'src/toil/provisioners/node.py',
'src/toil/provisioners/aws/boto2Context.py',
'src/toil/provisioners/aws/awsProvisioner.py',
'src/toil/provisioners/aws/__init__.py',
'src/toil/batchSystems/slurm.py',
'src/toil/batchSystems/gridengine.py',
Expand Down
6 changes: 5 additions & 1 deletion docs/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,13 @@ BUILDDIR = _build
help:
@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile
.PHONY: help Makefile clean

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

clean:
@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
rm -rf autoapi
6 changes: 4 additions & 2 deletions requirements-aws.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
boto>=2.48.0, <3
boto3-stubs[s3,sdb,iam,sts,boto3]>=1.28.3.post2, <2
boto3-stubs[s3,sdb,iam,sts,boto3,ec2,autoscaling]>=1.28.3.post2, <2
mypy-boto3-iam>=1.28.3.post2, <2 # Need to force .post1 to be replaced
moto>=4.1.11, <5
mypy-boto3-s3>=1.28.3.post2, <2
moto>=5.0.3, <6
ec2_metadata<3
11 changes: 5 additions & 6 deletions src/toil/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@
from typing_extensions import Literal

from toil import logProcessContext, lookupEnvVar
from toil.batchSystems.options import (add_all_batchsystem_options,
set_batchsystem_options)
from toil.batchSystems.options import set_batchsystem_options
from toil.bus import (ClusterDesiredSizeMessage,
ClusterSizeMessage,
JobCompletedMessage,
Expand All @@ -75,17 +74,15 @@
MessageBus,
QueueSizeMessage)
from toil.fileStores import FileID
from toil.lib.aws import zone_to_region, build_tag_dict_from_env
from toil.lib.compatibility import deprecated
from toil.lib.io import try_path, AtomicFileCreate
from toil.lib.retry import retry
from toil.provisioners import (add_provisioner_options,
cluster_factory,
parse_node_types)
cluster_factory)
from toil.realtimeLogger import RealtimeLogger
from toil.statsAndLogging import (add_logging_options,
set_logging_from_options)
from toil.version import dockerRegistry, dockerTag, version, baseVersion
from toil.version import dockerRegistry, dockerTag, version

if TYPE_CHECKING:
from toil.batchSystems.abstractBatchSystem import AbstractBatchSystem
Expand Down Expand Up @@ -1440,6 +1437,8 @@ def __init__(self, bus: MessageBus, provisioner: Optional["AbstractProvisioner"]
clusterName = str(provisioner.clusterName)
if provisioner._zone is not None:
if provisioner.cloud == 'aws':
# lazy import to avoid AWS dependency if the aws extra is not installed
from toil.lib.aws import zone_to_region
# Remove AZ name
region = zone_to_region(provisioner._zone)
else:
Expand Down
20 changes: 5 additions & 15 deletions src/toil/jobStores/aws/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,19 +24,18 @@
from botocore.client import Config
from botocore.exceptions import ClientError

from toil.lib.aws import session
from toil.lib.aws import session, AWSServerErrors
from toil.lib.aws.utils import connection_reset, get_bucket_region
from toil.lib.compatibility import compat_bytes
from toil.lib.retry import (DEFAULT_DELAYS,
DEFAULT_TIMEOUT,
ErrorCondition,
get_error_code,
get_error_message,
get_error_status,
old_retry,
retry)
if TYPE_CHECKING:
from mypy_boto3_s3 import S3Client, S3ServiceResource
from mypy_boto3_s3 import S3ServiceResource

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -193,10 +192,7 @@ def fileSizeAndTime(localFilePath):
return file_stat.st_size, file_stat.st_mtime


@retry(errors=[ErrorCondition(
error=ClientError,
error_codes=[404, 500, 502, 503, 504]
)])
@retry(errors=[AWSServerErrors])
def uploadFromPath(localFilePath: str,
resource,
bucketName: str,
Expand Down Expand Up @@ -232,10 +228,7 @@ def uploadFromPath(localFilePath: str,
return version


@retry(errors=[ErrorCondition(
error=ClientError,
error_codes=[404, 500, 502, 503, 504]
)])
@retry(errors=[AWSServerErrors])
def uploadFile(readable,
resource,
bucketName: str,
Expand Down Expand Up @@ -287,10 +280,7 @@ class ServerSideCopyProhibitedError(RuntimeError):
insists that you pay to download and upload the data yourself instead.
"""

@retry(errors=[ErrorCondition(
error=ClientError,
error_codes=[404, 500, 502, 503, 504]
)])
@retry(errors=[AWSServerErrors])
def copyKeyMultipart(resource: "S3ServiceResource",
srcBucketName: str,
srcKeyName: str,
Expand Down
25 changes: 19 additions & 6 deletions src/toil/lib/aws/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,11 +16,25 @@
import os
import re
import socket
import toil.lib.retry
from http.client import HTTPException
from typing import Dict, MutableMapping, Optional
from typing import Dict, MutableMapping, Optional, Union, Literal
from urllib.error import URLError
from urllib.request import urlopen

from botocore.exceptions import ClientError

from mypy_boto3_s3.literals import BucketLocationConstraintType

AWSRegionName = Union[BucketLocationConstraintType, Literal["us-east-1"]]

# These are errors where we think something randomly
# went wrong on the AWS side and we ought to retry.
AWSServerErrors = toil.lib.retry.ErrorCondition(
stxue1 marked this conversation as resolved.
Show resolved Hide resolved
error=ClientError,
error_codes=[404, 500, 502, 503, 504]
)

logger = logging.getLogger(__name__)

# This file isn't allowed to import anything that depends on Boto or Boto3,
Expand Down Expand Up @@ -67,11 +81,10 @@ def get_aws_zone_from_metadata() -> Optional[str]:
# metadata.
try:
# Use the EC2 metadata service
import boto
str(boto) # to prevent removal of the import
from boto.utils import get_instance_metadata
from ec2_metadata import ec2_metadata

logger.debug("Fetch AZ from EC2 metadata")
return get_instance_metadata()['placement']['availability-zone']
return ec2_metadata.availability_zone
except ImportError:
# This is expected to happen a lot
logger.debug("No boto to fetch ECS metadata")
Expand Down Expand Up @@ -128,7 +141,7 @@ def get_current_aws_zone() -> Optional[str]:
get_aws_zone_from_environment_region() or \
get_aws_zone_from_boto()

def zone_to_region(zone: str) -> str:
def zone_to_region(zone: str) -> AWSRegionName:
"""Get a region (e.g. us-west-2) from a zone (e.g. us-west-1c)."""
# re.compile() caches the regex internally so we don't have to
availability_zone = re.compile(r'^([a-z]{2}-[a-z]+-[1-9][0-9]*)([a-z])$')
Expand Down
4 changes: 2 additions & 2 deletions src/toil/lib/aws/iam.py
Original file line number Diff line number Diff line change
Expand Up @@ -257,8 +257,8 @@ def get_policy_permissions(region: str) -> AllowedActionCollection:
:param zone: AWS zone to connect to
"""

iam: IAMClient = cast(IAMClient, get_client('iam', region))
sts: STSClient = cast(STSClient, get_client('sts', region))
iam: IAMClient = get_client('iam', region)
sts: STSClient = get_client('sts', region)
#TODO Condider effect: deny at some point
allowed_actions: AllowedActionCollection = defaultdict(lambda: {'Action': [], 'NotAction': []})
try:
Expand Down
85 changes: 63 additions & 22 deletions src/toil/lib/aws/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,16 +15,22 @@
import logging
import os
import threading
from typing import Dict, Optional, Tuple, cast
from typing import Dict, Optional, Tuple, cast, Union, Literal, overload, TypeVar

import boto
stxue1 marked this conversation as resolved.
Show resolved Hide resolved
import boto3
import boto3.resources.base
import boto.connection
import botocore
from boto3 import Session
from botocore.client import Config
from botocore.session import get_session
from botocore.utils import JSONFileCache
from mypy_boto3_autoscaling import AutoScalingClient
from mypy_boto3_ec2 import EC2Client, EC2ServiceResource
from mypy_boto3_iam import IAMClient, IAMServiceResource
from mypy_boto3_s3 import S3Client, S3ServiceResource
from mypy_boto3_sdb import SimpleDBClient
from mypy_boto3_sts import STSClient

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -120,6 +126,13 @@ def session(self, region: Optional[str]) -> boto3.session.Session:
storage.item = _new_boto3_session(region_name=region)
return cast(boto3.session.Session, storage.item)

@overload
def resource(self, region: Optional[str], service_name: Literal["s3"], endpoint_url: Optional[str] = None) -> S3ServiceResource: ...
@overload
def resource(self, region: Optional[str], service_name: Literal["iam"], endpoint_url: Optional[str] = None) -> IAMServiceResource: ...
@overload
def resource(self, region: Optional[str], service_name: Literal["ec2"], endpoint_url: Optional[str] = None) -> EC2ServiceResource: ...

def resource(self, region: Optional[str], service_name: str, endpoint_url: Optional[str] = None) -> boto3.resources.base.ServiceResource:
"""
Get the Boto3 Resource to use with the given service (like 'ec2') in the given region.
Expand All @@ -146,7 +159,28 @@ def resource(self, region: Optional[str], service_name: str, endpoint_url: Optio

return cast(boto3.resources.base.ServiceResource, storage.item)

def client(self, region: Optional[str], service_name: str, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> botocore.client.BaseClient:
@overload
def client(self, region: Optional[str], service_name: Literal["ec2"], endpoint_url: Optional[str] = None,
config: Optional[Config] = None) -> EC2Client: ...
@overload
def client(self, region: Optional[str], service_name: Literal["iam"], endpoint_url: Optional[str] = None,
config: Optional[Config] = None) -> IAMClient: ...
@overload
def client(self, region: Optional[str], service_name: Literal["s3"], endpoint_url: Optional[str] = None,
config: Optional[Config] = None) -> S3Client: ...
@overload
def client(self, region: Optional[str], service_name: Literal["sts"], endpoint_url: Optional[str] = None,
config: Optional[Config] = None) -> STSClient: ...
@overload
def client(self, region: Optional[str], service_name: Literal["sdb"], endpoint_url: Optional[str] = None,
config: Optional[Config] = None) -> SimpleDBClient: ...
@overload
def client(self, region: Optional[str], service_name: Literal["autoscaling"], endpoint_url: Optional[str] = None,
config: Optional[Config] = None) -> AutoScalingClient: ...


def client(self, region: Optional[str], service_name: Literal["ec2", "iam", "s3", "sts", "sdb", "autoscaling"], endpoint_url: Optional[str] = None,
config: Optional[Config] = None) -> botocore.client.BaseClient:
"""
Get the Boto3 Client to use with the given service (like 'ec2') in the given region.

Expand All @@ -159,9 +193,9 @@ def client(self, region: Optional[str], service_name: str, endpoint_url: Optiona
# Don't try and memoize if a custom config is used
with _init_lock:
if endpoint_url is not None:
return self.session(region).client(service_name, endpoint_url=endpoint_url, config=config) # type: ignore
return self.session(region).client(service_name, endpoint_url=endpoint_url, config=config)
else:
return self.session(region).client(service_name, config=config) # type: ignore
return self.session(region).client(service_name, config=config)

key = (region, service_name, endpoint_url)
storage = self.client_cache[key]
Expand All @@ -172,25 +206,12 @@ def client(self, region: Optional[str], service_name: str, endpoint_url: Optiona
if endpoint_url is not None:
# The Boto3 stubs are probably missing an overload here too. See:
# <https://github.com/vemel/mypy_boto3_builder/issues/121#issuecomment-1011322636>
storage.item = self.session(region).client(service_name, endpoint_url=endpoint_url) # type: ignore
storage.item = self.session(region).client(service_name, endpoint_url=endpoint_url)
else:
# We might not be able to pass None to Boto3 and have it be the same as no argument.
storage.item = self.session(region).client(service_name) # type: ignore
storage.item = self.session(region).client(service_name)
return cast(botocore.client.BaseClient , storage.item)

def boto2(self, region: Optional[str], service_name: str) -> boto.connection.AWSAuthConnection:
"""
Get the connected boto2 connection for the given region and service.
"""
if service_name == 'iam':
# IAM connections are regionless
region = 'universal'
key = (region, service_name)
storage = self.boto2_cache[key]
if not hasattr(storage, 'item'):
with _init_lock:
storage.item = getattr(boto, service_name).connect_to_region(region, profile_name=os.environ.get("TOIL_AWS_PROFILE", None))
return cast(boto.connection.AWSAuthConnection, storage.item)

# If you don't want your own AWSConnectionManager, we have a global one and some global functions
_global_manager = AWSConnectionManager()
Expand All @@ -205,7 +226,20 @@ def establish_boto3_session(region_name: Optional[str] = None) -> Session:
# Just use a global version of the manager. Note that we change the argument order!
return _global_manager.session(region_name)

def client(service_name: str, region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> botocore.client.BaseClient:
@overload
def client(service_name: Literal["ec2"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> EC2Client: ...
@overload
def client(service_name: Literal["iam"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> IAMClient: ...
@overload
def client(service_name: Literal["s3"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> S3Client: ...
@overload
def client(service_name: Literal["sts"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> STSClient: ...
@overload
def client(service_name: Literal["sdb"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> SimpleDBClient: ...
@overload
def client(service_name: Literal["autoscaling"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> AutoScalingClient: ...

def client(service_name: Literal["ec2", "iam", "s3", "sts", "sdb", "autoscaling"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None, config: Optional[Config] = None) -> botocore.client.BaseClient:
"""
Get a Boto 3 client for a particular AWS service, usable by the current thread.

Expand All @@ -215,7 +249,14 @@ def client(service_name: str, region_name: Optional[str] = None, endpoint_url: O
# Just use a global version of the manager. Note that we change the argument order!
return _global_manager.client(region_name, service_name, endpoint_url=endpoint_url, config=config)

def resource(service_name: str, region_name: Optional[str] = None, endpoint_url: Optional[str] = None) -> boto3.resources.base.ServiceResource:
@overload
def resource(service_name: Literal["s3"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None) -> S3ServiceResource: ...
@overload
def resource(service_name: Literal["iam"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None) -> IAMServiceResource: ...
@overload
def resource(service_name: Literal["ec2"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None) -> EC2ServiceResource: ...

def resource(service_name: Literal["s3", "iam", "ec2"], region_name: Optional[str] = None, endpoint_url: Optional[str] = None) -> boto3.resources.base.ServiceResource:
"""
Get a Boto 3 resource for a particular AWS service, usable by the current thread.

Expand Down
Loading
Loading