Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Amazon linux 2023 fix #358

Open
wants to merge 5 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 11 additions & 11 deletions flintrock/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -632,6 +632,17 @@ def setup_node(
Cluster methods like provision_node() and add_slaves_node() should
delegate the main work of setting up new nodes to this function.
"""
# TODO: Move Python and Java setup to new service under services.py.
# New service to cover Python/Scala/Java: LanguageRuntimes (name?)
ssh_check_output(
client=ssh_client,
command=(
"""
set -e
sudo yum install -y python3 python
"""
)
)
host = ssh_client.get_transport().getpeername()[0]
ssh_check_output(
client=ssh_client,
Expand Down Expand Up @@ -666,17 +677,6 @@ def setup_node(
cluster.storage_dirs.root = storage_dirs['root']
cluster.storage_dirs.ephemeral = storage_dirs['ephemeral']

# TODO: Move Python and Java setup to new service under services.py.
# New service to cover Python/Scala/Java: LanguageRuntimes (name?)
ssh_check_output(
client=ssh_client,
command=(
"""
set -e
sudo yum install -y python3
"""
)
)
ensure_java(ssh_client, java_version)

for service in services:
Expand Down
7 changes: 6 additions & 1 deletion flintrock/ec2.py
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,7 @@ def add_slaves(
num_slaves: int,
spot_price: float,
min_root_ebs_size_gb: int,
ebs_volume_type: str,
tags: list,
assume_yes: bool,
):
Expand All @@ -283,6 +284,7 @@ def add_slaves(
for group in self.master_instance.security_groups]
block_device_mappings = get_ec2_block_device_mappings(
min_root_ebs_size_gb=min_root_ebs_size_gb,
ebs_volume_type=ebs_volume_type,
ami=self.master_instance.image_id,
region=self.region)
availability_zone = self.master_instance.placement['AvailabilityZone']
Expand Down Expand Up @@ -648,6 +650,7 @@ def get_or_create_flintrock_security_groups(
def get_ec2_block_device_mappings(
*,
min_root_ebs_size_gb: int,
ebs_volume_type: str,
ami: str,
region: str) -> 'List[dict]':
"""
Expand Down Expand Up @@ -683,7 +686,7 @@ def get_ec2_block_device_mappings(
# of a root instance store volume.
'VolumeSize': min_root_ebs_size_gb,
# gp2 is general-purpose SSD
'VolumeType': 'gp2'})
'VolumeType': ebs_volume_type})
del root_device['Ebs']['Encrypted']
block_device_mappings.append(root_device)

Expand Down Expand Up @@ -793,6 +796,7 @@ def launch(
security_groups,
spot_price=None,
min_root_ebs_size_gb,
ebs_volume_type,
vpc_id,
subnet_id,
instance_profile_name,
Expand Down Expand Up @@ -843,6 +847,7 @@ def launch(
security_group_ids = [sg.id for sg in user_security_groups + flintrock_security_groups]
block_device_mappings = get_ec2_block_device_mappings(
min_root_ebs_size_gb=min_root_ebs_size_gb,
ebs_volume_type=ebs_volume_type,
ami=ami,
region=region)

Expand Down
6 changes: 6 additions & 0 deletions flintrock/flintrock.py
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,7 @@ def cli(cli_context, config, provider, debug):
@click.option('--ec2-spot-request-duration',
help="(DEPRECATED) Duration a spot request is valid (e.g. 3d 2h 1m).")
@click.option('--ec2-min-root-ebs-size-gb', type=int, default=30)
@click.option('--ec2-ebs-volume-type', default='gp2')
@click.option('--ec2-vpc-id', default='', help="Leave empty for default VPC.")
@click.option('--ec2-subnet-id', default='')
@click.option('--ec2-instance-profile-name', default='')
Expand Down Expand Up @@ -406,6 +407,7 @@ def launch(
ec2_spot_price,
ec2_spot_request_duration,
ec2_min_root_ebs_size_gb,
ec2_ebs_volume_type,
ec2_vpc_id,
ec2_subnet_id,
ec2_instance_profile_name,
Expand Down Expand Up @@ -520,6 +522,7 @@ def launch(
security_groups=ec2_security_groups,
spot_price=ec2_spot_price,
min_root_ebs_size_gb=ec2_min_root_ebs_size_gb,
ebs_volume_type=ec2_ebs_volume_type,
vpc_id=ec2_vpc_id,
subnet_id=ec2_subnet_id,
instance_profile_name=ec2_instance_profile_name,
Expand Down Expand Up @@ -797,6 +800,7 @@ def stop(cli_context, cluster_name, ec2_region, ec2_vpc_id, assume_yes):
@click.option('--ec2-spot-request-duration',
help="(DEPRECATED) Duration a spot request is valid (e.g. 3d 2h 1m).")
@click.option('--ec2-min-root-ebs-size-gb', type=int, default=30)
@click.option('--ec2-ebs-volume-type', default='gp2')
@click.option('--assume-yes/--no-assume-yes', default=False)
@click.option('--ec2-tag', 'ec2_tags',
callback=ec2.cli_validate_tags,
Expand All @@ -815,6 +819,7 @@ def add_slaves(
ec2_spot_price,
ec2_spot_request_duration,
ec2_min_root_ebs_size_gb,
ec2_ebs_volume_type,
ec2_tags,
assume_yes):
"""
Expand Down Expand Up @@ -850,6 +855,7 @@ def add_slaves(
identity_file = ec2_identity_file
provider_options = {
'min_root_ebs_size_gb': ec2_min_root_ebs_size_gb,
'ebs_volume_type': ec2_ebs_volume_type,
'spot_price': ec2_spot_price,
'tags': ec2_tags
}
Expand Down
2 changes: 1 addition & 1 deletion flintrock/scripts/adoptium.repo
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

[Adoptium]
name=Adoptium
baseurl=https://packages.adoptium.net/artifactory/rpm/amazonlinux/$releasever/$basearch
baseurl=https://packages.adoptium.net/artifactory/rpm/amazonlinux/2/$basearch
enabled=1
gpgcheck=1
gpgkey=https://packages.adoptium.net/artifactory/api/gpg/key/public
38 changes: 13 additions & 25 deletions flintrock/scripts/setup-ephemeral-storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,11 @@ def device_pairs_to_tuple(pairs):
return BlockDevice(**device_dict)


def device_to_tuple(device):
    """Build a BlockDevice record from one entry of lsblk's JSON output.

    Maps the JSON entry's 'name' field to the record's 'kname' field;
    all other BlockDevice fields keep their defaults.
    """
    return BlockDevice(kname=device['name'])


def get_non_root_block_devices():
"""
Get all the non-root block devices available to the host.
Expand All @@ -65,34 +70,17 @@ def get_non_root_block_devices():
"""
block_devices_raw = subprocess.check_output([
'lsblk',
'--ascii',
'--pairs',
'--bytes',
'--paths',
'--output', 'KNAME,MOUNTPOINT,SIZE',
# --inverse and --nodeps make sure that
# 1) we get the mount points for devices that have holder devices
# 2) we don't get the holder devices themselves
'--inverse',
'--nodeps',
'--noheadings',
'--fs',
'--json',
'-p',
]).decode('utf-8')
block_devices = [
device_pairs_to_tuple(line.split())
for line in block_devices_raw.splitlines()
]
non_root_block_devices = [
device for device in block_devices
if device.mountpoint != '/'
]
# Skip tiny devices, like the 1M devices that show up on
# m5 instances on EC2.
# See: https://github.com/nchammas/flintrock/issues/256
non_trivial_non_root_block_devices = [
device for device in non_root_block_devices
if int(device.size) >= 1024 ** 3
device_to_tuple(device)
for device in json.loads(block_devices_raw)['blockdevices']
if 'children' not in device and (('mountpoints' in device and device['mountpoints'][0] != '/')
or ('mountpoint' in device and device['mountpoint'] != '/'))
]
return non_trivial_non_root_block_devices
return block_devices


def unmount_devices(devices):
Expand Down
15 changes: 14 additions & 1 deletion flintrock/templates/spark/conf/spark-env.sh
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,20 @@ export HADOOP_CONF_DIR="$HOME/hadoop/conf"

# TODO: Make this non-EC2-specific.
# Bind Spark's web UIs to this machine's public EC2 hostname
export SPARK_PUBLIC_DNS="$(curl --silent http://169.254.169.254/latest/meta-data/public-hostname)"
# Resolve this instance's public hostname from the EC2 instance metadata
# service (IMDS), trying token-less IMDSv1 first and falling back to
# token-based IMDSv2. If neither yields a usable hostname (e.g. the
# instance has no public DNS, or the endpoint returns an HTML error
# page), SPARK_PUBLIC_DNS is deliberately left unset.
spark_public_hostname="$(curl --silent http://169.254.169.254/latest/meta-data/public-hostname)" #IMDSv1 check
# An empty response or one that looks like an HTML document (DOCTYPE/html/
# head/body markers) means the IMDSv1 call failed — likely because the
# instance requires IMDSv2 tokens.
if [[ -z "$spark_public_hostname" ]] || [[ "$spark_public_hostname" == *"DOCTYPE"*"html"*"head"*"body"* ]]
then
    # IMDSv2: obtain a session token (TTL 21600s = 6h, the maximum) and
    # retry the metadata request with it.
    TOKEN="$(curl --silent -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")"
    spark_public_hostname="$(curl --silent -H "X-aws-ec2-metadata-token: $TOKEN" http://169.254.169.254/latest/meta-data/public-hostname)" #IMDSv2 check
    # Same sanity check as above on the IMDSv2 response.
    if [[ -z "$spark_public_hostname" ]] || [[ "$spark_public_hostname" == *"DOCTYPE"*"html"*"head"*"body"* ]]
    then
        true #skip setting SPARK_PUBLIC_DNS
    else
        export SPARK_PUBLIC_DNS="$spark_public_hostname"
    fi
else
    export SPARK_PUBLIC_DNS="$spark_public_hostname"
fi

# TODO: Set a high ulimit for large shuffles
# Need to find a way to do this, since "sudo ulimit..." doesn't fly.
Expand Down
Loading