Add S3-based lock for CI (#118)

* Add S3-based lock for deployments
* Remove wait_in_queue as it is replaced with the S3 lock
* Create ephemeral S3 credential for run
* Fix unbound variable error
* Fix broken conditions
* Remove loop for FIP allocation
* Use ingress IP envvar in leafcloud env
* Add CI_S3_LOCK_HOST to arcus environment
* Add lock bucket to arcus config
* Fix typo
* Reinstate the actual test
* S3 lock merged to master
Authored by mkjpryor on Mar 11, 2024 (1 parent 0c154fb, commit 80967c4)
Showing 9 changed files with 119 additions and 43 deletions.
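
The heart of the change: deployments are serialized by claiming an object in a shared S3 bucket before provisioning and deleting it afterwards. The stackhpc/github-actions/s3-lock action that does this is not part of this diff, so the sketch below is only a plausible shape for such a lock, not its actual implementation; the aws CLI calls, the ci-lock key name and the 30-second poll interval are all illustrative assumptions:

    # Hypothetical sketch of an S3-backed mutex; NOT the implementation of
    # stackhpc/github-actions/s3-lock. The endpoint and bucket correspond to
    # the CI_S3_LOCK_HOST / CI_S3_LOCK_BUCKET variables configured below.
    LOCK_KEY="ci-lock"  # illustrative object key

    acquire_lock() {
        # Busy-wait until no other run holds the lock object...
        while aws s3api head-object --endpoint-url "https://$CI_S3_LOCK_HOST" \
                --bucket "$CI_S3_LOCK_BUCKET" --key "$LOCK_KEY" >/dev/null 2>&1; do
            sleep 30
        done
        # ...then claim it. A head-then-put sequence is racy, so a real
        # implementation needs a conditional write or another tie-breaker.
        echo "$GITHUB_RUN_ID" > /tmp/lock-holder
        aws s3api put-object --endpoint-url "https://$CI_S3_LOCK_HOST" \
            --bucket "$CI_S3_LOCK_BUCKET" --key "$LOCK_KEY" --body /tmp/lock-holder
    }

    release_lock() {
        aws s3api delete-object --endpoint-url "https://$CI_S3_LOCK_HOST" \
            --bucket "$CI_S3_LOCK_BUCKET" --key "$LOCK_KEY"
    }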

.github/actions/destroy/action.yml (44 additions, 4 deletions)

@@ -18,8 +18,48 @@ runs:
       set -eo pipefail
       source ci.env
       source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
-      FIP_ID="$(openstack floating ip list --tags "$AZIMUTH_ENVIRONMENT" -f json | jq -r '.[0].ID // ""')"
-      [ -n "$FIP_ID" ] && openstack floating ip delete $FIP_ID
+      if [ -n "$INGRESS_IP" ]; then
+        openstack floating ip delete $INGRESS_IP
+      fi
     env:
       INGRESS_IP: ${{ steps.ingress-ip.outputs.ip-address }}
     if: ${{ always() }}
+
+  - name: Configure S3 lock
+    id: s3-lock-config
+    shell: bash
+    run: |
+      set -e
+      source ci.env
+      source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+      if [ -z "$CI_S3_LOCK_HOST" ]; then
+        echo "CI_S3_LOCK_HOST not set - no lock will be used"
+        exit
+      elif [ -z "$CI_S3_LOCK_BUCKET" ]; then
+        echo "CI_S3_LOCK_BUCKET is required when using the lock" >&2
+        exit 1
+      fi
+      echo "host=${CI_S3_LOCK_HOST}" >> "$GITHUB_OUTPUT"
+      echo "access-key=${CI_S3_LOCK_ACCESS_KEY}" >> "$GITHUB_OUTPUT"
+      echo "secret-key=${CI_S3_LOCK_SECRET_KEY}" >> "$GITHUB_OUTPUT"
+      echo "bucket=${CI_S3_LOCK_BUCKET}" >> "$GITHUB_OUTPUT"
+    if: ${{ always() }}
+
+  - name: Release S3 lock
+    uses: stackhpc/github-actions/s3-lock@master
+    with:
+      host: ${{ steps.s3-lock-config.outputs.host }}
+      access-key: ${{ steps.s3-lock-config.outputs.access-key }}
+      secret-key: ${{ steps.s3-lock-config.outputs.secret-key }}
+      bucket: ${{ steps.s3-lock-config.outputs.bucket }}
+      action: release
+    if: ${{ steps.s3-lock-config.outputs.host != '' && always() }}
+
+  - name: Delete S3 credential
+    shell: bash
+    run: |
+      set -e
+      source ./ci.env
+      source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+      if [ -n "$CI_S3_LOCK_ACCESS_KEY" ]; then
+        openstack ec2 credentials delete $CI_S3_LOCK_ACCESS_KEY
+      fi
+    if: ${{ always() }}
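
Every cleanup step above runs under if: ${{ always() }}, so the lock is released and the credential removed even when an earlier step fails. The ephemeral credential is a plain OpenStack EC2 key pair; condensed from the setup and destroy diffs, its whole lifecycle is (assuming an authenticated OpenStack CLI session):

    # Ephemeral S3 credential lifecycle, condensed from the two actions
    # Create the pair: the access key is printed, the secret fetched separately
    CI_S3_LOCK_ACCESS_KEY="$(openstack ec2 credentials create -f value -c access)"
    CI_S3_LOCK_SECRET_KEY="$(openstack ec2 credentials show -f value -c secret $CI_S3_LOCK_ACCESS_KEY)"
    # ...use the pair as the S3 access/secret key for the lock bucket...
    # Delete the pair once the environment is torn down
    openstack ec2 credentials delete $CI_S3_LOCK_ACCESS_KEY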

.github/actions/setup/action.yml (49 additions, 24 deletions)

@@ -107,36 +107,61 @@ runs:
       source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
       ansible-galaxy install -f -r requirements.yml
+  # Generate and append the S3 credential to the CI environment file
+  - name: Configure S3 lock
+    id: s3-lock-config
+    shell: bash
+    run: |
+      set -e
+      source ci.env
+      source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
+      if [ -z "$CI_S3_LOCK_HOST" ]; then
+        echo "CI_S3_LOCK_HOST not set - no lock will be used"
+        exit
+      elif [ -z "$CI_S3_LOCK_BUCKET" ]; then
+        echo "CI_S3_LOCK_BUCKET is required when using the lock" >&2
+        exit 1
+      fi
+      CI_S3_LOCK_ACCESS_KEY="$(openstack ec2 credentials create -f value -c access)"
+      CI_S3_LOCK_SECRET_KEY="$(openstack ec2 credentials show -f value -c secret $CI_S3_LOCK_ACCESS_KEY)"
+      cat >> ci.env <<EOF
+      export CI_S3_LOCK_ACCESS_KEY="$CI_S3_LOCK_ACCESS_KEY"
+      export CI_S3_LOCK_SECRET_KEY="$CI_S3_LOCK_SECRET_KEY"
+      EOF
+      echo "host=${CI_S3_LOCK_HOST}" >> "$GITHUB_OUTPUT"
+      echo "access-key=${CI_S3_LOCK_ACCESS_KEY}" >> "$GITHUB_OUTPUT"
+      echo "secret-key=${CI_S3_LOCK_SECRET_KEY}" >> "$GITHUB_OUTPUT"
+      echo "bucket=${CI_S3_LOCK_BUCKET}" >> "$GITHUB_OUTPUT"
+
+  - name: Acquire S3 lock
+    uses: stackhpc/github-actions/s3-lock@master
+    with:
+      host: ${{ steps.s3-lock-config.outputs.host }}
+      access-key: ${{ steps.s3-lock-config.outputs.access-key }}
+      secret-key: ${{ steps.s3-lock-config.outputs.secret-key }}
+      bucket: ${{ steps.s3-lock-config.outputs.bucket }}
+      action: acquire
+    if: ${{ steps.s3-lock-config.outputs.host != '' }}

   - name: Allocate floating IP for ingress
     shell: bash
     run: |
       set -eo pipefail
       source ci.env
       source ./bin/activate "$AZIMUTH_CONFIG_ENVIRONMENT" "$AZIMUTH_ENVIRONMENT"
-      ansible_variable() {
-        ansible -m debug -a "var=$1" -e @extra-vars.yml all |
-          jq -r ".plays[0].tasks[0].hosts.localhost.$1"
-      }
-      INSTALL_MODE="$(ansible_variable install_mode)"
-      EXTNET_ID="$(ansible_variable infra_external_network_id)"
-      IP_ADDRESS=
-      until \
-        IP_ADDRESS="$(
-          openstack floating ip create $EXTNET_ID \
-            --description "ingress IP for $AZIMUTH_ENVIRONMENT" \
-            --tag "$AZIMUTH_ENVIRONMENT" \
-            --format value \
-            --column floating_ip_address
-        )"
-      do
-        sleep 30
-      done
-      VAR_NAME="$([ "$INSTALL_MODE" = "ha" ] && echo "capi_cluster_addons_ingress_load_balancer_ip" || echo "infra_fixed_floatingip")"
-      echo "$VAR_NAME: $IP_ADDRESS" >> extra-vars.yml
+      EXTNET_ID="$(
+        ansible -m debug -a "var=infra_external_network_id" -e @extra-vars.yml all |
+          jq -r ".plays[0].tasks[0].hosts.localhost.infra_external_network_id"
+      )"
+      IP_ADDRESS="$(
+        openstack floating ip create $EXTNET_ID \
+          --description "ingress IP for $AZIMUTH_ENVIRONMENT" \
+          --format value \
+          --column floating_ip_address
+      )"
+      cat >> ci.env <<EOF
+      export INGRESS_IP="$IP_ADDRESS"
+      EOF
     env:
       ANSIBLE_LOAD_CALLBACK_PLUGINS: "true"
       ANSIBLE_STDOUT_CALLBACK: json

   - name: Output extra-vars.yml for debugging
     shell: bash
     run: cat extra-vars.yml
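
The inline EXTNET_ID extraction works because the env block at the end of the step switches Ansible to its JSON stdout callback, which jq can then walk. The removed ansible_variable helper wrapped exactly this pattern; as a standalone reference it would look like:

    # Standalone version of the removed ansible_variable helper: evaluate an
    # Ansible variable against extra-vars.yml and print it as plain text
    export ANSIBLE_LOAD_CALLBACK_PLUGINS=true
    export ANSIBLE_STDOUT_CALLBACK=json

    ansible_variable() {
        ansible -m debug -a "var=$1" -e @extra-vars.yml all |
            jq -r ".plays[0].tasks[0].hosts.localhost.$1"
    }

    # e.g. EXTNET_ID="$(ansible_variable infra_external_network_id)"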

.github/environments/arcus-ha/env (2 additions)

@@ -0,0 +1,2 @@
+CI_S3_LOCK_HOST="object.arcus.openstack.hpc.cam.ac.uk"
+CI_S3_LOCK_BUCKET="azimuth-ci"

.github/environments/arcus-ha/extra-vars.yml (4 additions)

@@ -1,6 +1,10 @@
 # Unset the network ID so that a network + router are provisioned
 infra_network_id:

+# Unset the infra IP so we can use the ingress IP for the ingress controller
+infra_fixed_floatingip:
+capi_cluster_addons_ingress_load_balancer_ip: "{{ lookup('env', 'INGRESS_IP') }}"
+
 # Flavor auto-detection picks the wrong flavors on Arcus, so override them
 # The flavor to use for the seed VM (vm.ska.cpu.general.small)
 infra_flavor_id: c8b72062-5d52-4590-9d7a-68a670b44442

.github/environments/arcus/env (2 additions)

@@ -0,0 +1,2 @@
+CI_S3_LOCK_HOST="object.arcus.openstack.hpc.cam.ac.uk"
+CI_S3_LOCK_BUCKET="azimuth-ci"

.github/environments/arcus/extra-vars.yml (3 additions)

@@ -4,6 +4,9 @@ infra_external_network_id: "{{ lookup('pipe', 'openstack network show CUDN-Inter…
 # Use the pre-existing portal-internal network so that we don't need to steal a router
 infra_network_id: "{{ lookup('pipe', 'openstack network show portal-internal -f value -c id') }}"

+# The ingress IP comes from an environment variable
+infra_fixed_floatingip: "{{ lookup('env', 'INGRESS_IP') }}"
+
 # Flavor auto-detection picks the wrong flavors on Arcus, so override them
 # The flavor to use for the Azimuth AIO VM (vm.ska.cpu.general.eighth)
 infra_flavor_id: 5f9def81-c93f-4c1f-a521-3b810061ff6c

.github/environments/leafcloud-ha/extra-vars.yml (4 additions)

@@ -1,6 +1,10 @@
 # Unset the network ID so that a network + router are provisioned
 infra_network_id:

+# Unset the infra IP so we can use the ingress IP for the ingress controller
+infra_fixed_floatingip:
+capi_cluster_addons_ingress_load_balancer_ip: "{{ lookup('env', 'INGRESS_IP') }}"
+
 # Make sure we pick flavors that keep the costs down
 # The flavor to use for the seed VM
 infra_flavor_id: ec1.medium # 2 vCPUs, 4GB RAM @ leaf site

.github/environments/leafcloud/extra-vars.yml (3 additions)

@@ -4,6 +4,9 @@ infra_external_network_id: "{{ lookup('pipe', 'openstack network show external -…
 # Use the pre-existing portal-internal network so that we don't need to steal a router
 infra_network_id: "{{ lookup('pipe', 'openstack network show portal-internal -f value -c id') }}"

+# The ingress IP comes from an environment variable
+infra_fixed_floatingip: "{{ lookup('env', 'INGRESS_IP') }}"
+
 # The flavors only have 20GB root disks, which is not enough to unpack images for uploading
 # So we need to use a Cinder root volume
 # We also don't need the encrypted volume type
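
All four environment overrides use the same wiring: the setup action appends an export of INGRESS_IP to ci.env, later steps source that file, and the extra-vars overrides read the value back with Ansible's env lookup. In miniature (the address is a documentation example, not a real allocation):

    # ci.env, as appended by the "Allocate floating IP for ingress" step
    export INGRESS_IP="203.0.113.10"  # example address

    # extra-vars.yml, consumed when the playbooks later run
    infra_fixed_floatingip: "{{ lookup('env', 'INGRESS_IP') }}"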

.github/workflows/test-singlenode.yml (8 additions, 15 deletions)

@@ -39,6 +39,13 @@ on:
       - environments/demo/**
       - environments/ci/**

+# Use the head ref for workflow concurrency, with cancellation
+# This should mean that any previous runs of this workflow for the same PR
+# are cancelled when a new commit is pushed
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref }}
+  cancel-in-progress: true
+
 jobs:
   # This job exists so that PRs from outside the main repo are rejected
   fail_on_remote:

@@ -47,22 +54,8 @@ jobs:
       - name: Code under test must be from a branch in the azimuth-config repo
         run: exit ${{ github.repository == 'stackhpc/azimuth-config' && '0' || '1' }}

-  # We want jobs to wait in a queue for a slot to run, so as not to overload the test infra
-  # GitHub concurrency _almost_ does this, except the queue length is one :-(
-  # There is a feature request for what we need https://github.com/orgs/community/discussions/12835
-  # Until that is implemented, the only other viable option is a busy wait
-  wait_in_queue:
-    needs: [fail_on_remote]
-    if: ${{ github.event_name == 'workflow_dispatch' || !github.event.pull_request.draft }}
-    runs-on: ubuntu-latest
-    steps:
-      - name: Wait for an available slot
-        uses: stackhpc/github-actions/workflow-concurrency@master
-        with:
-          max-concurrency: 1
-
   run_azimuth_tests:
-    needs: [wait_in_queue]
+    needs: [fail_on_remote]
     runs-on: ubuntu-latest
     steps:
       # We need to check out the code under test first in order to use local actions
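
Taken together, GitHub's native concurrency now handles only per-PR cancellation, while cross-run serialization moves to the S3 lock acquired in setup and released in destroy. Condensed into one place, the acquire/release bracket looks like this (host and bucket values are illustrative; the real ones come from the per-environment env files):

    - uses: stackhpc/github-actions/s3-lock@master
      with:
        host: object.example.com    # CI_S3_LOCK_HOST
        access-key: ${{ steps.s3-lock-config.outputs.access-key }}
        secret-key: ${{ steps.s3-lock-config.outputs.secret-key }}
        bucket: azimuth-ci          # CI_S3_LOCK_BUCKET
        action: acquire

    # ...provision Azimuth, run the tests, tear everything down...

    - uses: stackhpc/github-actions/s3-lock@master
      with:
        host: object.example.com
        access-key: ${{ steps.s3-lock-config.outputs.access-key }}
        secret-key: ${{ steps.s3-lock-config.outputs.secret-key }}
        bucket: azimuth-ci
        action: release
      if: ${{ always() }}           # release even when the tests fail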
