Skip to content

Conformance EKS (ci-eks) #748

Conformance EKS (ci-eks)

Conformance EKS (ci-eks) #748

name: Conformance EKS (ci-eks)
# Any change in triggers needs to be reflected in the concurrency group.
on:
workflow_dispatch:
inputs:
PR-number:
description: "Pull request number."
required: true
context-ref:
description: "Context in which the workflow runs. If PR is from a fork, will be the PR target branch (general case). If PR is NOT from a fork, will be the PR branch itself (this allows committers to test changes to workflows directly from PRs)."
required: true
SHA:
description: "SHA under test (head of the PR branch)."
required: true
extra-args:
description: "[JSON object] Arbitrary arguments passed from the trigger comment via regex capture group. Parse with 'fromJson(inputs.extra-args).argName' in workflow."
required: false
default: '{}'
# Run every 6 hours
schedule:
- cron: '0 1/6 * * *'
# By specifying the access of one of the scopes, all of those that are not
# specified are set to 'none'.
permissions:
# To be able to access the repository with actions/checkout
contents: read
# To allow retrieving information from the PR API
pull-requests: read
# To be able to set commit status
statuses: write
concurrency:
# Structure:
# - Workflow name
# - Event type
# - A unique identifier depending on event type:
# - schedule: SHA
# - workflow_dispatch: PR number
#
# This structure ensures a unique concurrency group name is generated for each
# type of testing, such that re-runs will cancel the previous run.
group: |
${{ github.workflow }}
${{ github.event_name }}
${{
(github.event_name == 'schedule' && github.sha) ||
(github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number)
}}
cancel-in-progress: true
env:
clusterName: ${{ github.repository_owner }}-${{ github.event.repository.name }}-${{ github.run_id }}-${{ github.run_attempt }}
cilium_cli_ci_version:
CILIUM_CLI_MODE: helm
check_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }}
# renovate: datasource=github-releases depName=eksctl-io/eksctl
eksctl_version: v0.166.0
# renovate: datasource=github-releases depName=kubernetes/kubernetes
kubectl_version: v1.29.0-rc.1
jobs:
commit-status-start:
name: Commit Status Start
runs-on: ubuntu-latest
steps:
- name: Set initial commit status
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
with:
sha: ${{ inputs.SHA || github.sha }}
generate-matrix:
name: Generate Matrix
runs-on: ubuntu-latest
outputs:
matrix: ${{ steps.set-matrix.outputs.matrix }}
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Convert YAML to JSON
run: |
work_dir=".github/actions/aws"
destination_directory="/tmp/generated/aws"
mkdir -p "${destination_directory}"
yq -o=json "${work_dir}/k8s-versions.yaml" | jq . > "${destination_directory}/aws.json"
- name: Generate Matrix
id: set-matrix
run: |
cd /tmp/generated/aws
# Use complete matrix in case of scheduled run
# main -> event_name = schedule
# other stable branches -> PR-number starting with v (e.g. v1.14)
if [[ "${{ github.event_name }}" == "schedule" || "${{ inputs.PR-number }}" == v* ]];then
cp aws.json /tmp/matrix.json
else
jq '{ "include": [ .include[] | select(.default) ] }' aws.json > /tmp/matrix.json
fi
echo "Generated matrix:"
cat /tmp/matrix.json
echo "matrix=$(jq -c . < /tmp/matrix.json)" >> $GITHUB_OUTPUT
installation-and-connectivity:
name: Installation and Connectivity Test
needs: generate-matrix
runs-on: ubuntu-latest
timeout-minutes: 75
env:
job_name: "Installation and Connectivity Test"
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
steps:
- name: Checkout context ref (trusted)
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ inputs.context-ref || github.sha }}
persist-credentials: false
- name: Set Environment Variables
uses: ./.github/actions/set-env-variables
- name: Get Cilium's default values
id: default_vars
uses: ./.github/actions/helm-default
with:
image-tag: ${{ inputs.SHA }}
- name: Set up job variables
id: vars
run: |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then
OWNER="${{ inputs.PR-number }}"
else
OWNER="${{ github.ref_name }}"
OWNER="${OWNER/./-}"
fi
CILIUM_INSTALL_DEFAULTS="${{ steps.default_vars.outputs.cilium_install_defaults }} \
--cluster-name=${{ env.clusterName }} \
--helm-set=hubble.relay.enabled=true \
--helm-set loadBalancer.l7.backend=envoy \
--helm-set tls.secretsBackend=k8s \
--helm-set=bpf.monitorAggregation=none \
--wait=false"
CONNECTIVITY_TEST_DEFAULTS="--flow-validation=disabled --hubble=false --collect-sysdump-on-failure \
--external-target amazon.com"
echo cilium_install_defaults=${CILIUM_INSTALL_DEFAULTS} >> $GITHUB_OUTPUT
echo connectivity_test_defaults=${CONNECTIVITY_TEST_DEFAULTS} >> $GITHUB_OUTPUT
echo sha=${{ steps.default_vars.outputs.sha }} >> $GITHUB_OUTPUT
echo owner=${OWNER} >> $GITHUB_OUTPUT
- name: Install Cilium CLI
uses: cilium/cilium-cli@32109b32259a487c803648a3c9463cdcafa4c0c3 # v0.15.19
with:
repository: ${{ env.CILIUM_CLI_RELEASE_REPO }}
release-version: ${{ env.CILIUM_CLI_VERSION }}
ci-version: ${{ env.cilium_cli_ci_version }}
- name: Install kubectl
run: |
curl -sLO "https://dl.k8s.io/release/${{ env.kubectl_version }}/bin/linux/amd64/kubectl"
curl -sLO "https://dl.k8s.io/${{ env.kubectl_version }}/bin/linux/amd64/kubectl.sha256"
echo "$(cat kubectl.sha256) kubectl" | sha256sum --check
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
kubectl version --client
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/download/${{ env.eksctl_version }}/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Set up AWS CLI credentials
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4.0.1
with:
aws-access-key-id: ${{ secrets.AWS_PR_SA_ID }}
aws-secret-access-key: ${{ secrets.AWS_PR_SA_KEY }}
aws-region: ${{ matrix.region }}
- name: Create EKS cluster
uses: ./.github/actions/setup-eks-cluster
with:
cluster_name: ${{ env.clusterName }}
region: ${{ matrix.region }}
owner: "${{ steps.vars.outputs.owner }}"
version: ${{ matrix.version }}
spot: ${{ github.event_name != 'schedule' }}
- name: Wait for images to be available
timeout-minutes: 30
shell: bash
run: |
for image in cilium-ci operator-aws-ci hubble-relay-ci ; do
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done
done
- name: Make sure images available from cluster
run: |
kubectl create -f - <<EOF
apiVersion: batch/v1
kind: Job
metadata:
name: wait-for-images
spec:
completions: 1
backoffLimit: 3
template:
spec:
containers:
- name: wait-for-images
image: quay.io/cilium/cilium-ci:${{ steps.vars.outputs.sha }}
command: ["true"]
tolerations:
- key: "node.cilium.io/agent-not-ready"
operator: "Equal"
value: "true"
effect: "NoExecute"
restartPolicy: Never
EOF
kubectl wait --for=condition=complete --timeout=10m job/wait-for-images
# This is a workaround for flake #16938.
- name: Remove AWS-CNI
run: |
kubectl -n kube-system delete daemonset aws-node
# Warning: since this is a privileged workflow, subsequent workflow job
# steps must take care not to execute untrusted code.
- name: Checkout pull request branch (NOT TRUSTED)
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1
with:
ref: ${{ steps.vars.outputs.sha }}
persist-credentials: false
- name: Install Cilium
id: install-cilium
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }}
- name: Wait for Cilium to be ready
run: |
cilium status --wait --wait-duration=10m
kubectl get pods -n kube-system
- name: Check that AWS leftover iptables chains have been removed
run: |
for pod in $(kubectl get po -n kube-system -l app.kubernetes.io/name=cilium-agent -o name); do
echo "Checking ${pod}"
if kubectl exec -n kube-system ${pod} -c cilium-agent -- iptables-save | grep --silent ':AWS'; then
echo "Unexpected AWS leftover iptables chains"
kubectl exec -n kube-system ds/cilium -- iptables-save | grep ':AWS'
exit 1
fi
done
- name: Port forward Relay
run: |
cilium hubble port-forward&
sleep 10s
[[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]
- name: Make JUnit report directory
run: |
mkdir -p cilium-junits
- name: Run connectivity test (${{ join(matrix.*, ', ') }})
run: |
cilium connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} \
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}) - 1.xml" \
--junit-property github_job_step="Run connectivity test (${{ join(matrix.*, ', ') }})"
- name: Clean up Cilium
run: |
pkill -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" || test $? -eq 1
cilium uninstall --wait
- name: Create custom IPsec secret
run: |
kubectl create -n kube-system secret generic cilium-ipsec-keys --from-literal=keys="15 rfc4106(gcm(aes)) $(echo $(dd if=/dev/urandom count=20 bs=1 2> /dev/null | xxd -p -c 64)) 128"
- name: Install Cilium with encryption
run: |
cilium install ${{ steps.vars.outputs.cilium_install_defaults }} \
--helm-set encryption.enabled=true \
--helm-set encryption.type=ipsec
- name: Wait for Cilium to be ready
run: |
cilium status --wait --wait-duration=10m
kubectl get pods -n kube-system
- name: Port forward Relay
run: |
cilium hubble port-forward&
sleep 10s
[[ $(pgrep -f "cilium.*hubble.*port-forward|kubectl.*port-forward.*hubble-relay" | wc -l) == 2 ]]
- name: Run connectivity test with IPSec (${{ join(matrix.*, ', ') }})
run: |
cilium connectivity test ${{ steps.vars.outputs.connectivity_test_defaults }} --force-deploy \
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}) - 2.xml" \
--junit-property github_job_step="Run connectivity test with IPSec (${{ join(matrix.*, ', ') }})"
- name: Post-test information gathering
if: ${{ !success() && steps.install-cilium.outcome != 'skipped' }}
run: |
kubectl get pods --all-namespaces -o wide
cilium status
cilium sysdump --output-filename cilium-sysdump-final-${{ join(matrix.*, '-') }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently
- name: Upload artifacts
if: ${{ !success() }}
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
with:
name: cilium-sysdumps
path: cilium-sysdump-*.zip
retention-days: 5
- name: Upload JUnits [junit]
if: ${{ always() }}
uses: actions/upload-artifact@a8a3f3ad30e3422c9c7b888a15615d19a852ae32 # v3.1.3
with:
name: cilium-junits
path: cilium-junits/*.xml
retention-days: 5
- name: Publish Test Results As GitHub Summary
if: ${{ always() }}
uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3
with:
junit-directory: "cilium-junits"
commit-status-final:
if: ${{ always() }}
name: Commit Status Final
needs: installation-and-connectivity
runs-on: ubuntu-latest
steps:
- name: Set final commit status
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0
with:
sha: ${{ inputs.SHA || github.sha }}
status: ${{ needs.installation-and-connectivity.result }}
cleanup:
name: Cleanup EKS Clusters
if: ${{ always() }}
continue-on-error: true
needs: [generate-matrix, installation-and-connectivity]
runs-on: ubuntu-latest
timeout-minutes: 30
strategy:
fail-fast: false
matrix: ${{fromJson(needs.generate-matrix.outputs.matrix)}}
steps:
- name: Install eksctl CLI
run: |
curl -LO "https://github.com/eksctl-io/eksctl/releases/download/${{ env.eksctl_version }}/eksctl_$(uname -s)_amd64.tar.gz"
sudo tar xzvfC eksctl_$(uname -s)_amd64.tar.gz /usr/bin
rm eksctl_$(uname -s)_amd64.tar.gz
- name: Set up AWS CLI credentials
uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4.0.1
with:
aws-access-key-id: ${{ secrets.AWS_PR_SA_ID }}
aws-secret-access-key: ${{ secrets.AWS_PR_SA_KEY }}
aws-region: ${{ matrix.region }}
- name: Clean up EKS
run: |
eksctl delete cluster --name ${{ env.clusterName }} --region ${{ matrix.region }}
shell: bash {0} # Disable default fail-fast behaviour so that all commands run independently