Cilium E2E Upgrade (ci-e2e-upgrade) #77
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name: Cilium E2E Upgrade (ci-e2e-upgrade) | |
# Any change in triggers needs to be reflected in the concurrency group. | |
on: | |
workflow_dispatch: | |
inputs: | |
PR-number: | |
description: "Pull request number." | |
required: true | |
context-ref: | |
description: "Context in which the workflow runs. If PR is from a fork, will be the PR target branch (general case). If PR is NOT from a fork, will be the PR branch itself (this allows committers to test changes to workflows directly from PRs)." | |
required: true | |
SHA: | |
description: "SHA under test (head of the PR branch)." | |
required: true | |
extra-args: | |
description: "[JSON object] Arbitrary arguments passed from the trigger comment via regex capture group. Parse with 'fromJson(inputs.extra-args).argName' in workflow." | |
required: false | |
default: '{}' | |
# Run every 6 hours | |
schedule: | |
- cron: '0 5/6 * * *' | |
# By specifying the access of one of the scopes, all of those that are not | |
# specified are set to 'none'. | |
permissions: | |
# To be able to access the repository with actions/checkout | |
contents: read | |
# To allow retrieving information from the PR API | |
pull-requests: read | |
# To be able to set commit status | |
statuses: write | |
concurrency: | |
# Structure: | |
# - Workflow name | |
# - Event type | |
# - A unique identifier depending on event type: | |
# - schedule: SHA | |
# - workflow_dispatch: PR number | |
# | |
# This structure ensures a unique concurrency group name is generated for each | |
# type of testing, such that re-runs will cancel the previous run. | |
group: | | |
${{ github.workflow }} | |
${{ github.event_name }} | |
${{ | |
(github.event_name == 'schedule' && github.sha) || | |
(github.event_name == 'workflow_dispatch' && github.event.inputs.PR-number) | |
}} | |
cancel-in-progress: true | |
env: | |
cilium_cli_ci_version: | |
check_url: https://github.com/${{ github.repository }}/actions/runs/${{ github.run_id }} | |
jobs: | |
commit-status-start: | |
name: Commit Status Start | |
runs-on: ubuntu-latest | |
steps: | |
- name: Set initial commit status | |
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0 | |
with: | |
sha: ${{ inputs.SHA || github.sha }} | |
setup-and-test: | |
runs-on: ubuntu-latest-4cores-16gb | |
name: 'Setup & Test' | |
env: | |
job_name: 'Setup & Test' | |
strategy: | |
fail-fast: false | |
max-parallel: 16 | |
matrix: | |
include: | |
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
# ! NOTE: keep conformance-e2e.yaml config in sync ! | |
# !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! | |
- name: '1' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '4.19-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'false' | |
tunnel: 'vxlan' | |
- name: '2' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '5.4-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'false' | |
tunnel: 'disabled' | |
- name: '3' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '5.10-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'false' | |
tunnel: 'disabled' | |
endpoint-routes: 'true' | |
- name: '4' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '5.10-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'vxlan' | |
lb-mode: 'snat' | |
endpoint-routes: 'true' | |
egress-gateway: 'true' | |
- name: '5' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '5.15-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'disabled' | |
lb-mode: 'dsr' | |
endpoint-routes: 'true' | |
egress-gateway: 'true' | |
host-fw: 'false' # enabling breaks downgrading (missed tail calls) | |
- name: '6' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '6.1-20240110.080621' | |
kube-proxy: 'none' | |
kpr: 'true' | |
tunnel: 'vxlan' | |
lb-mode: 'snat' | |
egress-gateway: 'true' | |
host-fw: 'true' | |
lb-acceleration: 'testing-only' | |
ingress-controller: 'true' | |
- name: '7' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: 'bpf-next-20240122.013132' | |
kube-proxy: 'none' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'disabled' | |
lb-mode: 'snat' | |
egress-gateway: 'true' | |
lb-acceleration: 'testing-only' | |
ingress-controller: 'true' | |
- name: '8' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: 'bpf-next-20240122.013132' | |
kube-proxy: 'iptables' | |
kpr: 'false' | |
tunnel: 'geneve' | |
endpoint-routes: 'true' | |
- name: '9' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '5.10-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'vxlan' | |
encryption: 'wireguard' | |
encryption-node: 'false' | |
lb-mode: 'snat' | |
endpoint-routes: 'true' | |
egress-gateway: 'true' | |
- name: '10' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '5.15-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'disabled' | |
encryption: 'wireguard' | |
encryption-node: 'false' | |
lb-mode: 'dsr' | |
endpoint-routes: 'true' | |
egress-gateway: 'true' | |
- name: '11' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '6.1-20240110.080621' | |
kube-proxy: 'none' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'vxlan' | |
encryption: 'wireguard' | |
encryption-node: 'true' | |
lb-mode: 'snat' | |
egress-gateway: 'true' | |
ingress-controller: 'true' | |
- name: '12' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: 'bpf-next-20240122.013132' | |
kube-proxy: 'none' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'disabled' | |
encryption: 'wireguard' | |
encryption-node: 'true' | |
lb-mode: 'snat' | |
egress-gateway: 'true' | |
ingress-controller: 'true' | |
- name: '14' | |
# renovate: datasource=docker depName=quay.io/lvh-images/kind | |
kernel: '5.4-20240110.080621' | |
kube-proxy: 'iptables' | |
kpr: 'true' | |
devices: '{eth0,eth1}' | |
secondary-network: 'true' | |
tunnel: 'vxlan' | |
lb-mode: 'snat' | |
egress-gateway: 'true' | |
lb-acceleration: 'testing-only' | |
timeout-minutes: 60 | |
steps: | |
- name: Checkout context ref (trusted) | |
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | |
with: | |
ref: ${{ inputs.context-ref || github.sha }} | |
persist-credentials: false | |
- name: Set Environment Variables | |
uses: ./.github/actions/set-env-variables | |
- name: Set up job variables | |
id: vars | |
run: | | |
if [ "${{ github.event_name }}" = "workflow_dispatch" ]; then | |
SHA="${{ inputs.SHA }}" | |
else | |
SHA="${{ github.sha }}" | |
fi | |
echo sha=${SHA} >> $GITHUB_OUTPUT | |
CILIUM_DOWNGRADE_VERSION=$(contrib/scripts/print-downgrade-version.sh) | |
echo downgrade_version=${CILIUM_DOWNGRADE_VERSION} >> $GITHUB_OUTPUT | |
- name: Derive stable Cilium installation config | |
id: cilium-stable-config | |
uses: ./.github/actions/cilium-config | |
with: | |
image-tag: ${{ steps.vars.outputs.downgrade_version }} | |
chart-dir: './untrusted/cilium-downgrade/install/kubernetes/cilium/' | |
tunnel: ${{ matrix.tunnel }} | |
devices: ${{ matrix.devices }} | |
endpoint-routes: ${{ matrix.endpoint-routes }} | |
ipv6: ${{ matrix.ipv6 }} | |
kpr: ${{ matrix.kpr }} | |
lb-mode: ${{ matrix.lb-mode }} | |
lb-acceleration: ${{ matrix.lb-acceleration }} | |
encryption: ${{ matrix.encryption }} | |
encryption-node: ${{ matrix.encryption-node }} | |
egress-gateway: ${{ matrix.egress-gateway }} | |
host-fw: ${{ matrix.host-fw }} | |
mutual-auth: false | |
misc: 'bpfClockProbe=false,cni.uninstall=false' # TODO(brb) maybe it's only needed for <1.14 | |
- name: Derive newest Cilium installation config | |
id: cilium-newest-config | |
uses: ./.github/actions/cilium-config | |
with: | |
image-tag: ${{ steps.vars.outputs.sha }} | |
chart-dir: './untrusted/cilium-newest/install/kubernetes/cilium' | |
tunnel: ${{ matrix.tunnel }} | |
devices: ${{ matrix.devices }} | |
endpoint-routes: ${{ matrix.endpoint-routes }} | |
ipv6: ${{ matrix.ipv6 }} | |
kpr: ${{ matrix.kpr }} | |
lb-mode: ${{ matrix.lb-mode }} | |
lb-acceleration: ${{ matrix.lb-acceleration }} | |
encryption: ${{ matrix.encryption }} | |
encryption-node: ${{ matrix.encryption-node }} | |
egress-gateway: ${{ matrix.egress-gateway }} | |
host-fw: ${{ matrix.host-fw }} | |
mutual-auth: false | |
misc: 'bpfClockProbe=false,cni.uninstall=false' | |
- name: Install Cilium CLI | |
uses: cilium/cilium-cli@4aa6347c532075df28027772fa1e4ec2f7415341 # v0.15.20 | |
with: | |
repository: ${{ env.CILIUM_CLI_RELEASE_REPO }} | |
release-version: ${{ env.CILIUM_CLI_VERSION }} | |
ci-version: ${{ env.cilium_cli_ci_version }} | |
binary-name: cilium-cli | |
binary-dir: ./ | |
- name: Provision LVH VMs | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
test-name: ipsec-upgrade | |
image-version: ${{ matrix.kernel }} | |
host-mount: ./ | |
cpu: 4 | |
mem: '12G' | |
install-dependencies: 'true' | |
cmd: | | |
git config --global --add safe.directory /host | |
- name: Setup K8s cluster | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host/ | |
IP_FAM="dual" | |
if [ "${{ matrix.ipv6 }}" == "false" ]; then | |
IP_FAM="ipv4" | |
fi | |
./contrib/scripts/kind.sh --xdp --secondary-network "" 3 "" "" "${{ matrix.kube-proxy }}" \$IP_FAM | |
kubectl patch node kind-worker3 --type=json -p='[{"op":"add","path":"/metadata/labels/cilium.io~1no-schedule","value":"true"}]' | |
mkdir -p cilium-junits | |
# Warning: since this is a privileged workflow, subsequent workflow job | |
# steps must take care not to execute untrusted code. | |
- name: Checkout pull request branch (NOT TRUSTED) | |
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | |
with: | |
ref: ${{ steps.vars.outputs.sha }} | |
persist-credentials: false | |
path: untrusted/cilium-newest | |
sparse-checkout: | | |
install/kubernetes/cilium | |
- name: Checkout ${{ steps.vars.outputs.downgrade_version }} branch to get the Helm chart | |
uses: actions/checkout@b4ffde65f46336ab88eb53be808477a3936bae11 # v4.1.1 | |
with: | |
ref: ${{ steps.vars.outputs.downgrade_version }} | |
persist-credentials: false | |
path: untrusted/cilium-downgrade | |
sparse-checkout: | | |
install/kubernetes/cilium | |
- name: Wait for images to be available | |
timeout-minutes: 10 | |
shell: bash | |
run: | | |
for image in cilium-ci operator-generic-ci hubble-relay-ci ; do | |
until docker manifest inspect quay.io/${{ env.QUAY_ORGANIZATION_DEV }}/$image:${{ steps.vars.outputs.sha }} &> /dev/null; do sleep 45s; done | |
done | |
- name: Install Cilium ${{ env.cilium_stable_version }} | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host/ | |
CILIUM_CLI_MODE=helm ./cilium-cli install \ | |
${{ steps.cilium-stable-config.outputs.config }} | |
./cilium-cli status --wait | |
kubectl get pods --all-namespaces -o wide | |
kubectl -n kube-system exec daemonset/cilium -- cilium status | |
- name: Start conn-disrupt-test | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host/ | |
# Create pods which establish long lived connections. It will be used by | |
# subsequent connectivity tests with --include-conn-disrupt-test to catch any | |
# interruption in such flows. | |
./cilium-cli connectivity test --include-conn-disrupt-test --conn-disrupt-test-setup \ | |
--conn-disrupt-dispatch-interval 0ms | |
- name: Upgrade Cilium | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host/ | |
CILIUM_CLI_MODE=helm ./cilium-cli upgrade \ | |
${{ steps.cilium-newest-config.outputs.config }} | |
./cilium-cli status --wait | |
kubectl get pods --all-namespaces -o wide | |
kubectl -n kube-system exec daemonset/cilium -- cilium status | |
- name: Test Cilium after upgrade | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host/ | |
EXTRA="" | |
if [ "${{ matrix.secondary-network }}" = "true" ]; then | |
EXTRA="--secondary-network-iface=eth1" | |
fi | |
# Disable check-log-errors due to https://github.com/cilium/cilium-cli/issues/1858 | |
./cilium-cli connectivity test --include-unsafe-tests --collect-sysdump-on-failure \ | |
--include-conn-disrupt-test \ | |
--flush-ct \ | |
--sysdump-hubble-flows-count=1000000 --sysdump-hubble-flows-timeout=5m \ | |
--sysdump-output-filename "cilium-sysdump-${{ matrix.name }}-<ts>" \ | |
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \ | |
--junit-property github_job_step="Run tests upgrade 2 (${{ join(matrix.*, ', ') }})" \ | |
\$EXTRA | |
# --flush-ct interrupts the flows, so we need to set up again. | |
./cilium-cli connectivity test --include-conn-disrupt-test --conn-disrupt-test-setup \ | |
--conn-disrupt-dispatch-interval 0ms | |
- name: Downgrade Cilium ${{ env.cilium_stable_version }} | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host/ | |
CILIUM_CLI_MODE=helm ./cilium-cli upgrade \ | |
${{ steps.cilium-stable-config.outputs.config }} | |
./cilium-cli status --wait | |
kubectl get pods --all-namespaces -o wide | |
kubectl -n kube-system exec daemonset/cilium -- cilium status | |
- name: Test Cilium after downgrade to ${{ env.cilium_stable_version }} | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host/ | |
EXTRA="" | |
if [ "${{ matrix.secondary-network }}" = "true" ]; then | |
EXTRA="--secondary-network-iface=eth1" | |
fi | |
kubectl -n kube-system get pods -l k8s-app=cilium --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | xargs -I'{}' /bin/sh -c "echo '{}' && kubectl -n kube-system exec '{}' -c cilium-agent -- cilium metrics list | grep drop_count" | |
./cilium-cli connectivity test --include-unsafe-tests --collect-sysdump-on-failure \ | |
--include-conn-disrupt-test \ | |
--flush-ct \ | |
--sysdump-hubble-flows-count=1000000 --sysdump-hubble-flows-timeout=5m \ | |
--sysdump-output-filename "cilium-sysdump-${{ matrix.name }}-<ts>" \ | |
--junit-file "cilium-junits/${{ env.job_name }} (${{ join(matrix.*, ', ') }}).xml" \ | |
--junit-property github_job_step="Run tests upgrade 3 (${{ join(matrix.*, ', ') }})" \ | |
\$EXTRA | |
- name: Fetch artifacts | |
if: ${{ !success() }} | |
uses: cilium/little-vm-helper@8410a93e544b7e180a2365e5fdab0724a39bc02a # v0.0.13 | |
with: | |
provision: 'false' | |
cmd: | | |
cd /host | |
kubectl -n kube-system get pods -l k8s-app=cilium --template '{{range .items}}{{.metadata.name}}{{"\n"}}{{end}}' | xargs -I'{}' /bin/sh -c "echo '{}' && kubectl -n kube-system exec '{}' -c cilium-agent -- cilium metrics list | grep drop_count" | |
kubectl get pods --all-namespaces -o wide | |
./cilium-cli status | |
mkdir -p cilium-sysdumps | |
./cilium-cli sysdump --output-filename cilium-sysdump-${{ matrix.name }}-final | |
# To debug https://github.com/cilium/cilium/issues/26062 | |
head -n -0 /proc/buddyinfo /proc/pagetypeinfo | |
- name: Upload artifacts | |
if: ${{ !success() }} | |
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 | |
with: | |
name: cilium-sysdumps-${{ matrix.name }} | |
path: cilium-sysdump-*.zip | |
- name: Upload JUnits [junit] | |
if: ${{ always() }} | |
uses: actions/upload-artifact@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 | |
with: | |
name: cilium-junits-${{ matrix.name }} | |
path: cilium-junits/*.xml | |
- name: Publish Test Results As GitHub Summary | |
if: ${{ always() }} | |
uses: aanm/junit2md@332ebf0fddd34e91b03a832cfafaa826306558f9 # v0.0.3 | |
with: | |
junit-directory: "cilium-junits" | |
merge-upload: | |
if: ${{ always() }} | |
name: Merge and Upload Artifacts | |
runs-on: ubuntu-latest | |
needs: setup-and-test | |
steps: | |
- name: Merge Sysdumps | |
if: ${{ needs.setup-and-test.result == 'failure' }} | |
uses: actions/upload-artifact/merge@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 | |
with: | |
name: cilium-sysdumps | |
pattern: cilium-sysdumps-* | |
retention-days: 5 | |
delete-merged: true | |
continue-on-error: true | |
- name: Merge JUnits | |
uses: actions/upload-artifact/merge@26f96dfa697d77e81fd5907df203aa23a56210a8 # v4.3.0 | |
with: | |
name: cilium-junits | |
pattern: cilium-junits-* | |
retention-days: 5 | |
delete-merged: true | |
commit-status-final: | |
if: ${{ always() }} | |
name: Commit Status Final | |
needs: setup-and-test | |
runs-on: ubuntu-latest | |
steps: | |
- name: Set final commit status | |
uses: myrotvorets/set-commit-status-action@38f3f27c7d52fb381273e95542f07f0fba301307 # v2.0.0 | |
with: | |
sha: ${{ inputs.SHA || github.sha }} | |
status: ${{ needs.setup-and-test.result }} |