Skip to content

Commit

Permalink
Updating the Radius installation step of the long-haul workflow
Browse files Browse the repository at this point in the history
Signed-off-by: ytimocin <[email protected]>
  • Loading branch information
ytimocin committed Jan 10, 2024
1 parent be2bc0c commit 66fa182
Showing 1 changed file with 47 additions and 11 deletions.
58 changes: 47 additions & 11 deletions .github/workflows/long-running-azure.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ on:
branches:
- main
paths:
- '.github/workflows/long-running-azure.yaml'
- ".github/workflows/long-running-azure.yaml"

env:
# Go version
GOVER: '^1.21'
GOVER: "^1.21"
GOPROXY: https://proxy.golang.org

# gotestsum version - see: https://github.com/gotestyourself/gotestsum
Expand All @@ -70,21 +70,24 @@ env:
# The region for AWS resources
AWS_REGION: us-west-2
# The AWS account ID
AWS_ACCOUNT_ID: '${{ secrets.FUNCTEST_AWS_ACCOUNT_ID }}'
AWS_ACCOUNT_ID: "${{ secrets.FUNCTEST_AWS_ACCOUNT_ID }}"

# The valid radius build time window in seconds to rebuild radius. 24 hours = 24 * 60 * 60 = 86400
VALID_RADIUS_BUILD_WINDOW: 86400

# The AKS cluster name
AKS_CLUSTER_NAME: 'radiuse2e00-aks'
AKS_CLUSTER_NAME: "radiuse2e00-aks"
# The resource group for AKS_CLUSTER_NAME resource.
AKS_RESOURCE_GROUP: 'radiuse2e00'
AKS_RESOURCE_GROUP: "radiuse2e00"

# Server where terraform test modules are deployed
TF_RECIPE_MODULE_SERVER_URL: 'http://tf-module-server.radius-test-tf-module-server.svc.cluster.local'
TF_RECIPE_MODULE_SERVER_URL: "http://tf-module-server.radius-test-tf-module-server.svc.cluster.local"

# Radius test environment name
RADIUS_TEST_ENVIRONMENT_NAME: 'kind-radius'
RADIUS_TEST_ENVIRONMENT_NAME: "kind-radius"

# The current GitHub action link
ACTION_LINK: "${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}"

jobs:
build:
Expand Down Expand Up @@ -349,6 +352,14 @@ jobs:
--name ${{ env.AKS_CLUSTER_NAME }} --admin
env:
RESOURCE_GROUP: ${{ env.AZURE_TEST_RESOURCE_GROUP }}
# Fail fast when the long-running test lock ConfigMap is already present.
- name: Check if tests are already running
  run: |
    # `kubectl get --ignore-not-found` exits 0 whether or not the ConfigMap
    # exists, so its exit status cannot serve as the "lock present" signal.
    # Ask for the resource name and test for non-empty output instead.
    lock="$(kubectl get configmap long-running-test-lock -n radius-system --ignore-not-found -o name)"
    if [ -n "${lock}" ]; then
      echo "Tests are already running. Exiting..."
      exit 1
    fi
# Create the lock ConfigMap that the "already running" check looks for.
- name: Set test run flag
  run: |
    kubectl create configmap long-running-test-lock -n radius-system
# Invoke the repository's cluster cleanup script.
- name: Clean up cluster
  run: |
    ./.github/scripts/cleanup-cluster.sh
- name: Download Bicep
Expand All @@ -368,6 +379,12 @@ jobs:
export PATH=$GITHUB_WORKSPACE/bin:$PATH
which rad || { echo "cannot find rad"; exit 1; }
echo "*** Uninstalling existing Radius installation ***"
rad uninstall kubernetes
echo "*** Deleting radius-system namespace ***"
kubectl delete namespace radius-system --ignore-not-found
echo "*** Installing Radius to Kubernetes ***"
rad install kubernetes --reinstall \
--chart ${{ env.RADIUS_CHART_LOCATION }} \
Expand All @@ -388,10 +405,6 @@ jobs:
rad env create ${{ env.RADIUS_TEST_ENVIRONMENT_NAME }} --namespace default
rad env switch ${{ env.RADIUS_TEST_ENVIRONMENT_NAME }}
# Temporary workaround to fix the x509 certificate error in the controller.
# https://github.com/radius-project/radius/issues/6989
kubectl delete secrets controller-cert -n radius-system --ignore-not-found
echo "*** Configuring Azure provider ***"
rad env update ${{ env.RADIUS_TEST_ENVIRONMENT_NAME }} --azure-subscription-id ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--azure-resource-group ${{ env.AZURE_TEST_RESOURCE_GROUP }}
Expand Down Expand Up @@ -480,3 +493,26 @@ jobs:
# Invoke the repository's cluster cleanup script; always() makes the step run
# even when an earlier step in the job failed.
- name: Clean up cluster
  if: always()
  run: |
    ./.github/scripts/cleanup-cluster.sh
# Release the long-running test lock regardless of how earlier steps ended.
- name: Clear test run flag
  if: always()
  run: |
    # The Radius install step deletes the radius-system namespace after the
    # lock has been created, which removes the lock ConfigMap along with it.
    # The lock may therefore already be gone here, so tolerate a missing
    # ConfigMap instead of failing the job with a NotFound error.
    # NOTE(review): because this runs under always(), it also removes a lock
    # held by another run when this run was stopped by the "already running"
    # check - confirm whether that is intended.
    kubectl delete configmap long-running-test-lock -n radius-system --ignore-not-found
# Opens a GitHub issue when a job this one depends on (build, tests) fails.
report-failure:
  name: Report test failure
  needs: [build, tests]
  runs-on: ubuntu-latest
  # Report only from the upstream repository, and only after a failure.
  if: failure() && github.repository == 'radius-project/radius'
  steps:
    - name: Create failure issue for failing long running test run
      uses: actions/github-script@v6
      with:
        github-token: ${{ secrets.GH_RAD_CI_BOT_PAT }}
        script: |
          // Await the request so the step completes only after the API call
          // has resolved, and so an API error fails this step directly.
          // ACTION_LINK is read from the environment; it is defined in the
          // workflow-level env block.
          await github.rest.issues.create({
            ...context.repo,
            title: `Scheduled long running test failed - Run ID: ${context.runId}`,
            labels: ['bug', 'test-failure'],
            body: `## Bug information \n\nThis bug is generated automatically if the scheduled long running test fails. The Radius long running test operates on a schedule of every 2 hours everyday. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).`
          })

0 comments on commit 66fa182

Please sign in to comment.