Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[wip] Add logs for kubernetes events #6793

Closed
wants to merge 10 commits into from
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
181 changes: 62 additions & 119 deletions .github/workflows/functional-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,10 @@

name: Functional tests
on:
schedule:
# Run every 4 hours on weekdays.
- cron: "30 0,4,8,12,16,20 * * 1-5"
# Run every 12 hours on weekends.
- cron: "30 0,12 * * 0,6"
# Dispatch on external events
repository_dispatch:
types: [de-functional-test]
workflow_run:
workflows: ['Approve Functional Tests']
types:
- completed
workflow_dispatch:

env:
# Go version
Expand All @@ -51,9 +43,9 @@ env:
# Azure workload identity webhook chart version
AZURE_WORKLOAD_IDENTITY_WEBHOOK_VER: '1.1.0'
# Container registry for storing container images
CONTAINER_REGISTRY: ghcr.io/radius-project/dev
CONTAINER_REGISTRY: ghcr.io/sk593/dev
# Container registry for storing Bicep recipe artifacts
BICEP_RECIPE_REGISTRY: ghcr.io/radius-project/dev
BICEP_RECIPE_REGISTRY: ghcr.io/sk593/dev
# The radius functional test timeout
FUNCTIONALTEST_TIMEOUT: 60m
# The Azure Location to store test resources
Expand All @@ -75,11 +67,9 @@ jobs:
build:
name: Build Radius for test
runs-on: ubuntu-latest
if: github.event_name == 'repository_dispatch' || (github.event_name == 'schedule' && github.repository == 'radius-project/radius') || github.event_name == 'workflow_run'
env:
DE_IMAGE: 'ghcr.io/radius-project/deployment-engine'
DE_TAG: 'latest'
FUNCTIONAL_TEST_APP_ID: 425843
outputs:
REL_VERSION: ${{ steps.gen-id.outputs.REL_VERSION }}
UNIQUE_ID: ${{ steps.gen-id.outputs.UNIQUE_ID }}
Expand All @@ -90,12 +80,6 @@ jobs:
DE_IMAGE: ${{ steps.gen-id.outputs.DE_IMAGE }}
DE_TAG: ${{ steps.gen-id.outputs.DE_TAG }}
steps:
- name: Login as the GitHub App
uses: tibdex/github-app-token@v1
id: get_installation_token
with:
app_id: ${{ env.FUNCTIONAL_TEST_APP_ID }}
private_key: ${{ secrets.FUNCTIONAL_TEST_APP_PRIVATE_KEY }}
- name: Set up checkout target (scheduled)
if: github.event_name == 'schedule'
run: |
Expand All @@ -107,8 +91,6 @@ jobs:
echo "CHECKOUT_REPO=${{ github.repository }}" >> $GITHUB_ENV
echo "CHECKOUT_REF=${{ github.ref }}" >> $GITHUB_ENV
echo "PR_NUMBER=${{ github.event.pull_request.number }}" >> $GITHUB_ENV
- name: Use custom actions
uses: actions/checkout@v3
- name: 'Download PR data artifacts'
if: github.event_name == 'workflow_run'
uses: ./.github/actions/download-pr-data-artifact
Expand Down Expand Up @@ -173,7 +155,6 @@ jobs:
if: env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
hide: true
Expand Down Expand Up @@ -218,7 +199,6 @@ jobs:
if: env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -240,7 +220,6 @@ jobs:
if: success() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -250,7 +229,6 @@ jobs:
if: failure() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -260,7 +238,6 @@ jobs:
if: env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -283,7 +260,6 @@ jobs:
if: success() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -293,7 +269,6 @@ jobs:
if: failure() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -302,7 +277,6 @@ jobs:
tests:
name: Run ${{ matrix.name }} functional tests
needs: build
if: github.event_name == 'repository_dispatch' || (github.event_name == 'schedule' && github.repository == 'radius-project/radius') || github.event_name == 'workflow_run'
strategy:
fail-fast: true
matrix:
Expand All @@ -324,23 +298,7 @@ jobs:
BICEP_RECIPE_TAG_VERSION: ${{ needs.build.outputs.REL_VERSION }}
DE_IMAGE: ${{ needs.build.outputs.DE_IMAGE }}
DE_TAG: ${{ needs.build.outputs.DE_TAG }}
FUNCTIONAL_TEST_APP_ID: 425843
steps:
- name: Login as the GitHub App
uses: tibdex/github-app-token@v1
id: get_installation_token
with:
app_id: ${{ env.FUNCTIONAL_TEST_APP_ID }}
private_key: ${{ secrets.FUNCTIONAL_TEST_APP_PRIVATE_KEY }}
- uses: LouisBrunner/[email protected]
if: always()
with:
token: ${{ steps.get_installation_token.outputs.token }}
name: 'Functional Test Run'
status: in_progress
repo: ${{ github.repository }}
sha: ${{ env.CHECKOUT_REF }}
details_url: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
- name: Checkout
uses: actions/checkout@v3
with:
Expand Down Expand Up @@ -383,23 +341,22 @@ jobs:
- uses: marocchino/sticky-pull-request-comment@v2
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
message: |
:hourglass: Starting ${{ matrix.name }} functional tests...
- name: Create azure resource group - ${{ env.AZURE_TEST_RESOURCE_GROUP }}
run: |
current_time=$(date +%s)
az group create \
--location ${{ env.AZURE_LOCATION }} \
--name $RESOURCE_GROUP \
--subscription ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--tags creationTime=$current_time
while [ $(az group exists --name $RESOURCE_GROUP) = false ]; do sleep 2; done
env:
RESOURCE_GROUP: ${{ env.AZURE_TEST_RESOURCE_GROUP }}
# - name: Create azure resource group - ${{ env.AZURE_TEST_RESOURCE_GROUP }}
# run: |
# current_time=$(date +%s)
# az group create \
# --location ${{ env.AZURE_LOCATION }} \
# --name $RESOURCE_GROUP \
# --subscription ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
# --tags creationTime=$current_time
# while [ $(az group exists --name $RESOURCE_GROUP) = false ]; do sleep 2; done
# env:
# RESOURCE_GROUP: ${{ env.AZURE_TEST_RESOURCE_GROUP }}
- uses: azure/setup-helm@v3
with:
version: ${{ env.HELM_VER }}
Expand All @@ -418,31 +375,9 @@ jobs:
echo "{\"auths\":{\"ghcr.io\":{\"auth\":\"${AUTHKEY}\"}}}" > "./ghcr_secret.json"

# Create KinD cluster with OIDC Issuer keys
echo $AZURE_OIDC_ISSUER_PUBLIC_KEY | base64 -d > sa.pub
echo $AZURE_OIDC_ISSUER_PRIVATE_KEY | base64 -d > sa.key
cat <<EOF | ./kind create cluster --name radius --config=-
kind: Cluster
apiVersion: kind.x-k8s.io/v1alpha4
nodes:
- role: control-plane
extraMounts:
- hostPath: ./sa.pub
containerPath: /etc/kubernetes/pki/sa.pub
- hostPath: ./sa.key
containerPath: /etc/kubernetes/pki/sa.key
- hostPath: ./ghcr_secret.json
containerPath: /var/lib/kubelet/config.json
kubeadmConfigPatches:
- |
kind: ClusterConfiguration
apiServer:
extraArgs:
service-account-issuer: $AZURE_OIDC_ISSUER
service-account-key-file: /etc/kubernetes/pki/sa.pub
service-account-signing-key-file: /etc/kubernetes/pki/sa.key
controllerManager:
extraArgs:
service-account-private-key-file: /etc/kubernetes/pki/sa.key
# echo $AZURE_OIDC_ISSUER_PUBLIC_KEY | base64 -d > sa.pub
# echo $AZURE_OIDC_ISSUER_PRIVATE_KEY | base64 -d > sa.key
cat <<EOF | ./kind create cluster --name radius
EOF
- name: Install dapr into cluster
run: |
Expand Down Expand Up @@ -476,7 +411,6 @@ jobs:
if: failure() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -501,28 +435,28 @@ jobs:
rad env create kind-radius --namespace default
rad env switch kind-radius

echo "*** Configuring Azure provider ***"
rad env update kind-radius --azure-subscription-id ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--azure-resource-group ${{ env.AZURE_TEST_RESOURCE_GROUP }}
rad credential register azure --client-id ${{ secrets.INTEGRATION_TEST_SP_APP_ID }} \
--client-secret ${{ secrets.INTEGRATION_TEST_SP_PASSWORD }} \
--tenant-id ${{ secrets.INTEGRATION_TEST_TENANT_ID }}
# echo "*** Configuring Azure provider ***"
# rad env update kind-radius --azure-subscription-id ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
# --azure-resource-group ${{ env.AZURE_TEST_RESOURCE_GROUP }}
# rad credential register azure --client-id ${{ secrets.INTEGRATION_TEST_SP_APP_ID }} \
# --client-secret ${{ secrets.INTEGRATION_TEST_SP_PASSWORD }} \
# --tenant-id ${{ secrets.INTEGRATION_TEST_TENANT_ID }}

echo "*** Configuring AWS provider ***"
rad env update kind-radius --aws-region ${{ env.AWS_REGION }} --aws-account-id ${{ secrets.FUNCTEST_AWS_ACCOUNT_ID }}
rad credential register aws \
--access-key-id ${{ secrets.FUNCTEST_AWS_ACCESS_KEY_ID }} --secret-access-key ${{ secrets.FUNCTEST_AWS_SECRET_ACCESS_KEY }}
# echo "*** Configuring AWS provider ***"
# rad env update kind-radius --aws-region ${{ env.AWS_REGION }} --aws-account-id ${{ secrets.FUNCTEST_AWS_ACCOUNT_ID }}
# rad credential register aws \
# --access-key-id ${{ secrets.FUNCTEST_AWS_ACCESS_KEY_ID }} --secret-access-key ${{ secrets.FUNCTEST_AWS_SECRET_ACCESS_KEY }}
- uses: marocchino/sticky-pull-request-comment@v2
if: failure() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
message: |
:x: Failed to install Radius for ${{ matrix.name }} functional test. Please check [the logs](${{ env.ACTION_LINK }}) for more details
- name: Publish Terraform test recipes
id: publish-tf-recipes
run: |
make publish-test-terraform-recipes
- name: Run functional tests
Expand Down Expand Up @@ -575,12 +509,34 @@ jobs:
uses: actions/upload-artifact@v3
with:
name: ${{ matrix.name }}_container_logs
path: ./${{ env.RADIUS_CONTAINER_LOG_BASE }}
path: ./${{ env.RADIUS_CONTAINER_LOG_BASE }}
- name: Get Terraform recipe publishing logs
if: always()
run: |
# Create pod-logs directory
mkdir -p recipes/pod-logs
# Get pod logs and save to file
namespace="radius-test-tf-module-server"
label="app.kubernetes.io/name=tf-module-server"
pod_names=($(kubectl get pods -l $label -n $namespace -o jsonpath='{.items[*].metadata.name}'))
for pod_name in "${pod_names[@]}"; do
kubectl logs $pod_name -n $namespace > recipes/pod-logs/${pod_name}.txt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could add a log line here, for example: `echo "Pod ${pod_name} logs saved to recipes/pod-logs/"`

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There's a line below that logs once all of the pod logs have been saved, but I'll update it to log per pod as suggested.

done
echo "Pod logs saved to recipes/pod-logs/"
# Get kubernetes events and save to file
kubectl get events -n $namespace > recipes/pod-logs/events.txt
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And maybe another log after this line to state that the events are saved?

# Upload the contents of recipes/pod-logs as the "recipes-pod-logs" artifact.
# `if: always()` runs this step even when an earlier step has failed, so the
# logs stay available for debugging. The artifact is retained for 30 days,
# and `if-no-files-found: error` fails the step when the path has no files.
- name: Upload Terraform recipe publishing logs
uses: actions/upload-artifact@v3
if: always()
with:
name: recipes-pod-logs
path: recipes/pod-logs
retention-days: 30
if-no-files-found: error
- uses: marocchino/sticky-pull-request-comment@v2
if: success() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
Expand All @@ -590,34 +546,19 @@ jobs:
if: failure() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
message: |
:x: ${{ matrix.name }} functional test failed. Please check [the logs](${{ env.ACTION_LINK }}) for more details
- uses: marocchino/sticky-pull-request-comment@v2
if: cancelled() && env.PR_NUMBER != ''
continue-on-error: true
with:
GITHUB_TOKEN: ${{ steps.get_installation_token.outputs.token }}
header: teststatus-${{ github.run_id }}
number: ${{ env.PR_NUMBER }}
append: true
message: |
:x: ${{ matrix.name }} functional test cancelled. Please check [the logs](${{ env.ACTION_LINK }}) for more details
- uses: LouisBrunner/[email protected]
- name: Delete azure resource group - ${{ env.AZURE_TEST_RESOURCE_GROUP }}
if: always()
with:
token: ${{ steps.get_installation_token.outputs.token }}
name: 'Functional Test Run'
repo: ${{ github.repository }}
sha: ${{ env.CHECKOUT_REF }}
status: completed
conclusion: ${{ job.status }}
output: |
{"summary":"Functional Test run completed. See links for more information.","title":"Functional Test Run"}
details_url: ${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}
run: |
# if deletion fails, purge workflow will purge the resource group and its resources later.
az group delete \
--subscription ${{ secrets.INTEGRATION_TEST_SUBSCRIPTION_ID }} \
--name ${{ env.AZURE_TEST_RESOURCE_GROUP }} \
--yes --verbose
report-failure:
name: Report test failure
needs: [build, tests]
Expand All @@ -634,4 +575,6 @@ jobs:
title: `Scheduled functional test failed - Run ID: ${context.runId}`,
labels: ['bug', 'test-failure'],
body: `## Bug information \n\nThis bug is generated automatically if the scheduled functional test fails. The Radius functional test operates on a schedule of every 4 hours during weekdays and every 12 hours over the weekend. It's important to understand that the test may fail due to workflow infrastructure issues, like network problems, rather than the flakiness of the test itself. For the further investigation, please visit [here](${process.env.ACTION_LINK}).`
})
})

## test comment for PR
Loading