From bf663f6d5e1261dc4a0a548d2022b0e32e86a4f2 Mon Sep 17 00:00:00 2001
From: Orfeas Kourkakis
Date: Tue, 19 Mar 2024 15:49:32 +0200
Subject: [PATCH] update from bundle-kubeflow

Changes to the deploy-to-aks workflow:

- Add an optional `k8s_version` workflow_dispatch input; when set it
  takes precedence over the per-bundle Kubernetes version lookup.
- Drop "1.7" from the default bundle matrix and add "latest" entries to
  the K8S_VERSION / JUJU_VERSION lookup tables.
- Rename the "Install CLI tools tox charmcraft juju" step to
  "Install CLI tools".
- Add a "Save debug artifacts" step that always runs.
- Run the diagnostic steps on cancellation as well as on failure.
- Add a "Describe all pods" diagnostic step.
---
Review notes (this section is not part of the commit message):

- The "Describe all pods" step calls plain `kubectl`, like every other
  diagnostic step in this workflow, instead of `sudo microk8s kubectl`.
  Because of that one-line change the post-image blob id on the `index`
  line below no longer matches the patched file; a plain `git apply` /
  `git am` does not verify it.
- NOTE(review): this patch was recovered from a copy whose newlines and
  indentation had been collapsed. Indentation depth, the position of blank
  context lines and the spacing inside the commented-out `schedule` block
  were reconstructed from the hunk line counts and the usual workflow
  layout -- confirm against the target file before applying.

 .github/workflows/deploy-to-aks.yaml | 32 +++++++++++++++++++---------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/.github/workflows/deploy-to-aks.yaml b/.github/workflows/deploy-to-aks.yaml
index d0991b4..68576c2 100644
--- a/.github/workflows/deploy-to-aks.yaml
+++ b/.github/workflows/deploy-to-aks.yaml
@@ -6,6 +6,9 @@ on:
         description: 'Comma-separated list of bundle versions e.g. "1.7","1.8"'
         default: '"1.8"'
         required: true
+      k8s_version:
+        description: 'Kubernetes version to be used for the AKS cluster'
+        required: false

   # schedule:
   #   - cron: "17 0 * * 2"
@@ -14,17 +17,17 @@ jobs:
     runs-on: ubuntu-22.04
     strategy:
       matrix:
-        bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.7","1.8","latest"')) }}
+        bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.8","latest"')) }}
       fail-fast: false
     env:
       AZURE_CORE_OUTPUT: none
-      K8S_VERSION: ${{ fromJSON('{"1.8":"1.26"}')[matrix.bundle_version] }}
-      JUJU_VERSION: ${{ fromJSON('{"1.8":"3.1"}')[ matrix.bundle_version ] }}
+      K8S_VERSION: ${{ inputs.k8s_version || fromJSON('{"1.8":"1.26", "latest":"1.26"}')[matrix.bundle_version] }}
+      JUJU_VERSION: ${{ fromJSON('{"1.8":"3.1","latest":"3.1"}')[ matrix.bundle_version ] }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v2

-      - name: Install CLI tools tox charmcraft juju
+      - name: Install CLI tools
         run: |
           python -m pip install --upgrade pip
           pip install tox
@@ -70,29 +73,38 @@ jobs:
           tox -vve test_bundle_deployment-${{ matrix.bundle_version }} -- --model kubeflow --keep-models -vv -s

       # On failure, capture debugging resources
+      - name: Save debug artifacts
+        uses: canonical/kubeflow-ci/actions/dump-charm-debug-artifacts@main
+        if: always()
+
       - name: Get juju status
         run: juju status
-        if: failure()
+        if: failure() || cancelled()

       - name: Get juju debug logs
         run: juju debug-log --replay --no-tail
-        if: failure()
+        if: failure() || cancelled()

       - name: Get all kubernetes resources
         run: kubectl get all -A
-        if: failure()
+        if: failure() || cancelled()
+
+      - name: Describe all pods
+        if: failure() || cancelled()
+        run: |
+          kubectl describe pods --all-namespaces

       - name: Get logs from pods with status = Pending
         run: kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
-        if: failure()
+        if: failure() || cancelled()

       - name: Get logs from pods with status = Failed
         run: kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
-        if: failure()
+        if: failure() || cancelled()

       - name: Get logs from pods with status = CrashLoopBackOff
         run: kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
-        if: failure()
+        if: failure() || cancelled()

       - name: Delete AKS cluster
         if: always()