Skip to content

Commit

Permalink
update from bundle-kubeflow
Browse files Browse the repository at this point in the history
  • Loading branch information
orfeas-k committed Mar 19, 2024
1 parent 57374da commit bf663f6
Showing 1 changed file with 22 additions and 10 deletions.
32 changes: 22 additions & 10 deletions .github/workflows/deploy-to-aks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ on:
description: 'Comma-separated list of bundle versions e.g. "1.7","1.8"'
default: '"1.8"'
required: true
k8s_version:
description: 'Kubernetes version to be used for the AKS cluster'
required: false

# schedule:
# - cron: "17 0 * * 2"
Expand All @@ -14,17 +17,17 @@ jobs:
runs-on: ubuntu-22.04
strategy:
matrix:
bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.7","1.8","latest"')) }}
bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.8","latest"')) }}
fail-fast: false
env:
AZURE_CORE_OUTPUT: none
K8S_VERSION: ${{ fromJSON('{"1.8":"1.26"}')[matrix.bundle_version] }}
JUJU_VERSION: ${{ fromJSON('{"1.8":"3.1"}')[ matrix.bundle_version ] }}
K8S_VERSION: ${{ inputs.k8s_version || fromJSON('{"1.8":"1.26", "latest":"1.26"}')[matrix.bundle_version] }}
JUJU_VERSION: ${{ fromJSON('{"1.8":"3.1","latest":"3.1"}')[ matrix.bundle_version ] }}
steps:
- name: Checkout repository
uses: actions/checkout@v2

- name: Install CLI tools tox charmcraft juju
- name: Install CLI tools
run: |
python -m pip install --upgrade pip
pip install tox
Expand Down Expand Up @@ -70,29 +73,38 @@ jobs:
tox -vve test_bundle_deployment-${{ matrix.bundle_version }} -- --model kubeflow --keep-models -vv -s
# On failure, capture debugging resources
- name: Save debug artifacts
uses: canonical/kubeflow-ci/actions/dump-charm-debug-artifacts@main
if: always()

- name: Get juju status
run: juju status
if: failure()
if: failure() || cancelled()

- name: Get juju debug logs
run: juju debug-log --replay --no-tail
if: failure()
if: failure() || cancelled()

- name: Get all kubernetes resources
run: kubectl get all -A
if: failure()
if: failure() || cancelled()

# Debug step: print a full description of every pod in every namespace
# when the job has failed or been cancelled.
- name: Describe all pods
  if: failure() || cancelled()
  # Use plain kubectl, like the other debug steps in this job, so the
  # command talks to the AKS cluster under test. The previous
  # `sudo microk8s kubectl` form targets a local MicroK8s install, which
  # the visible part of this workflow never sets up.
  run: |
    kubectl describe pods --all-namespaces
- name: Get logs from pods with status = Pending
run: kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
if: failure()
if: failure() || cancelled()

- name: Get logs from pods with status = Failed
run: kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
if: failure()
if: failure() || cancelled()

- name: Get logs from pods with status = CrashLoopBackOff
run: kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
if: failure()
if: failure() || cancelled()

- name: Delete AKS cluster
if: always()
Expand Down

0 comments on commit bf663f6

Please sign in to comment.