# Create AKS cluster, deploy CKF and run bundle test #25

# Workflow that exercises bundle versions on AKS.
name: Create AKS cluster, deploy CKF and run bundle test
on:
  # Manual trigger: the caller supplies the bundle versions to test.
  workflow_dispatch:
    inputs:
      bundle_version:
        # The value is spliced into a JSON array by the job matrix, so every
        # version must be wrapped in double quotes and separated by commas.
        description: 'Comma-separated list of bundle versions e.g. "1.7","1.8"'
        default: '"1.8"'
        required: true
  # Scheduled trigger: 00:23 every Tuesday (GitHub evaluates cron in UTC).
  schedule:
    - cron: "23 0 * * 2"
jobs:
  # One job instance per bundle version in the matrix.
  deploy-ckf-to-aks:
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        # Manual runs use the dispatch input; scheduled runs have no inputs,
        # so the expression falls back to the '"1.7","1.8"' list.
        bundle_version: ${{ fromJSON(format('[{0}]', inputs.bundle_version || '"1.7","1.8"')) }}
      # Let the remaining versions keep running when one version fails.
      fail-fast: false
    env:
      AZURE_CORE_OUTPUT: none
      # Quoted so version numbers stay strings (an unquoted 1.30 would be
      # read as the float 1.3).
      JUJU_VERSION: "3.1"
      K8S_VERSION: "1.26"
    steps:
      # Derive a dot-free name from the bundle version (e.g. 1.8 -> kf18) and
      # export it through GITHUB_ENV so that later steps can read it.
      - name: print input
        run: |
          version=${{ matrix.bundle_version }}
          name="kf${version//.}"
          echo name=${name}
          echo "name=${name}" >> "$GITHUB_ENV"
      # Check that the exported value is visible both as a shell environment
      # variable and through the `env` expression context.
      - name: next step access
        run: |
          echo name = ${name}
          echo name = ${{ env.name }}
# - name: Checkout repository
# uses: actions/checkout@v2
# - name: Install CLI tools tox charmcraft juju
# run: |
# python -m pip install --upgrade pip
# pip install tox
# sudo snap install juju --classic --channel=${{ env.JUJU_VERSION }}/stable
# sudo snap install charmcraft --classic
# juju version
# - uses: azure/login@v1
# with:
# creds: ${{ secrets.AZURE_CREDENTIALS }}
# - name: Create resource group and cluster
# run: |
# # We need to remove the dot from version
# # due to cluster naming restrictions
# version=${{ matrix.bundle_version }}
# name="kf${version//.}"
# # Export the name so that later steps can use ${name}
# echo "name=${name}" >> "$GITHUB_ENV"
# az group create --name ${name}ResourceGroup --location westeurope
# # Other node VM size noted: Standard_D8s_v3
# az aks create \
# --resource-group ${name}ResourceGroup \
# --name ${name}AKSCluster \
# --kubernetes-version ${{ env.K8S_VERSION }} \
# --node-count 2 \
# --node-vm-size Standard_DS2_v2 \
# --node-osdisk-size 100 \
# --node-osdisk-type Managed \
# --os-sku Ubuntu \
# --no-ssh-key
# - name: Setup juju
# run: |
# az aks get-credentials --resource-group ${name}ResourceGroup --name ${name}AKSCluster --admin
# juju add-k8s aks --client
# juju bootstrap aks aks-controller
# juju add-model kubeflow
# - name: Test bundle deployment
# run: |
# tox -vve test_bundle_deployment-${{ matrix.bundle_version }} -- --model kubeflow --keep-models -vv -s
# # On failure, capture debugging resources
# - name: Get juju status
# run: juju status
# if: failure()
# - name: Get juju debug logs
# run: juju debug-log --replay --no-tail
# if: failure()
# - name: Get all kubernetes resources
# run: kubectl get all -A
# if: failure()
# - name: Get logs from pods with status = Pending
# run: kubectl -n kubeflow get pods | tail -n +2 | grep Pending | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
# if: failure()
# - name: Get logs from pods with status = Failed
# run: kubectl -n kubeflow get pods | tail -n +2 | grep Failed | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
# if: failure()
# - name: Get logs from pods with status = CrashLoopBackOff
# run: kubectl -n kubeflow get pods | tail -n +2 | grep CrashLoopBackOff | awk '{print $1}' | xargs -n1 kubectl -n kubeflow logs --all-containers=true --tail 100
# if: failure()
# - name: Delete AKS cluster
# if: always()
# run: az group delete --name ${name}ResourceGroup --yes