From c3edd876c08a739dce06aafeccbe7327f8446f8d Mon Sep 17 00:00:00 2001
From: Michal Hucko
Date: Tue, 11 Jul 2023 10:29:11 +0200
Subject: [PATCH] KF-3704 add bundle test in ci for eks aws (#171)

* Bundle test in ci for eks aws
---
Review notes (text between the three-dash line and the first diff is not
part of the commit message and is skipped when the patch is applied):

* Line structure was restored from a copy of this patch that had been
  collapsed onto a few long lines. Indentation was rebuilt from each file's
  syntax; verify it against the original commit before applying.
* deploy-eks.yaml: "Remove eks" now runs after the failure diagnostics, so
  those steps still have a cluster to query. The log steps read the
  "kubeflow" namespace, which is the model the workflow creates and the
  namespace the test port-forwards from. "mkdir -p" tolerates an existing
  ~/.kube.
* test_bundle.py: "juju deploy" is run with subprocess.run(check=True) so a
  rejected bundle fails the test immediately; the port-forward fixture stops
  and reaps its child processes in a finally block; HTTP probes carry a
  timeout; docstrings added.
* Hunk sizes and the diffstat below reflect these edits. The blob ids on the
  "index" lines of deploy-eks.yaml, bundle.yaml and test_bundle.py are
  carried over from the original commit and no longer match the content;
  regenerate the patch with git format-patch if "git am -3" is needed.

 .github/cluster.yaml                    | 40 +++++++++++
 .github/workflows/deploy-eks.yaml       | 88 +++++++++++++++++++++++++
 .github/workflows/publish.yaml          |  2 +-
 releases/latest/edge/mlflow/bundle.yaml |  3 +-
 tests/integration/test_bundle.py        | 58 ++++++++++++++++
 tox.ini                                 | 11 ++++
 6 files changed, 200 insertions(+), 2 deletions(-)
 create mode 100644 .github/cluster.yaml
 create mode 100644 .github/workflows/deploy-eks.yaml
 create mode 100644 tests/integration/test_bundle.py

diff --git a/.github/cluster.yaml b/.github/cluster.yaml
new file mode 100644
index 00000000..fdbd0430
--- /dev/null
+++ b/.github/cluster.yaml
@@ -0,0 +1,40 @@
+apiVersion: eksctl.io/v1alpha5
+availabilityZones:
+- eu-central-1a
+- eu-central-1b
+cloudWatch:
+  clusterLogging: {}
+iam:
+  vpcResourceControllerPolicy: true
+  withOIDC: false
+addons:
+- name: aws-ebs-csi-driver
+  serviceAccountRoleARN: "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy"
+  wellKnownPolicies:
+    ebsCSIController: true
+kind: ClusterConfig
+kubernetesNetworkConfig:
+  ipFamily: IPv4
+managedNodeGroups:
+- amiFamily: Ubuntu2004
+  iam:
+    withAddonPolicies:
+      ebs: true
+  instanceType: t2.2xlarge
+  labels:
+    alpha.eksctl.io/cluster-name: scrumptious-wardrobe-1684842095
+    alpha.eksctl.io/nodegroup-name: ng-d06bd84e
+  maxSize: 2
+  minSize: 2
+  name: ng-d06bd84e
+  releaseVersion: ""
+  ssh:
+    allow: false
+  tags:
+    alpha.eksctl.io/nodegroup-name: ng-d06bd84e
+    alpha.eksctl.io/nodegroup-type: managed
+  volumeSize: 100
+metadata:
+  name: mlflow-bundle-test
+  region: eu-central-1
+  version: "1.24"
diff --git a/.github/workflows/deploy-eks.yaml b/.github/workflows/deploy-eks.yaml
new file mode 100644
index 00000000..d29d45a9
--- /dev/null
+++ b/.github/workflows/deploy-eks.yaml
@@ -0,0 +1,88 @@
+name: Configure EKS and run MLflow bundle test
+
+on:
+  workflow_dispatch: # This event allows manual triggering from the GitHub UI
+    secrets:
+      AWS_ACCESS_KEY_ID:
+        required: true
+      AWS_SECRET_ACCESS_KEY:
+        required: true
+jobs:
+  deploy-eks:
+    runs-on: ubuntu-22.04
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v2
+
+      - name: Install tox and run tests
+        run: |
+          sudo apt-get install -y python3-pip
+          sudo pip3 install tox
+
+      - name: Configure AWS Credentials
+        env:
+          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
+          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
+        run: |
+          aws configure set aws_access_key_id $AWS_ACCESS_KEY_ID
+          aws configure set aws_secret_access_key $AWS_SECRET_ACCESS_KEY
+          aws configure set default.region eu-central-1
+
+      - name: Install kubectl
+        run: |
+          sudo snap install kubectl --classic
+          mkdir -p ~/.kube
+          kubectl version --client
+
+      - name: Install eksctl
+        run: |
+          sudo apt-get update
+          sudo apt-get install -y unzip
+          curl --silent --location "https://github.com/weaveworks/eksctl/releases/latest/download/eksctl_$(uname -s)_amd64.tar.gz" | tar xz -C /tmp
+          sudo mv /tmp/eksctl /usr/local/bin
+          eksctl version
+
+      - name: Install juju
+        run: |
+          sudo snap install juju --classic
+          sudo snap install charmcraft --classic
+          juju version
+
+      - name: Create cluster
+        run: |
+          eksctl create cluster -f .github/cluster.yaml
+          kubectl get nodes
+
+      - name: Setup juju
+        run: |
+          juju add-k8s kubeflow --client
+          juju bootstrap --no-gui kubeflow kubeflow-controller
+          juju add-model kubeflow
+
+      - name: Test bundle deployment
+        run: |
+          tox -vve bundle-test -- --model kubeflow --keep-models -vv -s
+
+      # On failure, capture debugging resources
+      - name: Get all kubernetes resources
+        run: kubectl get all -A
+        if: failure()
+
+      - name: Get juju status
+        run: juju status
+        if: failure()
+
+      - name: Get workload logs
+        run: kubectl logs --tail 100 -nkubeflow -lapp.kubernetes.io/name=mlflow-server
+        if: failure()
+
+      - name: Get operator logs
+        run: kubectl logs --tail 100 -nkubeflow -loperator.juju.is/name=mlflow-server
+        if: failure()
+
+      # Tear down last so the diagnostics above still have a cluster to query
+      - name: Remove eks
+        if: always()
+        run: |
+          eksctl delete cluster --name=mlflow-bundle-test
diff --git a/.github/workflows/publish.yaml b/.github/workflows/publish.yaml
index a42a4540..3fe61fc0 100644
--- a/.github/workflows/publish.yaml
+++ b/.github/workflows/publish.yaml
@@ -10,5 +10,5 @@ on:
 jobs:
   publish:
     name: Publish
-    uses: canonical/charmed-kubeflow-workflows/.github/workflows/_publish.yaml@all
+    uses: canonical/charmed-kubeflow-workflows/.github/workflows/_publish.yaml@main
     secrets: inherit
diff --git a/releases/latest/edge/mlflow/bundle.yaml b/releases/latest/edge/mlflow/bundle.yaml
index a840e62a..eefd4acf 100644
--- a/releases/latest/edge/mlflow/bundle.yaml
+++ b/releases/latest/edge/mlflow/bundle.yaml
@@ -10,7 +10,8 @@ applications:
     _github_repo_name: minio-operator
   mlflow-mysql:
     charm: mysql-k8s
-    channel: 8.0/stable
+    # Using edge because stable cannot be deployed to EKS; switch back to stable after the release
+    channel: 8.0/edge
     series: jammy
     scale: 1
     trust: true
diff --git a/tests/integration/test_bundle.py b/tests/integration/test_bundle.py
new file mode 100644
index 00000000..509fc7e8
--- /dev/null
+++ b/tests/integration/test_bundle.py
@@ -0,0 +1,58 @@
+"""Integration test: deploy the MLflow bundle and probe it over HTTP."""
+
+import subprocess
+
+import pytest
+import requests
+from pytest_operator.plugin import OpsTest
+from tenacity import retry, stop_after_delay, wait_fixed
+
+BUNDLE_PATH = "./releases/latest/edge/mlflow/bundle.yaml"
+MLFLOW_APP_NAME = "mlflow-server"
+
+
+@pytest.fixture
+def forward_connections():
+    """Forward local ports 5002 and 8002 to ports 5000 and 8000 of pod mlflow-server-0."""
+    forwards = []
+    try:
+        for ports in ("5002:5000", "8002:8000"):
+            forwards.append(
+                subprocess.Popen(
+                    ["kubectl", "-n", "kubeflow", "port-forward", "pod/mlflow-server-0", ports]
+                )
+            )
+        yield
+    finally:
+        # Stop and reap every forwarder that was started, even if setup or the test failed.
+        for process in forwards:
+            process.terminate()
+            process.wait()
+
+
+class TestCharm:
+    """Deploy the bundle, then check that the forwarded endpoints answer."""
+
+    @pytest.mark.abort_on_fail
+    async def test_deploy_bundle_works(self, ops_test: OpsTest):
+        """Deploy the bundle and wait for mlflow-server to report active."""
+        # Block until `juju deploy` exits so a rejected bundle fails here, not at the idle timeout.
+        subprocess.run(["juju", "deploy", BUNDLE_PATH, "--trust"], check=True)
+        await ops_test.model.wait_for_idle(
+            apps=[MLFLOW_APP_NAME],
+            status="active",
+            raise_on_blocked=False,
+            raise_on_error=False,
+            timeout=1500,
+        )
+
+    @retry(stop=stop_after_delay(5), wait=wait_fixed(1))
+    @pytest.mark.abort_on_fail
+    async def test_mlflow_connetion(self, forward_connections, ops_test: OpsTest):
+        """Both forwarded ports must answer with HTTP 200."""
+        # A timeout keeps a hung port-forward from blocking the retry loop indefinitely.
+        mlflow_response = requests.get("http://localhost:5002", timeout=5)
+        exporter_response = requests.get("http://localhost:8002", timeout=5)
+
+        assert mlflow_response.status_code == 200
+        assert exporter_response.status_code == 200
diff --git a/tox.ini b/tox.ini
index ee280527..d3f753c4 100644
--- a/tox.ini
+++ b/tox.ini
@@ -88,3 +88,14 @@ commands =
 deps =
     -r requirements-integration.txt
 description = Run integration tests
+
+[testenv:bundle-test]
+commands =
+    pytest -v --tb native --asyncio-mode=auto {[vars]tst_path}integration/test_bundle.py --keep-models --log-cli-level=INFO -s {posargs}
+deps =
+    aiohttp
+    pytest-operator
+    tenacity
+    ops>=2.3.0
+    juju==3.0.4
+description = Run bundle test