From a05ba9cf900aa062d6dcd64640ee47eae3118de1 Mon Sep 17 00:00:00 2001 From: Maciej Golaszewski Date: Wed, 28 Aug 2024 15:55:26 +0200 Subject: [PATCH 1/7] Add CI test for disabling the default CNI in Microk8s CAPI provider KU-1226 --- README.md | 14 +++--- integration/README.md | 67 ++++++++++++++++------------- integration/e2e_test.go | 54 +++++++++++++++++++++-- templates/cluster-template-aws.yaml | 2 + 4 files changed, 97 insertions(+), 40 deletions(-) diff --git a/README.md b/README.md index 6ced325..2413cb5 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,12 @@ This project offers a cluster API bootstrap provider controller that manages the ### Prerequisites * Install clusterctl following the [upstream instructions](https://cluster-api.sigs.k8s.io/user/quick-start.html#install-clusterctl) -``` +```bash curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.1.3/clusterctl-linux-amd64 -o clusterctl ``` * Install a bootstrap Kubernetes cluster. To use MicroK8s as a bootstrap cluster: -``` +```bash sudo snap install microk8s --classic sudo microk8s.config > ~/.kube/config sudo microk8s enable dns @@ -24,7 +24,7 @@ sudo microk8s enable dns To to configure clusterctl with the two MicroK8s providers edit `~/.cluster-api/clusterctl.yaml` and add the following: -``` +```yaml providers: - name: "microk8s" url: "https://github.com/canonical/cluster-api-bootstrap-provider-microk8s/releases/latest/bootstrap-components.yaml" @@ -44,21 +44,21 @@ Alternatively, you can build the providers manually as described in the followin -### Building from source +### Building from sourcebssdfsdfsdfsdfsdfsdsdfsdfsfdsfdsfsdfsdfsd * Install the cluster provider of your choice. Have a look at the [cluster API book](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for your options at this step. You should deploy only the infrastructure controller leaving the bootstrap and control plane ones empty. For example assuming we want to provision a MicroK8s cluster on AWS: -``` +```bash clusterctl init --infrastructure aws --bootstrap "-" --control-plane "-" ``` * Clone the two cluster API MicroK8s specific repositories and start the controllers on two separate terminals: -``` +```bash cd $GOPATH/src/github.com/canonical/cluster-api-bootstrap-provider-microk8s/ make install make run ``` And: -``` +```bash cd $GOPATH/src/github.com/canonical/cluster-api-control-plane-provider-microk8s/ make install make run diff --git a/integration/README.md b/integration/README.md index a905784..440cd8c 100644 --- a/integration/README.md +++ b/integration/README.md @@ -4,36 +4,43 @@ The integration/e2e tests have the following prerequisites: - * an environment variable `CLUSTER_MANIFEST_FILE` pointing to the cluster manifest. Cluster manifests can be produced with the help of the templates found under `templates`. For example: + * an environment variable `CLUSTER_MANIFEST_FILE` pointing to the cluster manifest. Cluster manifests can be produced with the help of the templates found under [`templates`](../templates). 
For example:
+ ```bash
+export AWS_REGION=us-east-1
+export AWS_SSH_KEY_NAME=capi
+export CONTROL_PLANE_MACHINE_COUNT=3
+export WORKER_MACHINE_COUNT=3
+export AWS_CREATE_BASTION=false
+export AWS_PUBLIC_IP=false
+export AWS_CONTROL_PLANE_MACHINE_FLAVOR=t3.large
+export AWS_NODE_MACHINE_FLAVOR=t3.large
+export CLUSTER_NAME=test-ci-cluster
+clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.27.0 > cluster.yaml
+export CLUSTER_MANIFEST_FILE=$PWD/cluster.yaml
 ```
- export AWS_REGION=us-east-1
- export AWS_SSH_KEY_NAME=capi
- export CONTROL_PLANE_MACHINE_COUNT=3
- export WORKER_MACHINE_COUNT=3
- export AWS_CREATE_BASTION=false
- export AWS_PUBLIC_IP=false
- export AWS_CONTROL_PLANE_MACHINE_FLAVOR=t3.large
- export AWS_NODE_MACHINE_FLAVOR=t3.large
- export CLUSTER_NAME=test-ci-cluster
- clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.25.0 > cluster.yaml
- export CLUSTER_MANIFEST_FILE=$PWD/cluster.yaml
- ```
-
+> NOTE: AWS_SSH_KEY_NAME is the name of the SSH key in AWS that you plan to use; if you don't have one yet, refer
+> to CAPI on [AWS prerequisites documentation](https://cluster-api-aws.sigs.k8s.io/topics/using-clusterawsadm-to-fulfill-prerequisites#ssh-key-pair)
 * Additional environment variables when testing cluster upgrades:
- ```
- export CAPI_UPGRADE_VERSION=v1.26.0
- export CAPI_UPGRADE_MD_NAME=${CLUSTER_NAME}-md-0
- export CAPI_UPGRADE_MD_TYPE=machinedeployments.cluster.x-k8s.io
- export CAPI_UPGRADE_CP_NAME=${CLUSTER_NAME}-control-plane
- export CAPI_UPGRADE_CP_TYPE=microk8scontrolplanes.controlplane.cluster.x-k8s.io
-
- # Change the control plane and worker machine count to desired values for in-place upgrades tests and create a new cluster manifest.
- CONTROL_PLANE_MACHINE_COUNT=1
- WORKER_MACHINE_COUNT=1
- clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.25.0 > cluster-inplace.yaml
- export CLUSTER_INPLACE_MANIFEST_FILE=$PWD/cluster-inplace.yaml
+ ```bash
+export CAPI_UPGRADE_VERSION=v1.28.0
+export CAPI_UPGRADE_MD_NAME=${CLUSTER_NAME}-md-0
+export CAPI_UPGRADE_MD_TYPE=machinedeployments.cluster.x-k8s.io
+export CAPI_UPGRADE_CP_NAME=${CLUSTER_NAME}-control-plane
+export CAPI_UPGRADE_CP_TYPE=microk8scontrolplanes.controlplane.cluster.x-k8s.io
+# Change the control plane and worker machine count to desired values for in-place upgrades tests and create a new cluster manifest.
+CONTROL_PLANE_MACHINE_COUNT=1 +WORKER_MACHINE_COUNT=1 +clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.27.0 > cluster-inplace.yaml +export CLUSTER_INPLACE_MANIFEST_FILE=$PWD/cluster-inplace.yaml +``` - ``` + * Additional environment variables when testing disable default CNI flag: + ```bash +export DISABLE_DEFAULT_CNI=true +export POST_RUN_COMMANDS='["helm install cilium cilium/cilium --namespace kube-system --set cni.confPath=/var/snap/microk8s/current/args/cni-network --set cni.binPath=/var/snap/microk8s/current/opt/cni/bin --set daemon.runPath=/var/snap/microk8s/current/var/run/cilium --set operator.replicas=1 --set ipam.operator.clusterPoolIPv4PodCIDRList=\"10.1.0.0/16\" --set nodePort.enabled=true"]' # install Calico in place default CNI +clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.27.0 > cluster_disable_default_cni.yaml +export CLUSTER_DISABLE_DEFAULT_CNI_MANIFEST_FILE=$PWD/cluster_disable_default_cni.yaml +``` * `clusterctl` available in the PATH @@ -67,10 +74,10 @@ microk8s config > ~/.kube/config #### Initialize infrastructure provider -Visit [here](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for a list of common infrasturture providers. +Visit [here](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for a list of common infrastructure providers. ```bash - clusterctl init --infrastructure --bootstrap - --control-plane - +clusterctl init --infrastructure --bootstrap - --control-plane - ``` #### Build Docker images and release manifests from the checked out source code @@ -83,7 +90,7 @@ docker push /capi-bootstrap-provider-microk8s: sed "s,docker.io/cdkbot/capi-bootstrap-provider-microk8s:latest,docker.io//capi-bootstrap-provider-microk8s:," -i bootstrap-components.yaml ``` -Similarly for control-plane provider +Similarly, for control-plane provider ```bash cd control-plane docker build -t /capi-control-plane-provider-microk8s: . diff --git a/integration/e2e_test.go b/integration/e2e_test.go index a284de8..d9b61c8 100644 --- a/integration/e2e_test.go +++ b/integration/e2e_test.go @@ -45,7 +45,7 @@ func init() { // TestBasic waits for the target cluster to deploy and start a 30 pod deployment. // The CLUSTER_MANIFEST_FILE environment variable should point to a manifest with the target cluster -// kubectl and clusterctl have to be avaibale in the caller's path. +// kubectl and clusterctl have to be available in the caller's path. // kubectl should be setup so it uses the kubeconfig of the management cluster by default. func TestBasic(t *testing.T) { cluster_manifest_file := os.Getenv("CLUSTER_MANIFEST_FILE") @@ -88,7 +88,29 @@ func TestInPlaceUpgrade(t *testing.T) { // Important: the cluster is deleted in the Cleanup function // which is called after all subtests are finished. t.Logf("Deleting the cluster") +} + +// TestDisableDefaultCNI deploys cluster disabled defalut CNI . +// The CLUSTER_DISABLE_DEFAULT_CNI_MANIFEST_FILE environment variable should point to a manifest with the target cluster +// With post actions calico will be installed. +func TestDisableDefaultCNI(t *testing.T) { + cluster_manifest_file := os.Getenv("CLUSTER_DISABLE_DEFAULT_CNI_MANIFEST_FILE") + if cluster_manifest_file == "" { + t.Fatalf("Environment variable CLUSTER_DISABLE_DEFAULT_CNI_MANIFEST_FILE is not set. 
" + + "CLUSTER_DISABLE_DEFAULT_CNI_MANIFEST_FILE is expected to hold the PATH to a cluster manifest.") + } + t.Logf("Cluster to setup is in %s", cluster_manifest_file) + setupCheck(t) + t.Cleanup(teardownCluster) + + t.Run("DeployCluster", func(t *testing.T) { deployCluster(t, os.Getenv("CLUSTER_DISABLE_DEFAULT_CNI_MANIFEST_FILE")) }) + t.Run("ValidateCalico", func(t *testing.T) { validateCalico(t) }) + t.Run("DeployMicrobot", func(t *testing.T) { deployMicrobot(t) }) + t.Run("UpgradeClusterRollout", func(t *testing.T) { upgradeCluster(t, "RollingUpgrade") }) + // Important: the cluster is deleted in the Cleanup function + // which is called after all subtests are finished. + t.Logf("Deleting the cluster") } // setupCheck checks that the environment is ready to run the tests. @@ -149,7 +171,7 @@ func teardownCluster() { // deployCluster deploys a cluster using the manifest in CLUSTER_MANIFEST_FILE. func deployCluster(t testing.TB, cluster_manifest_file string) { - t.Log("Setting up the cluster") + t.Logf("Setting up the cluster using %s", cluster_manifest_file) command := []string{"kubectl", "apply", "-f", cluster_manifest_file} cmd := exec.Command(command[0], command[1:]...) outputBytes, err := cmd.CombinedOutput() @@ -182,7 +204,7 @@ func deployCluster(t testing.TB, cluster_manifest_file string) { t.Fatal(err) } else { attempt++ - t.Log("Failed to get the target's kubeconfig, retrying.") + t.Logf("Failed to get the target's kubeconfig for %s, retrying.", cluster) time.Sleep(20 * time.Second) } } else { @@ -307,6 +329,32 @@ func deployMicrobot(t testing.TB) { command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "wait", "deploy/bot", "--for=jsonpath={.status.readyReplicas}=30"} for { cmd = exec.Command(command[0], command[1:]...) + outputBytes, err = cmd.CombinedOutput() + if err != nil { + t.Log(string(outputBytes)) + if attempt >= maxAttempts { + t.Fatal(err) + } else { + attempt++ + t.Log("Retrying") + time.Sleep(10 * time.Second) + } + } else { + break + } + } +} + +// validateCalico checks a deployment of calico demonset. +func validateCalico(t testing.TB) { + t.Log("Validate Calico") + // Make sure we have as many nodes as machines + attempt := 0 + maxAttempts := 60 + t.Log("Waiting for the deployment to complete") + command := []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "-n", "kube-system", "wait", "ds/calico-node", "--for=jsonpath={.status.numberAvailable}=6"} + for { + cmd := exec.Command(command[0], command[1:]...) outputBytes, err := cmd.CombinedOutput() if err != nil { t.Log(string(outputBytes)) diff --git a/templates/cluster-template-aws.yaml b/templates/cluster-template-aws.yaml index 31afb6d..b46ce17 100644 --- a/templates/cluster-template-aws.yaml +++ b/templates/cluster-template-aws.yaml @@ -108,3 +108,5 @@ spec: initConfiguration: riskLevel: "${SNAP_RISKLEVEL:=}" confinement: "${SNAP_CONFINEMENT:=}" + disableDefaultCNI: ${DISABLE_DEFAULT_CNI:=false} + postRunCommands: ${POST_RUN_COMMANDS:=[]} From 6389bd96838751806b6933e278b2aa4f48c5117a Mon Sep 17 00:00:00 2001 From: Maciej Golaszewski Date: Wed, 28 Aug 2024 16:00:28 +0200 Subject: [PATCH 2/7] typo fix --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 2413cb5..89ca58c 100644 --- a/README.md +++ b/README.md @@ -44,7 +44,7 @@ Alternatively, you can build the providers manually as described in the followin -### Building from sourcebssdfsdfsdfsdfsdfsdsdfsdfsfdsfdsfsdfsdfsd +### Building from source * Install the cluster provider of your choice. 
Have a look at the [cluster API book](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for your options at this step. You should deploy only the infrastructure controller leaving the bootstrap and control plane ones empty. For example assuming we want to provision a MicroK8s cluster on AWS: ```bash From 7914508993d2f8d9e5cb81c16ebfccb43e0c6b5a Mon Sep 17 00:00:00 2001 From: Maciej Golaszewski Date: Mon, 2 Sep 2024 19:43:14 +0200 Subject: [PATCH 3/7] remove environment variables from integration tests KU-1226 --- README.md | 14 +- integration/README.md | 48 +--- .../cluster-disable-default-cni.yaml | 117 ++++++++ .../cluster-manifests/cluster-inplace.yaml | 117 ++++++++ integration/cluster-manifests/cluster.yaml | 117 ++++++++ integration/e2e_test.go | 260 ++++++++++-------- templates/cluster-template-aws.yaml | 2 - 7 files changed, 516 insertions(+), 159 deletions(-) create mode 100644 integration/cluster-manifests/cluster-disable-default-cni.yaml create mode 100644 integration/cluster-manifests/cluster-inplace.yaml create mode 100644 integration/cluster-manifests/cluster.yaml diff --git a/README.md b/README.md index 89ca58c..32c94a5 100644 --- a/README.md +++ b/README.md @@ -9,12 +9,12 @@ This project offers a cluster API bootstrap provider controller that manages the ### Prerequisites * Install clusterctl following the [upstream instructions](https://cluster-api.sigs.k8s.io/user/quick-start.html#install-clusterctl) -```bash +``` curl -L https://github.com/kubernetes-sigs/cluster-api/releases/download/v1.1.3/clusterctl-linux-amd64 -o clusterctl ``` * Install a bootstrap Kubernetes cluster. To use MicroK8s as a bootstrap cluster: -```bash +``` sudo snap install microk8s --classic sudo microk8s.config > ~/.kube/config sudo microk8s enable dns @@ -24,7 +24,7 @@ sudo microk8s enable dns To to configure clusterctl with the two MicroK8s providers edit `~/.cluster-api/clusterctl.yaml` and add the following: -```yaml +``` providers: - name: "microk8s" url: "https://github.com/canonical/cluster-api-bootstrap-provider-microk8s/releases/latest/bootstrap-components.yaml" @@ -47,18 +47,18 @@ Alternatively, you can build the providers manually as described in the followin ### Building from source * Install the cluster provider of your choice. Have a look at the [cluster API book](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for your options at this step. You should deploy only the infrastructure controller leaving the bootstrap and control plane ones empty. For example assuming we want to provision a MicroK8s cluster on AWS: -```bash +``` clusterctl init --infrastructure aws --bootstrap "-" --control-plane "-" ``` * Clone the two cluster API MicroK8s specific repositories and start the controllers on two separate terminals: -```bash +``` cd $GOPATH/src/github.com/canonical/cluster-api-bootstrap-provider-microk8s/ make install make run ``` And: -```bash +``` cd $GOPATH/src/github.com/canonical/cluster-api-control-plane-provider-microk8s/ make install make run @@ -68,7 +68,7 @@ make run As soon as the bootstrap and control-plane controllers are up and running you can apply the cluster manifests describing the desired specs of the cluster you want to provision. Each machine is associated with a MicroK8sConfig through which you can set the cluster's properties. Please review the available options in [the respective definitions file](./apis/v1beta1/microk8sconfig_types.go). 
You may also find useful the example manifests found under the [examples](./examples/) directory. Note that the configuration structure followed is similar to the one of kubeadm: in the MicroK8sConfig you will find ClusterConfiguration and InitConfiguration sections. When targeting a specific infrastructure you should be aware of which ports are used by MicroK8s and allow them in the network security groups on your deployment.
-Two workload cluster templates are available under the [templates](./templates/) folder, which are actively used to validate releases:
+Two workload cluster templates are available under the [templates](./templates) folder, which are actively used to validate releases:
 - [AWS](./templates/cluster-template-aws.yaml), using the [AWS Infrastructure Provider](https://github.com/kubernetes-sigs/cluster-api-provider-aws)
 - [OpenStack](./templates/cluster-template-openstack.yaml), using the [OpenStack Infrastructure Provider](https://github.com/kubernetes-sigs/cluster-api-provider-openstack)
diff --git a/integration/README.md b/integration/README.md
index 440cd8c..e4999e8 100644
--- a/integration/README.md
+++ b/integration/README.md
@@ -4,47 +4,10 @@ The integration/e2e tests have the following prerequisites:
- * an environment variable `CLUSTER_MANIFEST_FILE` pointing to the cluster manifest. Cluster manifests can be produced with the help of the templates found under [`templates`](../templates). For example:
- ```bash
-export AWS_REGION=us-east-1
-export AWS_SSH_KEY_NAME=capi
-export CONTROL_PLANE_MACHINE_COUNT=3
-export WORKER_MACHINE_COUNT=3
-export AWS_CREATE_BASTION=false
-export AWS_PUBLIC_IP=false
-export AWS_CONTROL_PLANE_MACHINE_FLAVOR=t3.large
-export AWS_NODE_MACHINE_FLAVOR=t3.large
-export CLUSTER_NAME=test-ci-cluster
-clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.27.0 > cluster.yaml
-export CLUSTER_MANIFEST_FILE=$PWD/cluster.yaml
- ```
-> NOTE: AWS_SSH_KEY_NAME is the name of the SSH key in AWS that you plan to use; if you don't have one yet, refer
-> to CAPI on [AWS prerequisites documentation](https://cluster-api-aws.sigs.k8s.io/topics/using-clusterawsadm-to-fulfill-prerequisites#ssh-key-pair)
- * Additional environment variables when testing cluster upgrades:
- ```bash
-export CAPI_UPGRADE_VERSION=v1.28.0
-export CAPI_UPGRADE_MD_NAME=${CLUSTER_NAME}-md-0
-export CAPI_UPGRADE_MD_TYPE=machinedeployments.cluster.x-k8s.io
-export CAPI_UPGRADE_CP_NAME=${CLUSTER_NAME}-control-plane
-export CAPI_UPGRADE_CP_TYPE=microk8scontrolplanes.controlplane.cluster.x-k8s.io
-# Change the control plane and worker machine count to desired values for in-place upgrades tests and create a new cluster manifest.
-CONTROL_PLANE_MACHINE_COUNT=1 -WORKER_MACHINE_COUNT=1 -clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.27.0 > cluster-inplace.yaml -export CLUSTER_INPLACE_MANIFEST_FILE=$PWD/cluster-inplace.yaml -``` - - * Additional environment variables when testing disable default CNI flag: - ```bash -export DISABLE_DEFAULT_CNI=true -export POST_RUN_COMMANDS='["helm install cilium cilium/cilium --namespace kube-system --set cni.confPath=/var/snap/microk8s/current/args/cni-network --set cni.binPath=/var/snap/microk8s/current/opt/cni/bin --set daemon.runPath=/var/snap/microk8s/current/var/run/cilium --set operator.replicas=1 --set ipam.operator.clusterPoolIPv4PodCIDRList=\"10.1.0.0/16\" --set nodePort.enabled=true"]' # install Calico in place default CNI -clusterctl generate cluster ${CLUSTER_NAME} --from "templates/cluster-template-aws.yaml" --kubernetes-version 1.27.0 > cluster_disable_default_cni.yaml -export CLUSTER_DISABLE_DEFAULT_CNI_MANIFEST_FILE=$PWD/cluster_disable_default_cni.yaml -``` + * make sure to have ssh key in aws `capi`in `us-east-1 region` if you do not have key refer + to CAPI on [AWS prerequisites documentation](https://cluster-api-aws.sigs.k8s.io/topics/using-clusterawsadm-to-fulfill-prerequisites#ssh-key-pair) - * `clusterctl` available in the PATH - - * `kubectl` available in the PATH + * to run tests locally you will need to have on path fallowing programs `clusterctl`, `kubectl`, `helm` * a management cluster initialised via `clusterctl` with the infrastructure targeted as well as the version of the MicroK8s providers we want to be tested @@ -77,7 +40,7 @@ microk8s config > ~/.kube/config Visit [here](https://cluster-api.sigs.k8s.io/user/quick-start.html#initialization-for-common-providers) for a list of common infrastructure providers. ```bash -clusterctl init --infrastructure --bootstrap - --control-plane - + clusterctl init --infrastructure --bootstrap - --control-plane - ``` #### Build Docker images and release manifests from the checked out source code @@ -103,6 +66,9 @@ sed "s,docker.io/cdkbot/capi-control-plane-provider-microk8s:latest,docker.io/= maxAttempts { + t.Fatal(err) + } else { + attempt++ + t.Log("Retrying") + time.Sleep(10 * time.Second) + } + } else { + machines = strings.Count(output, "\n") + if machines > 0 { + break + } + } + } + + t.Log("Checking Cilium daemon set") + command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "-n", "kube-system", "wait", "ds/cilium", fmt.Sprintf("--for=jsonpath={.status.numberAvailable}=%d", machines)} + for { + cmd := exec.Command(command[0], command[1:]...) + outputBytes, err := cmd.CombinedOutput() + if err != nil { + t.Logf("running command: %s", strings.Join(command, " ")) + t.Log(string(outputBytes)) + if attempt >= maxAttempts { + t.Fatal(err) + } else { + attempt++ + t.Log("Retrying") + time.Sleep(20 * time.Second) + } } else { break } @@ -372,21 +447,24 @@ func validateCalico(t testing.TB) { } // upgradeCluster upgrades the cluster to a new version based on the upgrade strategy. 
-func upgradeCluster(t testing.TB, upgrade_strategy string) { +func upgradeCluster(t testing.TB, upgradeStrategy string) { - version, control_plane_name, control_plane_type, worker_deployment_name, - worker_deployment_type := getUpgradeEnvVars(t) + version := "v1.28.0" + controlPlaneName := "test-ci-cluster-control-plane" + controlPlaneType := "microk8scontrolplanes.controlplane.cluster.x-k8s.io" + workerDeploymentName := "test-ci-cluster-md-0" + workerDeploymentType := "machinedeployments.cluster.x-k8s.io" - t.Logf("Upgrading cluster to %s via %s", version, upgrade_strategy) - // Patch contol plane machine upgrades based on type of upgrade strategy. - outputBytes, err := controlPlanePatch(control_plane_name, control_plane_type, version, upgrade_strategy) + t.Logf("Upgrading cluster to %s via %s", version, upgradeStrategy) + // Patch control plane machine upgrades based on type of upgrade strategy. + outputBytes, err := controlPlanePatch(controlPlaneName, controlPlaneType, version, upgradeStrategy) if err != nil { t.Error(string(outputBytes)) t.Fatalf("Failed to merge the patch to control plane. %s", err) } // Patch worker machine upgrades. - outputBytes, err = workerPatch(worker_deployment_name, worker_deployment_type, version) + outputBytes, err = workerPatch(workerDeploymentName, workerDeploymentType, version) if err != nil { t.Error(string(outputBytes)) t.Fatalf("Failed to merge the patch to the machine deployments. %s", err) @@ -396,11 +474,11 @@ func upgradeCluster(t testing.TB, upgrade_strategy string) { // Now all the machines should be upgraded to the new version. attempt := 0 - maxAttempts := 60 + maxAttempts := 120 command := []string{"kubectl", "get", "machine", "--no-headers"} for { cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() + outputBytes, err = cmd.CombinedOutput() output := string(outputBytes) if err != nil { t.Log(output) @@ -430,55 +508,19 @@ func upgradeCluster(t testing.TB, upgrade_strategy string) { } // controlPlanePatch patches the control plane machines based on the upgrade strategy and version. -func controlPlanePatch(control_plane_name, control_plane_type, version, upgrade_strategy string) ([]byte, error) { - command := []string{"kubectl", "patch", "--type=merge", control_plane_type, control_plane_name, "--patch", - fmt.Sprintf(`{"spec":{"version":"%s","upgradeStrategy":"%s"}}`, version, upgrade_strategy)} +func controlPlanePatch(controlPlaneName, controlPlaneType, version, upgradeStrategy string) ([]byte, error) { + command := []string{"kubectl", "patch", "--type=merge", controlPlaneType, controlPlaneName, "--patch", + fmt.Sprintf(`{"spec":{"version":"%s","upgradeStrategy":"%s"}}`, version, upgradeStrategy)} cmd := exec.Command(command[0], command[1:]...) return cmd.CombinedOutput() } // workerPatch patches a given worker machines with the given version. -func workerPatch(worker_deployment_name, worker_deployment_type, version string) ([]byte, error) { - command := []string{"kubectl", "patch", "--type=merge", worker_deployment_type, worker_deployment_name, "--patch", +func workerPatch(workerDeploymentName, workerDeploymentType, version string) ([]byte, error) { + command := []string{"kubectl", "patch", "--type=merge", workerDeploymentType, workerDeploymentName, "--patch", fmt.Sprintf(`{"spec":{"template":{"spec":{"version":"%s"}}}}`, version)} cmd := exec.Command(command[0], command[1:]...) return cmd.CombinedOutput() } - -// getUpgradeEnvVars returns the environment variables needed for the upgrade test. 
-func getUpgradeEnvVars(t testing.TB) (version string, control_plane_name string, control_plane_type string, - worker_deployment_name string, worker_deployment_type string) { - version = os.Getenv("CAPI_UPGRADE_VERSION") - if version == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_VERSION is not set." + - "Please set it to the version you want to upgrade to.") - } - - control_plane_name = os.Getenv("CAPI_UPGRADE_CP_NAME") - if control_plane_name == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_CP_NAME is not set." + - "Please set it to the name of the control plane you want to upgrade.") - } - - control_plane_type = os.Getenv("CAPI_UPGRADE_CP_TYPE") - if control_plane_type == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_CP_TYPE is not set." + - "Please set it to the type of the control plane you want to upgrade.") - } - - worker_deployment_name = os.Getenv("CAPI_UPGRADE_MD_NAME") - if worker_deployment_name == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_MD_NAME is not set." + - "Please set it to the name of the machine deployment you want to upgrade.") - } - - worker_deployment_type = os.Getenv("CAPI_UPGRADE_MD_TYPE") - if worker_deployment_type == "" { - t.Fatalf("Environment variable CAPI_UPGRADE_MD_TYPE is not set." + - "Please set it to the type of the machine deployment you want to upgrade.") - } - - return version, control_plane_name, control_plane_type, worker_deployment_name, worker_deployment_type -} diff --git a/templates/cluster-template-aws.yaml b/templates/cluster-template-aws.yaml index b46ce17..31afb6d 100644 --- a/templates/cluster-template-aws.yaml +++ b/templates/cluster-template-aws.yaml @@ -108,5 +108,3 @@ spec: initConfiguration: riskLevel: "${SNAP_RISKLEVEL:=}" confinement: "${SNAP_CONFINEMENT:=}" - disableDefaultCNI: ${DISABLE_DEFAULT_CNI:=false} - postRunCommands: ${POST_RUN_COMMANDS:=[]} From 6a3bd0eef292a96d21944a0645759dfcce2721fb Mon Sep 17 00:00:00 2001 From: Maciej Golaszewski Date: Mon, 2 Sep 2024 19:48:17 +0200 Subject: [PATCH 4/7] description fix --- integration/e2e_test.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/e2e_test.go b/integration/e2e_test.go index 672ecb7..f4bec9f 100644 --- a/integration/e2e_test.go +++ b/integration/e2e_test.go @@ -85,9 +85,9 @@ func TestInPlaceUpgrade(t *testing.T) { t.Logf("Deleting the cluster") } -// TestDisableDefaultCNI deploys cluster disabled default CNI. +// TestDisableDefaultCNI deploys cluster disabled default CNI. +// Next Cilium is installed // helm have to be available in the caller's path. -// With post actions calico will be installed. 
func TestDisableDefaultCNI(t *testing.T) { t.Logf("Cluster to setup is in %s", DisableDefaultCNIClusterPath) From 57eb7470fa1e48cca10fd28c6e4851f1aa261e95 Mon Sep 17 00:00:00 2001 From: Maciej Golaszewski Date: Tue, 3 Sep 2024 17:48:45 +0200 Subject: [PATCH 5/7] refactor retryable command exec --- .../cluster-manifests/cluster-inplace.yaml | 2 - integration/e2e_test.go | 330 ++++++------------ 2 files changed, 114 insertions(+), 218 deletions(-) diff --git a/integration/cluster-manifests/cluster-inplace.yaml b/integration/cluster-manifests/cluster-inplace.yaml index 8af59d9..2d63b9f 100644 --- a/integration/cluster-manifests/cluster-inplace.yaml +++ b/integration/cluster-manifests/cluster-inplace.yaml @@ -39,7 +39,6 @@ spec: - dns - ingress confinement: classic - disableDefaultCNI: false joinTokenTTLInSecs: 900000 riskLevel: stable machineTemplate: @@ -113,5 +112,4 @@ spec: portCompatibilityRemap: true initConfiguration: confinement: classic - disableDefaultCNI: false riskLevel: stable diff --git a/integration/e2e_test.go b/integration/e2e_test.go index f4bec9f..ed1e8e2 100644 --- a/integration/e2e_test.go +++ b/integration/e2e_test.go @@ -36,6 +36,8 @@ const ( DisableDefaultCNIClusterPath string = "cluster-manifests/cluster-disable-default-cni.yaml" BasicClusterPath string = "cluster-manifests/cluster.yaml" InPlaceUpgradeClusterPath string = "cluster-manifests/cluster-inplace.yaml" + retryMaxAttempts = 120 + secondsBetweenAttempts = 20 ) func init() { @@ -126,24 +128,10 @@ func setupCheck(t testing.TB) { // waitForPod waits for a pod to be available. func waitForPod(t testing.TB, pod string, ns string) { - attempt := 0 - maxAttempts := 10 + command := []string{"kubectl", "wait", "--timeout=15s", "--for=condition=available", "deploy/" + pod, "-n", ns} - for { - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Log(string(outputBytes)) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - t.Logf("Retrying...") - attempt++ - time.Sleep(10 * time.Second) - } - } else { - break - } + if err := retryableCommand(t, command); err != nil { + t.Fatal(err) } } @@ -187,123 +175,63 @@ func deployCluster(t testing.TB, clusterManifestFile string) { cluster := strings.Trim(string(outputBytes), "\n") t.Logf("Cluster name is %s", cluster) - attempt := 0 - maxAttempts := 120 command = []string{"clusterctl", "get", "kubeconfig", cluster} - for { - cmd = exec.Command(command[0], command[1:]...) - outputBytes, err = cmd.Output() + testFn := func(output string) bool { + cfg := strings.Trim(output, "\n") + err = os.WriteFile(KUBECONFIG, []byte(cfg), 0644) if err != nil { - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Logf("Failed to get the target's kubeconfig for %s, retrying.", cluster) - time.Sleep(20 * time.Second) - } - } else { - cfg := strings.Trim(string(outputBytes), "\n") - err = os.WriteFile(KUBECONFIG, []byte(cfg), 0644) - if err != nil { - t.Fatalf("Could not persist the targets kubeconfig file. %s", err) - } - t.Logf("Target's kubeconfig file is at %s", KUBECONFIG) - t.Log(cfg) - break + t.Fatalf("Could not persist the targets kubeconfig file. 
%s", err) } + t.Logf("Target's kubeconfig file is at %s", KUBECONFIG) + t.Log(cfg) + return true + } + if err = retryableCommandWithConditionFunction(t, command, testFn); err != nil { + t.Fatal(err) } // Wait until the cluster is provisioned - attempt = 0 - maxAttempts = 120 command = []string{"kubectl", "get", "cluster", cluster} - for { - cmd = exec.Command(command[0], command[1:]...) - outputBytes, err = cmd.CombinedOutput() - if err != nil { - t.Log(string(outputBytes)) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - if strings.Contains(string(outputBytes), "Provisioned") { - break - } else { - attempt++ - time.Sleep(20 * time.Second) - t.Log("Waiting for the cluster to be provisioned") - } - } + testFn = func(output string) bool { + return strings.Contains(output, "Provisioned") + } + if err = retryableCommandWithConditionFunction(t, command, testFn); err != nil { + t.Fatal(err) } } // verifyCluster check if cluster is functional func verifyCluster(t testing.TB) { // Wait until all machines are running - attempt := 0 - maxAttempts := 120 + machines := 0 command := []string{"kubectl", "get", "machine", "--no-headers"} - for { - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - output := string(outputBytes) - if err != nil { - t.Log(output) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - machines = strings.Count(output, "\n") - running := strings.Count(output, "Running") - t.Logf("Machines %d out of which %d are Running", machines, running) - if machines == running { - break - } else { - attempt++ - time.Sleep(10 * time.Second) - t.Log("Waiting for machines to start running") - } + testFn := func(output string) bool { + machines = strings.Count(output, "\n") + running := strings.Count(output, "Running") + t.Logf("Machines %d out of which %d are Running", machines, running) + if machines == running { + return true } + return false + } + if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil { + t.Fatal(err) } // Make sure we have as many nodes as machines - attempt = 0 - maxAttempts = 120 command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "get", "no", "--no-headers"} - for { - cmd := exec.Command(command[0], command[1:]...) 
- outputBytes, err := cmd.CombinedOutput() - output := string(outputBytes) - if err != nil { - t.Log(output) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - time.Sleep(10 * time.Second) - t.Log("Retrying") - } - } else { - nodes := strings.Count(output, "\n") - ready := strings.Count(output, " Ready") - t.Logf("Machines are %d, Nodes are %d out of which %d are Ready", machines, nodes, ready) - if machines == nodes && ready == nodes { - break - } else { - attempt++ - time.Sleep(20 * time.Second) - t.Log("Waiting for nodes to become ready") - } + testFn = func(output string) bool { + nodes := strings.Count(output, "\n") + ready := strings.Count(output, " Ready") + t.Logf("Machines are %d, Nodes are %d out of which %d are Ready", machines, nodes, ready) + if machines == nodes && ready == nodes { + return true } + return false + } + if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil { + t.Fatal(err) } } @@ -320,54 +248,24 @@ func deployMicrobot(t testing.TB) { } // Make sure we have as many nodes as machines - attempt := 0 - maxAttempts := 120 t.Log("Waiting for the deployment to complete") command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "wait", "deploy/bot", "--for=jsonpath={.status.readyReplicas}=30"} - for { - cmd = exec.Command(command[0], command[1:]...) - outputBytes, err = cmd.CombinedOutput() - if err != nil { - t.Log(string(outputBytes)) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - break - } + if err = retryableCommand(t, command); err != nil { + t.Fatal(err) } } -// validateNoCalico checks a there is no calico daemon set. +// validateNoCalico Checks if calico daemon set is not deployed on the cluster. func validateNoCalico(t testing.TB) { t.Log("Validate no Calico daemon set") t.Log("Checking for calico daemon set") command := []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "-n", "kube-system", "get", "ds"} - attempt := 0 - maxAttempts := 120 - for { - t.Logf("running command: %s", strings.Join(command, " ")) - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Log(string(outputBytes)) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - if !strings.Contains(string(outputBytes), "calico") { - break - } - } + testFn := func(output string) bool { + return !strings.Contains(output, "calico") + } + if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil { + t.Fatal(err) } t.Log("No calico daemon set") } @@ -398,52 +296,67 @@ func installCilium(t testing.TB) { func validateCilium(t testing.TB) { t.Log("Validate Cilium") - //check control plane machine exists - attempt := 0 - maxAttempts := 120 - machines := 0 command := []string{"kubectl", "get", "machine", "--no-headers"} - for { + machines := 0 + testFn := func(output string) bool { + machines = strings.Count(output, "\n") + return machines > 0 + } + if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil { + t.Fatal(err) + } + + t.Log("Checking Cilium daemon set") + command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "-n", "kube-system", "wait", "ds/cilium", fmt.Sprintf("--for=jsonpath={.status.numberAvailable}=%d", machines)} + if err := retryableCommand(t, command); err != nil { + t.Fatal(err) + } +} + +// retryableCommand runs command and retires if command fails. 
+// Runs provided command, if commands return err retries it retryMaxAttempts and waits secondsBetweenAttempts between next execution. +// If after all attempts error occurs error gets returned. +func retryableCommand(t testing.TB, command []string) error { + var err error + var outputBytes []byte + for attempt := 0; attempt < retryMaxAttempts; attempt++ { + if attempt > 1 { + t.Logf("Retrying") + time.Sleep(secondsBetweenAttempts * time.Second) + } t.Logf("running command: %s", strings.Join(command, " ")) cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - output := string(outputBytes) - if err != nil { - t.Log(output) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(10 * time.Second) - } - } else { - machines = strings.Count(output, "\n") - if machines > 0 { - break - } + outputBytes, err = cmd.CombinedOutput() + t.Log(string(outputBytes)) + if err == nil { + break } } + return err +} - t.Log("Checking Cilium daemon set") - command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "-n", "kube-system", "wait", "ds/cilium", fmt.Sprintf("--for=jsonpath={.status.numberAvailable}=%d", machines)} - for { +// retryableCommandWithConditionFunction runs command with retry. +// Runs provided command, if commands return err retries it retryMaxAttempts and waits secondsBetweenAttempts between next execution. +// Additionally, to finish correctly runs function fn func(string) bool, if passed function returns true +// the retryableCommandWithConditionFunction returns nil +func retryableCommandWithConditionFunction(t testing.TB, command []string, fn func(string) bool) error { + var err error + var outputBytes []byte + for attempt := 0; attempt < retryMaxAttempts; attempt++ { + if attempt > 1 { + t.Logf("Retrying") + time.Sleep(secondsBetweenAttempts * time.Second) + } + t.Logf("running command: %s", strings.Join(command, " ")) cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Logf("running command: %s", strings.Join(command, " ")) - t.Log(string(outputBytes)) - if attempt >= maxAttempts { - t.Fatal(err) - } else { - attempt++ - t.Log("Retrying") - time.Sleep(20 * time.Second) - } - } else { + outputBytes, err = cmd.CombinedOutput() + output := string(outputBytes) + t.Log(output) + if err == nil && fn(output) { break } } + return err } // upgradeCluster upgrades the cluster to a new version based on the upgrade strategy. @@ -473,37 +386,22 @@ func upgradeCluster(t testing.TB, upgradeStrategy string) { time.Sleep(30 * time.Second) // Now all the machines should be upgraded to the new version. - attempt := 0 - maxAttempts := 120 + command := []string{"kubectl", "get", "machine", "--no-headers"} - for { - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err = cmd.CombinedOutput() - output := string(outputBytes) - if err != nil { - t.Log(output) - if attempt >= maxAttempts { - t.Fatal(err) - } - - attempt++ - t.Log("Retrying") - time.Sleep(20 * time.Second) - } else { - totalMachines := strings.Count(output, "Running") - - // We count all the "Running" machines with the new version. 
- re := regexp.MustCompile("Running .* " + version) - upgradedMachines := len(re.FindAllString(output, -1)) - t.Logf("Total machines %d out of which %d are upgraded", totalMachines, upgradedMachines) - if totalMachines == upgradedMachines { - break - } else { - attempt++ - time.Sleep(20 * time.Second) - t.Log("Waiting for machines to upgrade and start running") - } + testFn := func(output string) bool { + totalMachines := strings.Count(output, "Running") + + // We count all the "Running" machines with the new version. + re := regexp.MustCompile("Running .* " + version) + upgradedMachines := len(re.FindAllString(output, -1)) + t.Logf("Total machines %d out of which %d are upgraded", totalMachines, upgradedMachines) + if totalMachines == upgradedMachines { + return true } + return false + } + if err = retryableCommandWithConditionFunction(t, command, testFn); err != nil { + t.Fatal(err) } } From 966c0c8192dd92bf98f65a82833e586f61ed353a Mon Sep 17 00:00:00 2001 From: Maciej Golaszewski Date: Thu, 5 Sep 2024 20:41:56 +0200 Subject: [PATCH 6/7] added context to retry removed unnecessary functions --- integration/e2e_test.go | 320 +++++++++++++++++++--------------------- 1 file changed, 152 insertions(+), 168 deletions(-) diff --git a/integration/e2e_test.go b/integration/e2e_test.go index ed1e8e2..df83548 100644 --- a/integration/e2e_test.go +++ b/integration/e2e_test.go @@ -20,6 +20,8 @@ limitations under the License. package e2e_test import ( + "context" + "errors" "fmt" "os" "os/exec" @@ -55,14 +57,15 @@ func init() { // kubectl should be setup so it uses the kubeconfig of the management cluster by default. func TestBasic(t *testing.T) { t.Logf("Cluster to setup is in %s", BasicClusterPath) + ctx := context.Background() - setupCheck(t) + setupCheck(ctx, t) t.Cleanup(teardownCluster) - t.Run("DeployCluster", func(t *testing.T) { deployCluster(t, BasicClusterPath) }) - t.Run("VerifyCluster", func(t *testing.T) { verifyCluster(t) }) - t.Run("DeployMicrobot", func(t *testing.T) { deployMicrobot(t) }) - t.Run("UpgradeClusterRollout", func(t *testing.T) { upgradeCluster(t, "RollingUpgrade") }) + t.Run("DeployCluster", func(t *testing.T) { deployCluster(ctx, t, BasicClusterPath) }) + t.Run("VerifyCluster", func(t *testing.T) { verifyCluster(ctx, t) }) + t.Run("DeployMicrobot", func(t *testing.T) { deployMicrobot(ctx, t) }) + t.Run("UpgradeClusterRollout", func(t *testing.T) { upgradeCluster(ctx, t, "RollingUpgrade") }) // Important: the cluster is deleted in the Cleanup function // which is called after all subtests are finished. @@ -73,14 +76,15 @@ func TestBasic(t *testing.T) { // This cluster will be upgraded via an in-place upgrade. 
func TestInPlaceUpgrade(t *testing.T) { t.Logf("Cluster for in-place upgrade test setup is in %s", InPlaceUpgradeClusterPath) + ctx := context.Background() - setupCheck(t) + setupCheck(ctx, t) t.Cleanup(teardownCluster) - t.Run("DeployCluster", func(t *testing.T) { deployCluster(t, InPlaceUpgradeClusterPath) }) - t.Run("VerifyCluster", func(t *testing.T) { verifyCluster(t) }) - t.Run("DeployMicrobot", func(t *testing.T) { deployMicrobot(t) }) - t.Run("UpgradeClusterInplace", func(t *testing.T) { upgradeCluster(t, "InPlaceUpgrade") }) + t.Run("DeployCluster", func(t *testing.T) { deployCluster(ctx, t, InPlaceUpgradeClusterPath) }) + t.Run("VerifyCluster", func(t *testing.T) { verifyCluster(ctx, t) }) + t.Run("DeployMicrobot", func(t *testing.T) { deployMicrobot(ctx, t) }) + t.Run("UpgradeClusterInplace", func(t *testing.T) { upgradeCluster(ctx, t, "InPlaceUpgrade") }) // Important: the cluster is deleted in the Cleanup function // which is called after all subtests are finished. @@ -92,15 +96,16 @@ func TestInPlaceUpgrade(t *testing.T) { // helm have to be available in the caller's path. func TestDisableDefaultCNI(t *testing.T) { t.Logf("Cluster to setup is in %s", DisableDefaultCNIClusterPath) + ctx := context.Background() - setupCheck(t) + setupCheck(ctx, t) t.Cleanup(teardownCluster) - t.Run("DeployCluster", func(t *testing.T) { deployCluster(t, DisableDefaultCNIClusterPath) }) - t.Run("ValidateNoCalico", func(t *testing.T) { validateNoCalico(t) }) + t.Run("DeployCluster", func(t *testing.T) { deployCluster(ctx, t, DisableDefaultCNIClusterPath) }) + t.Run("ValidateNoCalico", func(t *testing.T) { validateNoCalico(ctx, t) }) t.Run("installCilium", func(t *testing.T) { installCilium(t) }) - t.Run("validateCilium", func(t *testing.T) { validateCilium(t) }) - t.Run("VerifyCluster", func(t *testing.T) { verifyCluster(t) }) + t.Run("validateCilium", func(t *testing.T) { validateCilium(ctx, t) }) + t.Run("VerifyCluster", func(t *testing.T) { verifyCluster(ctx, t) }) // Important: the cluster is deleted in the Cleanup function // which is called after all subtests are finished. @@ -116,21 +121,21 @@ func checkBinary(t testing.TB, binaryName string) { } // setupCheck checks that the environment is ready to run the tests. -func setupCheck(t testing.TB) { - +func setupCheck(ctx context.Context, t testing.TB) { for _, binaryName := range []string{"kubectl", "clusterctl", "helm"} { checkBinary(t, binaryName) } t.Logf("Waiting for the MicroK8s providers to deploy on the management cluster.") - waitForPod(t, "capi-microk8s-bootstrap-controller-manager", "capi-microk8s-bootstrap-system") - waitForPod(t, "capi-microk8s-control-plane-controller-manager", "capi-microk8s-control-plane-system") + waitForPod(ctx, t, "capi-microk8s-bootstrap-controller-manager", "capi-microk8s-bootstrap-system") + waitForPod(ctx, t, "capi-microk8s-control-plane-controller-manager", "capi-microk8s-control-plane-system") } // waitForPod waits for a pod to be available. 
-func waitForPod(t testing.TB, pod string, ns string) { - - command := []string{"kubectl", "wait", "--timeout=15s", "--for=condition=available", "deploy/" + pod, "-n", ns} - if err := retryableCommand(t, command); err != nil { +func waitForPod(ctx context.Context, t testing.TB, pod string, ns string) { + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + _, err := execCommand(t, "kubectl", "wait", "--timeout=15s", "--for=condition=available", "deploy/"+pod, "-n", ns) + return err + }); err != nil { t.Fatal(err) } } @@ -152,119 +157,132 @@ func teardownCluster() { } // deployCluster deploys a cluster using the manifest in CLUSTER_MANIFEST_FILE. -func deployCluster(t testing.TB, clusterManifestFile string) { +func deployCluster(ctx context.Context, t testing.TB, clusterManifestFile string) { t.Logf("Setting up the cluster using %s", clusterManifestFile) - command := []string{"kubectl", "apply", "-f", clusterManifestFile} - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Error(string(outputBytes)) - t.Fatalf("Failed to create the requested cluster. %s", err) + if _, err := execCommand(t, "kubectl", "apply", "-f", clusterManifestFile); err != nil { + t.Fatalf("Failed to get the name of the cluster. %s", err) } time.Sleep(30 * time.Second) - command = []string{"kubectl", "get", "cluster", "--no-headers", "-o", "custom-columns=:metadata.name"} - cmd = exec.Command(command[0], command[1:]...) - outputBytes, err = cmd.CombinedOutput() + output, err := execCommand(t, "kubectl", "get", "cluster", "--no-headers", "-o", "custom-columns=:metadata.name") if err != nil { - t.Error(string(outputBytes)) t.Fatalf("Failed to get the name of the cluster. %s", err) } - cluster := strings.Trim(string(outputBytes), "\n") + cluster := strings.Trim(output, "\n") t.Logf("Cluster name is %s", cluster) - command = []string{"clusterctl", "get", "kubeconfig", cluster} - testFn := func(output string) bool { + if err = retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err = execCommand(t, "clusterctl", "get", "kubeconfig", cluster) + if err != nil { + return err + } + cfg := strings.Trim(output, "\n") err = os.WriteFile(KUBECONFIG, []byte(cfg), 0644) if err != nil { t.Fatalf("Could not persist the targets kubeconfig file. 
%s", err) } t.Logf("Target's kubeconfig file is at %s", KUBECONFIG) - t.Log(cfg) - return true - } - if err = retryableCommandWithConditionFunction(t, command, testFn); err != nil { + return nil + + }); err != nil { t.Fatal(err) } // Wait until the cluster is provisioned - command = []string{"kubectl", "get", "cluster", cluster} - testFn = func(output string) bool { - return strings.Contains(output, "Provisioned") - } - if err = retryableCommandWithConditionFunction(t, command, testFn); err != nil { + if err = retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err = execCommand(t, "kubectl", "get", "cluster", cluster) + if err != nil { + return err + } + if strings.Contains(output, "Provisioned") { + return nil + } + return errors.New("cluster not provisioned") + }); err != nil { t.Fatal(err) } } // verifyCluster check if cluster is functional -func verifyCluster(t testing.TB) { +func verifyCluster(ctx context.Context, t testing.TB) { // Wait until all machines are running + t.Log("Verify cluster deployment") machines := 0 - command := []string{"kubectl", "get", "machine", "--no-headers"} - testFn := func(output string) bool { + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "get", "machine", "--no-headers") + if err != nil { + return err + } machines = strings.Count(output, "\n") running := strings.Count(output, "Running") - t.Logf("Machines %d out of which %d are Running", machines, running) + msg := fmt.Sprintf("Machines %d out of which %d are Running", machines, running) + t.Logf(msg) if machines == running { - return true + return nil } - return false - } - if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil { + return errors.New(msg) + }); err != nil { t.Fatal(err) } // Make sure we have as many nodes as machines - command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "get", "no", "--no-headers"} - testFn = func(output string) bool { + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "get", "no", "--no-headers") + if err != nil { + return err + } nodes := strings.Count(output, "\n") ready := strings.Count(output, " Ready") - t.Logf("Machines are %d, Nodes are %d out of which %d are Ready", machines, nodes, ready) + msg := fmt.Sprintf("Machines are %d, Nodes are %d out of which %d are Ready", machines, nodes, ready) + t.Log(msg) if machines == nodes && ready == nodes { - return true + return nil } - return false - } - if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil { + return errors.New(msg) + }); err != nil { t.Fatal(err) } } // deployMicrobot deploys a deployment of microbot. -func deployMicrobot(t testing.TB) { +func deployMicrobot(ctx context.Context, t testing.TB) { t.Log("Deploying microbot") - command := []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "create", "deploy", "--image=cdkbot/microbot:1", "--replicas=30", "bot"} - cmd := exec.Command(command[0], command[1:]...) - outputBytes, err := cmd.CombinedOutput() - if err != nil { - t.Error(string(outputBytes)) + if output, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "create", "deploy", "--image=cdkbot/microbot:1", "--replicas=30", "bot"); err != nil { + t.Error(output) t.Fatalf("Failed to create the requested microbot deployment. 
%s", err) } // Make sure we have as many nodes as machines t.Log("Waiting for the deployment to complete") - command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "wait", "deploy/bot", "--for=jsonpath={.status.readyReplicas}=30"} - if err = retryableCommand(t, command); err != nil { + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + _, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "wait", "deploy/bot", "--for=jsonpath={.status.readyReplicas}=30") + return err + }); err != nil { t.Fatal(err) } + } // validateNoCalico Checks if calico daemon set is not deployed on the cluster. -func validateNoCalico(t testing.TB) { +func validateNoCalico(ctx context.Context, t testing.TB) { t.Log("Validate no Calico daemon set") - t.Log("Checking for calico daemon set") - command := []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "-n", "kube-system", "get", "ds"} - testFn := func(output string) bool { - return !strings.Contains(output, "calico") - } - if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil { + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "-n", "kube-system", "get", "ds") + if err != nil { + return err + } + if strings.Contains(output, "calico") { + return errors.New("there is calico daemon set") + + } + return nil + }); err != nil { t.Fatal(err) } t.Log("No calico daemon set") @@ -273,6 +291,7 @@ func validateNoCalico(t testing.TB) { // installCilium installs cilium from helm chart func installCilium(t testing.TB) { t.Log("Deploy Cilium") + command := []string{"helm", "install", "cilium", "--repo", "https://helm.cilium.io/", "cilium", "--namespace", "kube-system", "--set", "cni.confPath=/var/snap/microk8s/current/args/cni-network", "--set", "cni.binPath=/var/snap/microk8s/current/opt/cni/bin", @@ -281,7 +300,6 @@ func installCilium(t testing.TB) { "--set", "ipam.operator.clusterPoolIPv4PodCIDRList=10.1.0.0/16", "--set", "nodePort.enabled=true", } - t.Logf("running command: %s", strings.Join(command, " ")) cmd := exec.Command(command[0], command[1:]...) cmd.Env = append(cmd.Env, "KUBECONFIG="+KUBECONFIG) @@ -293,132 +311,98 @@ func installCilium(t testing.TB) { } // validateCilium checks a deployment of cilium daemon set. 
-func validateCilium(t testing.TB) {
+func validateCilium(ctx context.Context, t testing.TB) {
 	t.Log("Validate Cilium")
 
-	command := []string{"kubectl", "get", "machine", "--no-headers"}
 	machines := 0
-	testFn := func(output string) bool {
+	if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error {
+		output, err := execCommand(t, "kubectl", "get", "machine", "--no-headers")
+		if err != nil {
+			return err
+		}
 		machines = strings.Count(output, "\n")
-		return machines > 0
-	}
-	if err := retryableCommandWithConditionFunction(t, command, testFn); err != nil {
+		if machines == 0 {
+			return errors.New("machines haven't started yet")
+		}
+		return err
+	}); err != nil {
 		t.Fatal(err)
 	}
 
 	t.Log("Checking Cilium daemon set")
-	command = []string{"kubectl", "--kubeconfig=" + KUBECONFIG, "-n", "kube-system", "wait", "ds/cilium", fmt.Sprintf("--for=jsonpath={.status.numberAvailable}=%d", machines)}
-	if err := retryableCommand(t, command); err != nil {
+	if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error {
+		_, err := execCommand(t, "kubectl", "--kubeconfig="+KUBECONFIG, "-n", "kube-system", "wait", "ds/cilium", fmt.Sprintf("--for=jsonpath={.status.numberAvailable}=%d", machines))
+		return err
+	}); err != nil {
 		t.Fatal(err)
 	}
 }
 
-// retryableCommand runs command and retires if command fails.
-// Runs provided command, if commands return err retries it retryMaxAttempts and waits secondsBetweenAttempts between next execution.
-// If after all attempts error occurs error gets returned.
-func retryableCommand(t testing.TB, command []string) error {
-	var err error
-	var outputBytes []byte
-	for attempt := 0; attempt < retryMaxAttempts; attempt++ {
-		if attempt > 1 {
-			t.Logf("Retrying")
-			time.Sleep(secondsBetweenAttempts * time.Second)
-		}
-		t.Logf("running command: %s", strings.Join(command, " "))
-		cmd := exec.Command(command[0], command[1:]...)
-		outputBytes, err = cmd.CombinedOutput()
-		t.Log(string(outputBytes))
-		if err == nil {
-			break
-		}
-	}
-	return err
-}
-
-// retryableCommandWithConditionFunction runs command with retry.
-// Runs provided command, if commands return err retries it retryMaxAttempts and waits secondsBetweenAttempts between next execution.
-// Additionally, to finish correctly runs function fn func(string) bool, if passed function returns true
-// the retryableCommandWithConditionFunction returns nil
-func retryableCommandWithConditionFunction(t testing.TB, command []string, fn func(string) bool) error {
-	var err error
-	var outputBytes []byte
-	for attempt := 0; attempt < retryMaxAttempts; attempt++ {
-		if attempt > 1 {
-			t.Logf("Retrying")
-			time.Sleep(secondsBetweenAttempts * time.Second)
-		}
-		t.Logf("running command: %s", strings.Join(command, " "))
-		cmd := exec.Command(command[0], command[1:]...)
-		outputBytes, err = cmd.CombinedOutput()
-		output := string(outputBytes)
-		t.Log(output)
-		if err == nil && fn(output) {
-			break
-		}
-	}
-	return err
+// execCommand executes the command, converts the output bytes to a string, and returns the error from exec
+func execCommand(t testing.TB, command ...string) (string, error) {
+	t.Logf("running command: %s", strings.Join(command, " "))
+	cmd := exec.Command(command[0], command[1:]...)
+	outputBytes, err := cmd.CombinedOutput()
+	output := string(outputBytes)
+	t.Log(output)
+	return output, err
 }
 
 // upgradeCluster upgrades the cluster to a new version based on the upgrade strategy.
-func upgradeCluster(t testing.TB, upgradeStrategy string) { - +func upgradeCluster(ctx context.Context, t testing.TB, upgradeStrategy string) { version := "v1.28.0" - controlPlaneName := "test-ci-cluster-control-plane" - controlPlaneType := "microk8scontrolplanes.controlplane.cluster.x-k8s.io" - workerDeploymentName := "test-ci-cluster-md-0" - workerDeploymentType := "machinedeployments.cluster.x-k8s.io" - t.Logf("Upgrading cluster to %s via %s", version, upgradeStrategy) + // Patch control plane machine upgrades based on type of upgrade strategy. - outputBytes, err := controlPlanePatch(controlPlaneName, controlPlaneType, version, upgradeStrategy) - if err != nil { - t.Error(string(outputBytes)) + if _, err := execCommand(t, "kubectl", "patch", "--type=merge", + "microk8scontrolplanes.controlplane.cluster.x-k8s.io", "test-ci-cluster-control-plane", "--patch", + fmt.Sprintf(`{"spec":{"version":"%s","upgradeStrategy":"%s"}}`, version, upgradeStrategy)); err != nil { t.Fatalf("Failed to merge the patch to control plane. %s", err) } // Patch worker machine upgrades. - outputBytes, err = workerPatch(workerDeploymentName, workerDeploymentType, version) - if err != nil { - t.Error(string(outputBytes)) + if _, err := execCommand(t, "kubectl", "patch", "--type=merge", + "machinedeployments.cluster.x-k8s.io", "test-ci-cluster-md-0", "--patch", + fmt.Sprintf(`{"spec":{"template":{"spec":{"version":"%s"}}}}`, version)); err != nil { t.Fatalf("Failed to merge the patch to the machine deployments. %s", err) } time.Sleep(30 * time.Second) // Now all the machines should be upgraded to the new version. - - command := []string{"kubectl", "get", "machine", "--no-headers"} - testFn := func(output string) bool { + if err := retryFor(ctx, retryMaxAttempts, secondsBetweenAttempts*time.Second, func() error { + output, err := execCommand(t, "kubectl", "get", "machine", "--no-headers") + if err != nil { + return err + } totalMachines := strings.Count(output, "Running") - - // We count all the "Running" machines with the new version. re := regexp.MustCompile("Running .* " + version) upgradedMachines := len(re.FindAllString(output, -1)) - t.Logf("Total machines %d out of which %d are upgraded", totalMachines, upgradedMachines) + msg := fmt.Sprintf("Total machines %d out of which %d are upgraded", totalMachines, upgradedMachines) + t.Logf(msg) if totalMachines == upgradedMachines { - return true + return nil } - return false - } - if err = retryableCommandWithConditionFunction(t, command, testFn); err != nil { + return errors.New(msg) + }); err != nil { t.Fatal(err) } } -// controlPlanePatch patches the control plane machines based on the upgrade strategy and version. -func controlPlanePatch(controlPlaneName, controlPlaneType, version, upgradeStrategy string) ([]byte, error) { - command := []string{"kubectl", "patch", "--type=merge", controlPlaneType, controlPlaneName, "--patch", - fmt.Sprintf(`{"spec":{"version":"%s","upgradeStrategy":"%s"}}`, version, upgradeStrategy)} - cmd := exec.Command(command[0], command[1:]...) - - return cmd.CombinedOutput() -} - -// workerPatch patches a given worker machines with the given version. -func workerPatch(workerDeploymentName, workerDeploymentType, version string) ([]byte, error) { - command := []string{"kubectl", "patch", "--type=merge", workerDeploymentType, workerDeploymentName, "--patch", - fmt.Sprintf(`{"spec":{"template":{"spec":{"version":"%s"}}}}`, version)} - cmd := exec.Command(command[0], command[1:]...) 
- - return cmd.CombinedOutput() +// retryFor will retry a given function for the given amount of times. +// retryFor will wait for backoff between retries. +func retryFor(ctx context.Context, retryCount int, delayBetweenRetry time.Duration, retryFunc func() error) error { + var err error = nil + for i := 0; i < retryCount; i++ { + if err = retryFunc(); err != nil { + select { + case <-ctx.Done(): + return context.Canceled + case <-time.After(delayBetweenRetry): + continue + } + } + break + } + return err } From f313dcc26fc6855d29b6ca507ef0ebaf487b54f3 Mon Sep 17 00:00:00 2001 From: Maciej Golaszewski Date: Mon, 9 Sep 2024 10:41:46 +0200 Subject: [PATCH 7/7] Apply suggestions from code review Co-authored-by: Kevin W Monroe --- integration/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/integration/README.md b/integration/README.md index e4999e8..8afc059 100644 --- a/integration/README.md +++ b/integration/README.md @@ -7,7 +7,7 @@ The integration/e2e tests have the following prerequisites: * make sure to have ssh key in aws `capi`in `us-east-1 region` if you do not have key refer to CAPI on [AWS prerequisites documentation](https://cluster-api-aws.sigs.k8s.io/topics/using-clusterawsadm-to-fulfill-prerequisites#ssh-key-pair) - * to run tests locally you will need to have on path fallowing programs `clusterctl`, `kubectl`, `helm` + * local testing requires the following to be available in the PATH: `clusterctl`, `kubectl`, `helm` * a management cluster initialised via `clusterctl` with the infrastructure targeted as well as the version of the MicroK8s providers we want to be tested @@ -68,7 +68,7 @@ kubectl apply -f bootstrap/bootstrap-components.yaml -f control-plane/control-pl ``` ### Cluster definitions for e2e -Cluster definition are stored in [`manifests`](./cluster-manifests) directory. +Cluster definitions are stored in the [`manifests`](./cluster-manifests) directory. #### Trigger the e2e tests