From 4496f40b331325fb3c57f7f14b261cad4e36ee77 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Thu, 22 Aug 2024 17:12:36 -0700
Subject: [PATCH] Fix various issues with validating deployment

Signed-off-by: Kyle Squizzato
---
 .github/workflows/test.yml                    |   2 +-
 Makefile                                      |  16 +-
 config/dev/deployment.yaml                    |  26 ++--
 test/e2e/e2e_test.go                          |  63 ++++----
 ...ate_provider.go => validate_deployment.go} | 145 ++++++++++++------
 test/utils/deployment.go                      |  15 +-
 test/utils/utils.go                           |  28 +++-
 7 files changed, 190 insertions(+), 105 deletions(-)
 rename test/e2e/{validate_provider.go => validate_deployment.go} (68%)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index c258f06bc..6c6ab64f3 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -71,4 +71,4 @@ jobs:
         uses: azure/setup-kubectl@v4
       - name: Run E2E tests
         run: |
-          make e2e-test
+          make test-e2e
diff --git a/Makefile b/Makefile
index 622d0f575..3fa17d2be 100644
--- a/Makefile
+++ b/Makefile
@@ -106,7 +106,7 @@ test: generate-all fmt vet envtest tidy external-crd ## Run tests.
 # Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors.
 .PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up.
 test-e2e: cli-install
-	KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v
+	KIND_CLUSTER_NAME="hmc-test" KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v -timeout=2h
 
 .PHONY: lint
 lint: golangci-lint ## Run golangci-lint linter & yamllint
@@ -270,10 +270,10 @@ dev-aws: yq
 	@$(YQ) e ".data.credentials = \"${AWS_CREDENTIALS}\"" config/dev/awscredentials.yaml | $(KUBECTL) -n $(NAMESPACE) apply -f -
 
 .PHONY: dev-apply
-dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates dev-aws
+dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates dev-aws ## Deploy a kind cluster, local registry, push the image, deploy the controller, and apply the templates.
 
 .PHONY: dev-destroy
-dev-destroy: kind-undeploy registry-undeploy
+dev-destroy: kind-undeploy registry-undeploy ## Destroy the development environment by deleting the kind cluster and local registry.
 
 .PHONY: dev-aws-apply
 dev-aws-apply: ## Apply the AWS deployment
@@ -286,11 +286,17 @@ dev-aws-destroy: ## Delete the AWS deployment
 
 .PHONY: dev-aws-nuke
 dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'dev-aws-apply', prefix with CLUSTER_NAME to nuke a specific cluster.
 	@CLUSTER_NAME=$(CLUSTER_NAME) envsubst < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml
-	$(CLOUDNUKE) aws --region us-west-2 --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,internet-gateway,network-interface,security-group
+	DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group
 	@rm config/dev/cloud_nuke.yaml
 
+.PHONY: test-apply
+test-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates
+
+.PHONY: test-destroy
+test-destroy: kind-undeploy registry-undeploy
+
 .PHONY: cli-install
-cli-install: clusterawsadm clusterctl cloud-nuke
+cli-install: clusterawsadm clusterctl cloud-nuke yq ## Install the necessary CLI tools for deployment, development and testing.
 
 ##@ Dependencies
diff --git a/config/dev/deployment.yaml b/config/dev/deployment.yaml
index 58ba18e32..8d85b82a5 100644
--- a/config/dev/deployment.yaml
+++ b/config/dev/deployment.yaml
@@ -1,17 +1,17 @@
 apiVersion: hmc.mirantis.com/v1alpha1
 kind: Deployment
 metadata:
-  name: aws-dev
+  name: bba1743d-e2e-test
 spec:
-  template: aws-standalone-cp
-  config:
-    region: us-east-2
-    publicIP: true
-    controlPlaneNumber: 1
-    workersNumber: 1
-    controlPlane:
-      amiID: ami-02f3416038bdb17fb
-      instanceType: t3.small
-    worker:
-      amiID: ami-02f3416038bdb17fb
-      instanceType: t3.small
+  config:
+    controlPlane:
+      amiID: ami-0989c067ff3da4b27
+      instanceType: t3.small
+    controlPlaneNumber: 1
+    publicIP: true
+    region: us-west-2
+    worker:
+      amiID: ami-0989c067ff3da4b27
+      instanceType: t3.small
+    workersNumber: 1
+  template: aws-standalone-cp
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index be59d7656..ffbde6a09 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -32,19 +32,19 @@ import (
 const namespace = "hmc-system"
 
 var _ = Describe("controller", Ordered, func() {
-	BeforeAll(func() {
-		By("building and deploying the controller-manager")
-		cmd := exec.Command("make", "dev-apply")
-		_, err := utils.Run(cmd)
-		Expect(err).NotTo(HaveOccurred())
-	})
-
-	AfterAll(func() {
-		By("removing the controller-manager")
-		cmd := exec.Command("make", "dev-destroy")
-		_, err := utils.Run(cmd)
-		Expect(err).NotTo(HaveOccurred())
-	})
+	// BeforeAll(func() {
+	// 	By("building and deploying the controller-manager")
+	// 	cmd := exec.Command("make", "test-apply")
+	// 	_, err := utils.Run(cmd)
+	// 	Expect(err).NotTo(HaveOccurred())
+	// })
+
+	// AfterAll(func() {
+	// 	By("removing the controller-manager")
+	// 	cmd := exec.Command("make", "test-destroy")
+	// 	_, err := utils.Run(cmd)
+	// 	Expect(err).NotTo(HaveOccurred())
+	// })
 
 	Context("Operator", func() {
 		It("should run successfully", func() {
@@ -54,7 +54,7 @@ var _ = Describe("controller", Ordered, func() {
 			By("validating that the controller-manager pod is running as expected")
 			verifyControllerUp := func() error {
 				// Ensure only one controller pod is running.
-				podList, err := kc.Client.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{
+				podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{
 					LabelSelector: "control-plane=controller-manager,app.kubernetes.io/name=cluster-api",
 				})
 				if err != nil {
@@ -81,8 +81,16 @@ var _ = Describe("controller", Ordered, func() {
 				}
 
 				return nil
-			}()
-			EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed())
+			}
+			EventuallyWithOffset(1, func() error {
+				err := verifyControllerUp()
+				if err != nil {
+					_, _ = fmt.Fprintf(GinkgoWriter, "Controller pod validation failed: %v\n", err)
+					return err
+				}
+
+				return nil
+			}, 5*time.Minute, time.Second).Should(Succeed())
 		})
 	})
 
@@ -100,29 +108,30 @@ var _ = Describe("controller", Ordered, func() {
 		})
 
 		AfterAll(func() {
-			// Purge the AWS resources, the AfterAll for the controller will
-			// clean up the management cluster.
-			cmd := exec.Command("make", "dev-aws-nuke")
-			_, err := utils.Run(cmd)
-			ExpectWithOffset(2, err).NotTo(HaveOccurred())
+			// // Purge the AWS resources, the AfterAll for the controller will
+			// // clean up the management cluster.
+			// cmd := exec.Command("make", "dev-aws-nuke")
+			// _, err := utils.Run(cmd)
+			// ExpectWithOffset(2, err).NotTo(HaveOccurred())
 		})
 
 		It("should work with an AWS provider", func() {
			By("using the aws-standalone-cp template")
 
-			clusterName, err := utils.ConfigureDeploymentConfig(utils.ProviderAWS, utils.AWSStandaloneCPTemplate)
-			ExpectWithOffset(2, err).NotTo(HaveOccurred())
+			//clusterName, err := utils.ConfigureDeploymentConfig(utils.ProviderAWS, utils.TemplateAWSStandaloneCP)
+			//ExpectWithOffset(1, err).NotTo(HaveOccurred())
+
+			clusterName := "bba1743d-e2e-test"
 			cmd := exec.Command("make", "dev-aws-apply")
-			_, err = utils.Run(cmd)
+			_, err := utils.Run(cmd)
 			ExpectWithOffset(2, err).NotTo(HaveOccurred())
-			EventuallyWithOffset(2, func() error {
+			_, _ = fmt.Fprintf(GinkgoWriter, "Waiting for resource validation to succeed\n")
+			Eventually(func() error {
 				return verifyProviderDeployed(context.Background(), kc, clusterName)
-			}(), 30*time.Minute, 10*time.Second).Should(Succeed())
-
+			}).WithTimeout(30 * time.Minute).WithPolling(5 * time.Second).Should(Succeed())
 			By("using the aws-hosted-cp template")
 			// TODO: Use the standalone control plane resources to craft a hosted
 			// control plane and test it.
-
 		})
 	})
 })
diff --git a/test/e2e/validate_provider.go b/test/e2e/validate_deployment.go
similarity index 68%
rename from test/e2e/validate_provider.go
rename to test/e2e/validate_deployment.go
index 171ad67fe..5d19f69b7 100644
--- a/test/e2e/validate_provider.go
+++ b/test/e2e/validate_deployment.go
@@ -18,10 +18,9 @@ import (
 	"context"
 	"fmt"
 
-	. "github.com/onsi/ginkgo/v2"
-
 	"github.com/Mirantis/hmc/test/kubeclient"
 	"github.com/Mirantis/hmc/test/utils"
+	. "github.com/onsi/ginkgo/v2"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/resource"
@@ -32,79 +31,109 @@ import (
 // resourceValidationFunc is intended to validate a specific kubernetes
-// resource. It is meant to be used in conjunction with an Eventually block,
-// however, in some cases it may be necessary to end the Eventually block early
-// if the resource will never reach a ready state, in these instances Ginkgo's
-// Fail function should be used.
+// resource.
 type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error
 
+var resourceValidators = map[string]resourceValidationFunc{
+	"clusters":       validateClusters,
+	"machines":       validateMachines,
+	"control-planes": validateK0sControlPlanes,
+	"csi-driver":     validateCSIDriver,
+	"ccm":            validateCCM,
+}
+
 // verifyProviderDeployed is a provider-agnostic verification that checks for
 // the presence of specific resources in the cluster using
-// resourceValidationFuncs and clusterValidationFuncs.
+// resourceValidationFuncs and clusterValidationFuncs. It is meant to be used
+// in conjunction with an Eventually block. In some cases it may be necessary
+// to end the Eventually block early if the resource will never reach a ready
+// state, in these instances Ginkgo's Fail function should be used.
 func verifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error {
 	// Sequentially validate each resource type, only returning the first error
 	// as to not move on to the next resource type until the first is resolved.
-	for _, resourceValidator := range []resourceValidationFunc{
-		validateClusters,
-		validateMachines,
-		validateK0sControlPlanes,
-		validateCSIDriver,
-		validateCCM,
-	} {
+	for _, name := range []string{"clusters", "machines", "control-planes", "csi-driver", "ccm"} {
+		validator, ok := resourceValidators[name]
+		if !ok {
+			continue
+		}
+
+		if err := validator(ctx, kc, clusterName); err != nil {
+			_, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation error: %v\n", name, err)
+			return err
+		}
+
+		_, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name)
 		// XXX: Once we validate for the first time should we move the
 		// validation out and consider it "done"? Or is there a possibility
 		// that the resources could enter a non-ready state later?
-		if err := resourceValidator(ctx, kc, clusterName); err != nil {
-			return err
-		}
+		delete(resourceValidators, name)
 	}
 
 	return nil
 }
 
 func validateClusters(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error {
-	return validateNameAndStatus(ctx, kc, clusterName, schema.GroupVersionResource{
+	gvr := schema.GroupVersionResource{
 		Group:    "cluster.x-k8s.io",
 		Version:  "v1beta1",
 		Resource: "clusters",
-	})
+	}
+
+	client, err := kc.GetDynamicClient(gvr)
+	if err != nil {
+		Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err))
+	}
+
+	cluster, err := client.Get(ctx, clusterName, metav1.GetOptions{})
+	if err != nil {
+		return fmt.Errorf("failed to get %s %s: %v", gvr.Resource, clusterName, err)
+	}
+
+	phase, _, err := unstructured.NestedString(cluster.Object, "status", "phase")
+	if err != nil {
+		return fmt.Errorf("failed to get status.phase for %s: %v", cluster.GetName(), err)
+	}
+
+	if phase == "Deleting" {
+		Fail(fmt.Sprintf("%s is in 'Deleting' phase", cluster.GetName()))
+	}
+
+	if err := utils.ValidateObjectNamePrefix(cluster, clusterName); err != nil {
+		Fail(err.Error())
+	}
+
+	if err := utils.ValidateConditionsTrue(cluster); err != nil {
+		return err
+	}
+
+	return nil
 }
 
 func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error {
-	return validateNameAndStatus(ctx, kc, clusterName, schema.GroupVersionResource{
+	gvr := schema.GroupVersionResource{
 		Group:    "cluster.x-k8s.io",
 		Version:  "v1beta1",
 		Resource: "machines",
-	})
-}
+	}
 
-func validateNameAndStatus(ctx context.Context, kc *kubeclient.KubeClient,
-	clusterName string, gvr schema.GroupVersionResource) error {
 	client, err := kc.GetDynamicClient(gvr)
 	if err != nil {
 		Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err))
 	}
 
-	list, err := client.List(ctx, metav1.ListOptions{})
+	machines, err := client.List(ctx, metav1.ListOptions{
+		LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName,
+	})
 	if err != nil {
-		Fail(fmt.Sprintf("failed to list %s: %v", gvr.Resource, err))
+		return fmt.Errorf("failed to list %s: %v", gvr.Resource, err)
 	}
 
-	for _, item := range list.Items {
-		phase, _, err := unstructured.NestedString(item.Object, "status", "phase")
-		if err != nil {
-			return fmt.Errorf("failed to get status.phase for %s: %v", item.GetName(), err)
-		}
-
-		if phase == "Deleting" {
-			Fail(fmt.Sprintf("%s is in 'Deleting' phase", item.GetName()))
-		}
-
-		if err := utils.ValidateObjectNamePrefix(&item, clusterName); err != nil {
+	for _, machine := range machines.Items {
+		if err := utils.ValidateObjectNamePrefix(&machine, clusterName); err != nil {
 			Fail(err.Error())
 		}
 
-		if err := utils.ValidateConditionsTrue(&item); err != nil {
+		if err := utils.ValidateConditionsTrue(&machine); err != nil {
 			return err
 		}
 	}
@@ -116,13 +145,15 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl
 	k0sControlPlaneClient, err := kc.GetDynamicClient(schema.GroupVersionResource{
 		Group:    "controlplane.cluster.x-k8s.io",
 		Version:  "v1beta1",
-		Resource: "K0sControlPlane",
+		Resource: "k0scontrolplanes",
 	})
 	if err != nil {
 		return fmt.Errorf("failed to get K0sControlPlane client: %w", err)
 	}
 
-	controlPlanes, err := k0sControlPlaneClient.List(ctx, metav1.ListOptions{})
+	controlPlanes, err := k0sControlPlaneClient.List(ctx, metav1.ListOptions{
+		LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName,
+	})
 	if err != nil {
 		return fmt.Errorf("failed to list K0sControlPlanes: %w", err)
 	}
@@ -134,30 +165,46 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl
 		objKind, objName := utils.ObjKindName(&controlPlane)
 
-		// k0s does not use the metav1.Condition type for status
-		// conditions, instead it uses a custom type so we can't use
+		// k0s does not use the metav1.Condition type for status.conditions,
+		// instead it uses a custom type so we can't use
 		// ValidateConditionsTrue here, instead we'll check for "ready: true".
-		conditions, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status", "conditions")
+		status, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status")
 		if !found {
-			return fmt.Errorf("no status conditions found for %s: %s", objKind, objName)
+			return fmt.Errorf("no status found for %s: %s", objKind, objName)
 		}
 		if err != nil {
 			return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err)
 		}
 
-		c, ok := conditions.(map[string]interface{})
+		st, ok := status.(map[string]interface{})
 		if !ok {
-			return fmt.Errorf("expected K0sControlPlane condition to be type map[string]interface{}, got: %T", conditions)
+			return fmt.Errorf("expected K0sControlPlane condition to be type map[string]interface{}, got: %T", status)
 		}
 
-		if c["ready"] != "true" {
-			return fmt.Errorf("K0sControlPlane %s is not ready, status: %v", controlPlane.GetName(), conditions)
+		if ready, ok := st["ready"].(bool); !ok || !ready {
+			return fmt.Errorf("K0sControlPlane %s is not ready, status: %+v", controlPlane.GetName(), status)
 		}
 	}
 
 	return nil
 }
 
+// apiVersion: v1
+// kind: Pod
+// metadata:
+//   name: test-pvc-pod
+// spec:
+//   volumes:
+//     - name: test-pvc-vol
+//       persistentVolumeClaim:
+//         claimName: pvcName
+//   containers:
+//     - name: test-pvc-container
+//       image: nginx
+//       volumeMounts:
+//         - mountPath: "/storage"
+//           name: task-pv-storage
+
 // validateCSIDriver validates that the provider CSI driver is functioning
 // by creating a PVC and verifying it enters "Bound" status.
 func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error {
@@ -166,7 +213,7 @@ func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterNa
 		Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err))
 	}
 
-	pvcName := clusterName + "-test-pvc"
+	pvcName := clusterName + "-csi-test-pvc"
 
 	_, err = clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace).
 		Create(ctx, &corev1.PersistentVolumeClaim{
diff --git a/test/utils/deployment.go b/test/utils/deployment.go
index 875668479..1f088bde8 100644
--- a/test/utils/deployment.go
+++ b/test/utils/deployment.go
@@ -21,6 +21,7 @@ import (
 	"os/exec"
 
 	"github.com/google/uuid"
+	. "github.com/onsi/ginkgo/v2"
"github.com/onsi/ginkgo/v2" "gopkg.in/yaml.v3" ) @@ -33,16 +34,18 @@ const ( type Template string const ( - AWSStandaloneCPTemplate Template = "aws-standalone-cp" - AWSHostedCPTemplate Template = "aws-hosted-cp" + TemplateAWSStandaloneCP Template = "aws-standalone-cp" + TemplateAWSHostedCP Template = "aws-hosted-cp" + + deploymentConfigFile = "./config/dev/deployment.yaml" ) -// ConfigureDeploymentConfig modifies the ./config/dev/deployment.yaml for +// ConfigureDeploymentConfig modifies the config/dev/deployment.yaml for // use in test and returns the generated cluster name. func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (string, error) { generatedName := uuid.NewString()[:8] + "-e2e-test" - deploymentConfigBytes, err := os.ReadFile("./config/dev/deployment.yaml") + deploymentConfigBytes, err := os.ReadFile(deploymentConfigFile) if err != nil { return "", fmt.Errorf("failed to read deployment config: %w", err) } @@ -94,7 +97,9 @@ func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (st return "", fmt.Errorf("failed to marshal deployment config: %w", err) } - return generatedName, os.WriteFile("./config/dev/deployment.yaml", deploymentConfigBytes, 0644) + _, _ = fmt.Fprintf(GinkgoWriter, "Generated AWS cluster name: %q\n", generatedName) + + return generatedName, os.WriteFile(deploymentConfigFile, deploymentConfigBytes, 0644) default: return "", fmt.Errorf("unsupported provider: %s", provider) } diff --git a/test/utils/utils.go b/test/utils/utils.go index bfb7a6c46..613ac9605 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -24,6 +24,7 @@ import ( . "github.com/onsi/ginkgo/v2" //nolint:golint,revive metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" ) // Run executes the provided command within this context @@ -110,18 +111,35 @@ func ValidateConditionsTrue(unstrObj *unstructured.Unstructured) error { return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) } + var errs error + for _, condition := range conditions { - condition, ok := condition.(metav1.Condition) + conditionMap, ok := condition.(map[string]interface{}) if !ok { - return fmt.Errorf("expected %s: %s condition to be type metav1.Condition, got: %T", - objKind, objName, condition) + return fmt.Errorf("expected %s: %s condition to be type map[string]interface{}, got: %T", + objKind, objName, conditionMap) + } + + var c *metav1.Condition + + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(conditionMap, &c); err != nil { + return fmt.Errorf("failed to convert condition map to metav1.Condition: %w", err) } - if condition.Status == metav1.ConditionTrue { + if c.Status == metav1.ConditionTrue { continue } - return fmt.Errorf("%s %s condition %s is not ready: %s", objKind, objName, condition.Type, condition.Message) + errorStr := fmt.Sprintf("%s: %s", c.Type, c.Reason) + if c.Message != "" { + errorStr = fmt.Sprintf("%s: %s", errorStr, c.Message) + } + + errs = errors.Join(fmt.Errorf(errorStr), errs) + } + + if errs != nil { + return fmt.Errorf("%s %s is not ready with conditions: %w", objKind, objName, errs) } return nil