From dda80dcac7ca217355f9a02297063476d401ef21 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Mon, 19 Aug 2024 16:13:46 -0700 Subject: [PATCH 01/35] Rework existing e2e test to use kubeclient, add utils for AWS setup Signed-off-by: Kyle Squizzato --- .github/workflows/test.yml | 28 ++++- Makefile | 6 +- config/dev/aws-managedcluster.yaml | 12 ++- test/e2e/e2e_test.go | 138 ++++++++++++------------- test/kubeclient/kubeclient.go | 114 +++++++++++++++++++++ test/utils/utils.go | 159 ++++++++++++++++++----------- 6 files changed, 312 insertions(+), 145 deletions(-) create mode 100644 test/kubeclient/kubeclient.go diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 56bb5543f..c258f06bc 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,7 @@ on: - main - release-* tags: - - "*" + - '*' paths-ignore: - '**.md' pull_request: @@ -18,7 +18,7 @@ on: - main - release-* tags: - - "*" + - '*' paths-ignore: - 'config/**' - '**.md' @@ -33,7 +33,7 @@ jobs: steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Set up Go + - name: Setup Go uses: actions/setup-go@v5 with: go-version: ${{ env.GO_VERSION }} @@ -47,8 +47,28 @@ jobs: run: | make test - name: Build - run: | + run: | make build - name: Image build run: | make docker-build + e2etest: + name: E2E Tests + runs-on: ubuntu-latest + needs: build + env: + AWS_REGION: us-west-2 + AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + - name: Setup kubectl + uses: azure/setup-kubectl@v4 + - name: Run E2E tests + run: | + make e2e-test diff --git a/Makefile b/Makefile index 2b3f89afc..10f924c05 100644 --- a/Makefile +++ b/Makefile @@ -104,9 +104,9 @@ test: generate-all fmt vet envtest tidy external-crd ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out # Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. -.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. -test-e2e: - go test ./test/e2e/ -v -ginkgo.v +.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. +test-e2e: cli-install + KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v .PHONY: lint lint: golangci-lint ## Run golangci-lint linter & yamllint diff --git a/config/dev/aws-managedcluster.yaml b/config/dev/aws-managedcluster.yaml index e40cc1d39..6ed56f2de 100644 --- a/config/dev/aws-managedcluster.yaml +++ b/config/dev/aws-managedcluster.yaml @@ -4,13 +4,15 @@ metadata: name: aws-dev namespace: ${NAMESPACE} spec: - template: aws-standalone-cp config: - region: us-east-2 - publicIP: true - controlPlaneNumber: 1 - workersNumber: 1 controlPlane: + amiID: ami-0989c067ff3da4b27 instanceType: t3.small + controlPlaneNumber: 1 + publicIP: true + region: us-west-2 worker: + amiID: ami-0989c067ff3da4b27 instanceType: t3.small + workersNumber: 1 + template: aws-standalone-cp diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 33bf9f27b..a9ee02928 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -15,105 +15,93 @@ package e2e import ( + "context" "fmt" "os/exec" + "strings" "time" . 
"github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/Mirantis/hmc/test/kubeclient" "github.com/Mirantis/hmc/test/utils" ) const namespace = "hmc-system" var _ = Describe("controller", Ordered, func() { - BeforeAll(func() { - By("installing prometheus operator") - Expect(utils.InstallPrometheusOperator()).To(Succeed()) - - By("installing the cert-manager") - Expect(utils.InstallCertManager()).To(Succeed()) - - By("creating manager namespace") - cmd := exec.Command("kubectl", "create", "ns", namespace) - _, _ = utils.Run(cmd) - }) - - AfterAll(func() { - By("uninstalling the Prometheus manager bundle") - utils.UninstallPrometheusOperator() - - By("uninstalling the cert-manager bundle") - utils.UninstallCertManager() - - By("removing manager namespace") - cmd := exec.Command("kubectl", "delete", "ns", namespace) - _, _ = utils.Run(cmd) - }) + // BeforeAll(func() { + // By("building and deploying the controller-manager") + // cmd := exec.Command("make", "dev-apply") + // _, err := utils.Run(cmd) + // Expect(err).NotTo(HaveOccurred()) + // }) + + // AfterAll(func() { + // By("removing the controller-manager") + // cmd := exec.Command("make", "dev-destroy") + // _, err := utils.Run(cmd) + // Expect(err).NotTo(HaveOccurred()) + // }) Context("Operator", func() { It("should run successfully", func() { - var controllerPodName string - var err error + kc, err := kubeclient.New(namespace) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) - // projectimage stores the name of the image used in the example - var projectimage = "example.com/hmc:v0.0.1" - - By("building the manager(Operator) image") - cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage)) - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - - By("loading the the manager(Operator) image on Kind") - err = utils.LoadImageToKindClusterWithName(projectimage) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) + By("validating that the controller-manager pod is running as expected") + verifyControllerUp := func() error { + // Ensure only one controller pod is running. 
+ podList, err := kc.Client.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: "control-plane=controller-manager,app.kubernetes.io/name=cluster-api", + }) + if err != nil { + return err + } - By("installing CRDs") - cmd = exec.Command("make", "install") - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) + if len(podList.Items) != 1 { + return fmt.Errorf("expected 1 controller pod, got %d", len(podList.Items)) + } - By("deploying the controller-manager") - cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectimage)) - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) + controllerPod := podList.Items[0] - By("validating that the controller-manager pod is running as expected") - verifyControllerUp := func() error { - // Get pod name - - cmd = exec.Command("kubectl", "get", - "pods", "-l", "control-plane=controller-manager", - "-o", "go-template={{ range .items }}"+ - "{{ if not .metadata.deletionTimestamp }}"+ - "{{ .metadata.name }}"+ - "{{ \"\\n\" }}{{ end }}{{ end }}", - "-n", namespace, - ) - - podOutput, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - podNames := utils.GetNonEmptyLines(string(podOutput)) - if len(podNames) != 1 { - return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames)) + if controllerPod.DeletionTimestamp != nil { + return fmt.Errorf("deletion timestamp should be nil, got: %v", controllerPod) + } + if !strings.Contains(controllerPod.Name, "controller-manager") { + return fmt.Errorf("controller pod name %s does not contain 'controller-manager'", controllerPod.Name) } - controllerPodName = podNames[0] - ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager")) - - // Validate pod status - cmd = exec.Command("kubectl", "get", - "pods", controllerPodName, "-o", "jsonpath={.status.phase}", - "-n", namespace, - ) - status, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - if string(status) != "Running" { - return fmt.Errorf("controller pod in %s status", status) + if controllerPod.Status.Phase != "Running" { + return fmt.Errorf("controller pod in %s status", controllerPod.Status.Phase) } + return nil - } + }() EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed()) + }) + }) + + Context("AWS Templates", func() { + BeforeAll(func() { + By("ensuring AWS credentials are set") + kc, err := kubeclient.New(namespace) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + ExpectWithOffset(2, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) + }) + + It("should work with an AWS provider", func() { + By("using the aws-standalone-cp template") + ExpectWithOffset(2, utils.ConfigureDeploymentConfig()).To(Succeed()) + + cmd := exec.Command("make", "dev-aws-apply") + _, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + + EventuallyWithOffset(2, func() error { + return nil + }) }) }) diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go new file mode 100644 index 000000000..62d3364d1 --- /dev/null +++ b/test/kubeclient/kubeclient.go @@ -0,0 +1,114 @@ +package kubeclient + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/Mirantis/hmc/test/utils" + corev1 "k8s.io/api/core/v1" + apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + 
"k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + awsCredentialsSecretName = "aws-credentials" +) + +type KubeClient struct { + Namespace string + + Client kubernetes.Interface + ExtendedClient apiextensionsclientset.Interface + Config *rest.Config +} + +// getKubeConfig returns the kubeconfig file content. +func getKubeConfig() ([]byte, error) { + homeDir, err := os.UserHomeDir() + if err != nil { + return nil, fmt.Errorf("failed to get user home directory: %w", err) + } + + // Use the KUBECONFIG environment variable if it is set, otherwise use the + // default path. + kubeConfig, ok := os.LookupEnv("KUBECONFIG") + if !ok { + kubeConfig = filepath.Join(homeDir, ".kube", "config") + } + + configBytes, err := os.ReadFile(kubeConfig) + if err != nil { + return nil, fmt.Errorf("failed to read %q: %w", kubeConfig, err) + } + + return configBytes, nil +} + +// New creates a new instance of KubeClient from a given namespace. +func New(namespace string) (*KubeClient, error) { + configBytes, err := getKubeConfig() + if err != nil { + return nil, fmt.Errorf("failed to get kubeconfig: %w", err) + } + + config, err := clientcmd.RESTConfigFromKubeConfig(configBytes) + if err != nil { + return nil, fmt.Errorf("failed to parse kubeconfig: %w", err) + } + + clientSet, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("could not initialize kubernetes client: %w", err) + } + + extendedClientSet, err := apiextensionsclientset.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to initialize apiextensions clientset: %w", err) + } + + return &KubeClient{ + Namespace: namespace, + Client: clientSet, + ExtendedClient: extendedClientSet, + Config: config, + }, nil +} + +// CreateAWSCredentialsKubeSecret uses clusterawsadm to encode existing AWS +// credentials and create a secret named 'aws-credentials' in the given +// namespace if one does not already exist. +func (kc *KubeClient) CreateAWSCredentialsKubeSecret(ctx context.Context) error { + _, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, awsCredentialsSecretName, metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + return nil + } + + cmd := exec.Command("./bin/clusterawsadm", + "bootstrap", "credentials", "encode-as-profile", "--output", "rawSharedConfig") + output, err := utils.Run(cmd) + if err != nil { + return fmt.Errorf("failed to encode AWS credentials with clusterawsadm: %w", err) + } + + _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: awsCredentialsSecretName, + }, + Data: map[string][]byte{ + "credentials": output, + }, + Type: corev1.SecretTypeOpaque, + }, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create AWS credentials secret: %w", err) + } + + return nil +} diff --git a/test/utils/utils.go b/test/utils/utils.go index 4e59dca0a..d6d4671e3 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -15,35 +15,21 @@ package utils import ( + "encoding/json" + "errors" "fmt" "os" "os/exec" "strings" . 
"github.com/onsi/ginkgo/v2" //nolint:golint,revive -) - -const ( - prometheusOperatorVersion = "v0.72.0" - prometheusOperatorURL = "https://github.com/prometheus-operator/prometheus-operator/" + - "releases/download/%s/bundle.yaml" - - certmanagerVersion = "v1.14.4" - certmanagerURLTmpl = "https://github.com/jetstack/cert-manager/releases/download/%s/cert-manager.yaml" + "gopkg.in/yaml.v2" ) func warnError(err error) { _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) } -// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics. -func InstallPrometheusOperator() error { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "create", "-f", url) - _, err := Run(cmd) - return err -} - // Run executes the provided command within this context func Run(cmd *exec.Cmd) ([]byte, error) { dir, _ := GetProjectDir() @@ -56,59 +42,34 @@ func Run(cmd *exec.Cmd) ([]byte, error) { cmd.Env = append(os.Environ(), "GO111MODULE=on") command := strings.Join(cmd.Args, " ") _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) - output, err := cmd.CombinedOutput() - if err != nil { - return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) - } - - return output, nil -} - -// UninstallPrometheusOperator uninstalls the prometheus -func UninstallPrometheusOperator() { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) - } -} -// UninstallCertManager uninstalls the cert manager -func UninstallCertManager() { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) - } -} + output, err := cmd.Output() + if err != nil { + var exitError *exec.ExitError -// InstallCertManager installs the cert manager bundle. -func InstallCertManager() error { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "apply", "-f", url) - if _, err := Run(cmd); err != nil { - return err + if errors.As(err, &exitError) { + return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) + } } - // Wait for cert-manager-webhook to be ready, which can take time if cert-manager - // was re-installed after uninstalling on a cluster. - cmd = exec.Command("kubectl", "wait", "managedcluster.apps/cert-manager-webhook", - "--for", "condition=Available", - "--namespace", "cert-manager", - "--timeout", "5m", - ) - _, err := Run(cmd) - return err + return output, nil } // LoadImageToKindCluster loads a local docker image to the kind cluster func LoadImageToKindClusterWithName(name string) error { cluster := "kind" - if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { + if v, ok := os.LookupEnv("KIND_CLUSTER_NAME"); ok { cluster = v } kindOptions := []string{"load", "docker-image", name, "--name", cluster} - cmd := exec.Command("kind", kindOptions...) + + kindBinary := "kind" + + if kindVersion, ok := os.LookupEnv("KIND_VERSION"); ok { + kindBinary = fmt.Sprintf("./bin/kind-%s", kindVersion) + } + + cmd := exec.Command(kindBinary, kindOptions...) _, err := Run(cmd) return err } @@ -136,3 +97,85 @@ func GetProjectDir() (string, error) { wd = strings.Replace(wd, "/test/e2e", "", -1) return wd, nil } + +// getAWSAMI returns an AWS AMI ID to use for test. 
+func getAWSAMI() (string, error) {
+	// For now we'll just use the latest Kubernetes version for ubuntu 20.04,
+	// but we could potentially pin the Kube version and specify that here.
+	cmd := exec.Command("./bin/clusterawsadm", "ami", "list", "--os=ubuntu-20.04", "-o", "json")
+	output, err := Run(cmd)
+	if err != nil {
+		return "", fmt.Errorf("failed to list AMIs: %w", err)
+	}
+
+	var amiList map[string]interface{}
+
+	if err := json.Unmarshal(output, &amiList); err != nil {
+		return "", fmt.Errorf("failed to unmarshal AMI list: %w", err)
+	}
+
+	// ami list returns a sorted list of AMIs by kube version, just get the
+	// first one.
+	for _, item := range amiList["items"].([]interface{}) {
+		spec := item.(map[string]interface{})["spec"].(map[string]interface{})
+		if imageID, ok := spec["imageID"]; ok {
+			ami, ok := imageID.(string)
+			if !ok {
+				continue
+			}
+
+			return ami, nil
+		}
+	}
+
+	return "", fmt.Errorf("no AMIs found")
+}
+
+// ConfigureDeploymentConfig modifies the ./config/dev/deployment.yaml for
+// use in tests. For now we modify only the AWS region and the AMI IDs, but
+// in the future this may mean more complex configuration.
+func ConfigureDeploymentConfig() error {
+	amiID, err := getAWSAMI()
+	if err != nil {
+		return fmt.Errorf("failed to get AWS AMI: %w", err)
+	}
+
+	deploymentConfigBytes, err := os.ReadFile("./config/dev/deployment.yaml")
+	if err != nil {
+		return fmt.Errorf("failed to read deployment config: %w", err)
+	}
+
+	var deploymentConfig map[string]interface{}
+
+	err = yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig)
+	if err != nil {
+		return fmt.Errorf("failed to unmarshal deployment config: %w", err)
+	}
+
+	awsRegion := os.Getenv("AWS_REGION")
+
+	// Modify the existing ./config/dev/deployment.yaml file to use the
+	// AMI we just found and our AWS_REGION.
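+	// The paths below follow the manifest layout, i.e. spec.config.region,
+	// spec.config.worker.amiID and spec.config.controlPlane.amiID; keys that
+	// are missing or of an unexpected type are simply skipped.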
+ if spec, ok := deploymentConfig["spec"].(map[interface{}]interface{}); ok { + if config, ok := spec["config"].(map[interface{}]interface{}); ok { + if awsRegion != "" { + config["region"] = awsRegion + } + + if worker, ok := config["worker"].(map[interface{}]interface{}); ok { + worker["amiID"] = amiID + } + + if controlPlane, ok := config["controlPlane"].(map[interface{}]interface{}); ok { + controlPlane["amiID"] = amiID + } + } + } + + deploymentConfigBytes, err = yaml.Marshal(deploymentConfig) + if err != nil { + return fmt.Errorf("failed to marshal deployment config: %w", err) + } + + return os.WriteFile("./config/dev/deployment.yaml", deploymentConfigBytes, 0644) +} From 58eb73923b0c75f4ef5b089ea78122c31f3b1006 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Tue, 20 Aug 2024 14:13:10 -0700 Subject: [PATCH 02/35] Add resource validation for provider-agnostic resources Signed-off-by: Kyle Squizzato --- go.mod | 2 +- test/e2e/e2e_test.go | 22 +++-- test/e2e/validate_provider.go | 155 ++++++++++++++++++++++++++++++++++ test/kubeclient/kubeclient.go | 12 +++ test/utils/utils.go | 52 ++++++++++++ 5 files changed, 236 insertions(+), 7 deletions(-) create mode 100644 test/e2e/validate_provider.go diff --git a/go.mod b/go.mod index 25a7ce696..70d820e31 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/onsi/gomega v1.34.2 github.com/opencontainers/go-digest v1.0.1-0.20231025023718-d50d2fec9c98 github.com/segmentio/analytics-go v3.1.0+incompatible + gopkg.in/yaml.v2 v2.4.0 helm.sh/helm/v3 v3.15.4 k8s.io/api v0.31.0 k8s.io/apiextensions-apiserver v0.31.0 @@ -157,7 +158,6 @@ require ( google.golang.org/protobuf v1.34.2 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiserver v0.31.0 // indirect k8s.io/cli-runtime v0.31.0 // indirect diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index a9ee02928..75873c17d 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -49,7 +49,7 @@ var _ = Describe("controller", Ordered, func() { Context("Operator", func() { It("should run successfully", func() { kc, err := kubeclient.New(namespace) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) + ExpectWithOffset(1, err).NotTo(HaveOccurred()) By("validating that the controller-manager pod is running as expected") verifyControllerUp := func() error { @@ -67,9 +67,12 @@ var _ = Describe("controller", Ordered, func() { controllerPod := podList.Items[0] + // Ensure the pod is not being deleted. if controllerPod.DeletionTimestamp != nil { return fmt.Errorf("deletion timestamp should be nil, got: %v", controllerPod) } + + // Ensure the pod is running and has the expected name. 
if !strings.Contains(controllerPod.Name, "controller-manager") { return fmt.Errorf("controller pod name %s does not contain 'controller-manager'", controllerPod.Name) } @@ -84,13 +87,22 @@ var _ = Describe("controller", Ordered, func() { }) Context("AWS Templates", func() { + var ( + kc *kubeclient.KubeClient + err error + ) + BeforeAll(func() { By("ensuring AWS credentials are set") - kc, err := kubeclient.New(namespace) + kc, err = kubeclient.New(namespace) ExpectWithOffset(2, err).NotTo(HaveOccurred()) ExpectWithOffset(2, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) }) + AfterAll(func() { + // TODO: Purge the AWS resources + }) + It("should work with an AWS provider", func() { By("using the aws-standalone-cp template") ExpectWithOffset(2, utils.ConfigureDeploymentConfig()).To(Succeed()) @@ -98,11 +110,9 @@ var _ = Describe("controller", Ordered, func() { cmd := exec.Command("make", "dev-aws-apply") _, err := utils.Run(cmd) ExpectWithOffset(2, err).NotTo(HaveOccurred()) - EventuallyWithOffset(2, func() error { - return nil - }) - + return verifyProviderDeployed(context.Background(), kc, "aws-dev") + }(), 30*time.Minute, 10*time.Second).Should(Succeed()) }) }) }) diff --git a/test/e2e/validate_provider.go b/test/e2e/validate_provider.go new file mode 100644 index 000000000..cf0aca32a --- /dev/null +++ b/test/e2e/validate_provider.go @@ -0,0 +1,155 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package e2e + +import ( + "context" + "fmt" + + . "github.com/onsi/ginkgo/v2" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/utils" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" +) + +type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error + +// verifyProviderDeployed is a provider-agnostic verification that checks for +// the presence of cluster, machine and k0scontrolplane resources and their +// underlying status conditions. It is meant to be used in conjunction with +// an Eventually block. +func verifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + for _, resourceValidator := range []resourceValidationFunc{ + validateClusters, + validateMachines, + validateK0sControlPlanes, + } { + // XXX: Once we validate for the first time should we move the + // validation out and consider it "done"? Or is there a possibility + // that the resources could enter a non-ready state later? 
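+		// In practice a resource can regress after first reporting ready
+		// (e.g. a Machine may be replaced while the cluster reconciles), so
+		// re-running every validator on each poll is the safer choice.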
+ if err := resourceValidator(ctx, kc, clusterName); err != nil { + return err + } + } + + return nil +} + +func validateClusters(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + return validateNameAndStatus(ctx, kc, clusterName, schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "clusters", + }) +} + +func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + return validateNameAndStatus(ctx, kc, clusterName, schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "machines", + }) +} + +func validateNameAndStatus(ctx context.Context, kc *kubeclient.KubeClient, + clusterName string, gvr schema.GroupVersionResource) error { + client, err := kc.GetDynamicClient(gvr) + if err != nil { + Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) + } + + list, err := client.List(ctx, metav1.ListOptions{}) + if err != nil { + Fail(fmt.Sprintf("failed to list %s: %v", gvr.Resource, err)) + } + + for _, item := range list.Items { + phase, _, err := unstructured.NestedString(item.Object, "status", "phase") + if err != nil { + Fail(fmt.Sprintf("failed to get phase for %s: %v", item.GetName(), err)) + } + + if phase == "Deleting" { + Fail(fmt.Sprintf("%s is in 'Deleting' phase", item.GetName())) + } + + if err := utils.ValidateObjectNamePrefix(&item, clusterName); err != nil { + Fail(err.Error()) + } + + if err := utils.ValidateConditionsTrue(&item); err != nil { + return err + } + } + + return nil +} + +type k0smotronControlPlaneStatus struct { + // Ready denotes that the control plane is ready + Ready bool `json:"ready"` + ControlPlaneReady bool `json:"controlPlaneReady"` + Inititalized bool `json:"initialized"` + ExternalManagedControlPlane bool `json:"externalManagedControlPlane"` +} + +func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + k0sControlPlaneClient, err := kc.GetDynamicClient(schema.GroupVersionResource{ + Group: "controlplane.cluster.x-k8s.io", + Version: "v1beta1", + Resource: "K0sControlPlane", + }) + if err != nil { + return fmt.Errorf("failed to get K0sControlPlane client: %w", err) + } + + controlPlanes, err := k0sControlPlaneClient.List(ctx, metav1.ListOptions{}) + if err != nil { + return fmt.Errorf("failed to list K0sControlPlanes: %w", err) + } + + for _, controlPlane := range controlPlanes.Items { + if err := utils.ValidateObjectNamePrefix(&controlPlane, clusterName); err != nil { + Fail(err.Error()) + } + + objKind, objName := utils.ObjKindName(&controlPlane) + + // k0smotron does not use the metav1.Condition type for status + // conditions, instead it uses a custom type so we can't use + // ValidateConditionsTrue here. 
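+		// The relevant status payload is expected to resemble:
+		//   {"ready": true, "controlPlaneReady": true, "initialized": true}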
+ conditions, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status", "conditions") + if !found { + return fmt.Errorf("no status conditions found for %s: %s", objKind, objName) + } + if err != nil { + return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) + } + + c, ok := conditions.(k0smotronControlPlaneStatus) + if !ok { + return fmt.Errorf("expected K0sControlPlane condition to be type K0smotronControlPlaneStatus, got: %T", conditions) + } + + if !c.Ready { + return fmt.Errorf("K0sControlPlane %s is not ready, status: %+v", controlPlane.GetName(), c) + } + } + + return nil +} diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index 62d3364d1..0c66e1d33 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -12,6 +12,8 @@ import ( apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" "k8s.io/client-go/rest" "k8s.io/client-go/tools/clientcmd" @@ -112,3 +114,13 @@ func (kc *KubeClient) CreateAWSCredentialsKubeSecret(ctx context.Context) error return nil } + +// GetDynamicClient returns a dynamic client for the given GroupVersionResource. +func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) (dynamic.ResourceInterface, error) { + client, err := dynamic.NewForConfig(kc.Config) + if err != nil { + return nil, fmt.Errorf("failed to create dynamic client: %w", err) + } + + return client.Resource(gvr).Namespace(kc.Namespace), nil +} diff --git a/test/utils/utils.go b/test/utils/utils.go index d6d4671e3..c8f257a00 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -24,6 +24,8 @@ import ( . "github.com/onsi/ginkgo/v2" //nolint:golint,revive "gopkg.in/yaml.v2" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) func warnError(err error) { @@ -179,3 +181,53 @@ func ConfigureDeploymentConfig() error { return os.WriteFile("./config/dev/deployment.yaml", deploymentConfigBytes, 0644) } + +// ValidateConditionsTrue iterates over the conditions of the given +// unstructured object and returns an error if any of the conditions are not +// true. Conditions are expected to be of type metav1.Condition. +func ValidateConditionsTrue(unstrObj *unstructured.Unstructured) error { + objKind, objName := ObjKindName(unstrObj) + + // Iterate the status conditions and ensure each condition reports a "Ready" + // status. + conditions, found, err := unstructured.NestedSlice(unstrObj.Object, "status", "conditions") + if !found { + return fmt.Errorf("no status conditions found for %s: %s", objKind, objName) + } + if err != nil { + return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) + } + + for _, condition := range conditions { + condition, ok := condition.(metav1.Condition) + if !ok { + return fmt.Errorf("expected %s: %s condition to be type metav1.Condition, got: %T", + objKind, objName, condition) + } + + if condition.Status == metav1.ConditionTrue { + continue + } + + return fmt.Errorf("%s %s condition %s is not ready: %s", objKind, objName, condition.Type, condition.Message) + } + + return nil +} + +// ValidateObjectNamePrefix checks if the given object name has the given prefix. 
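+// For example (names here are illustrative): with clusterName
+// "dead1234-e2e-test", a Machine named "dead1234-e2e-test-md-abc12" passes,
+// while one named "default-md-abc12" returns an error.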
+func ValidateObjectNamePrefix(unstrObj *unstructured.Unstructured, clusterName string) error { + objKind, objName := ObjKindName(unstrObj) + + // Verify the machines are prefixed with the cluster name and fail + // the test if they are not. + if !strings.HasPrefix(objName, clusterName) { + return fmt.Errorf("object %s %s does not have cluster name prefix: %s", objKind, objName, clusterName) + } + + return nil +} + +func ObjKindName(unstrObj *unstructured.Unstructured) (string, string) { + return unstrObj.GetKind(), unstrObj.GetName() +} From b93d7869acfe545bfaf6d039ad6d92eb710f6653 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Wed, 21 Aug 2024 12:16:40 -0700 Subject: [PATCH 03/35] Add additional validations for CSI, CCM * Support obtaining a KubeClient from a managed cluster. * Generate a random name to use as clusterName for the test, this will enable the ability to use cloud-nuke to nuke specifically named resources. Signed-off-by: Kyle Squizzato --- go.mod | 6 +- test/e2e/e2e_test.go | 19 +++-- test/e2e/validate_provider.go | 138 +++++++++++++++++++++++++++++----- test/kubeclient/kubeclient.go | 42 ++++++++--- test/utils/deployment.go | 127 +++++++++++++++++++++++++++++++ test/utils/utils.go | 84 --------------------- 6 files changed, 296 insertions(+), 120 deletions(-) create mode 100644 test/utils/deployment.go diff --git a/go.mod b/go.mod index 70d820e31..8e9af40d1 100644 --- a/go.mod +++ b/go.mod @@ -9,12 +9,13 @@ require ( github.com/fluxcd/pkg/runtime v0.49.0 github.com/fluxcd/source-controller/api v1.3.0 github.com/go-logr/logr v1.4.2 + github.com/google/uuid v1.6.0 github.com/hashicorp/go-retryablehttp v0.7.7 github.com/onsi/ginkgo/v2 v2.20.2 github.com/onsi/gomega v1.34.2 github.com/opencontainers/go-digest v1.0.1-0.20231025023718-d50d2fec9c98 github.com/segmentio/analytics-go v3.1.0+incompatible - gopkg.in/yaml.v2 v2.4.0 + gopkg.in/yaml.v3 v3.0.1 helm.sh/helm/v3 v3.15.4 k8s.io/api v0.31.0 k8s.io/apiextensions-apiserver v0.31.0 @@ -79,7 +80,6 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/gorilla/mux v1.8.0 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/gosuri/uitable v0.0.4 // indirect @@ -158,7 +158,7 @@ require ( google.golang.org/protobuf v1.34.2 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect + gopkg.in/yaml.v2 v2.4.0 // indirect k8s.io/apiserver v0.31.0 // indirect k8s.io/cli-runtime v0.31.0 // indirect k8s.io/component-base v0.31.0 // indirect diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 75873c17d..7aa7b81f5 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -48,7 +48,7 @@ var _ = Describe("controller", Ordered, func() { Context("Operator", func() { It("should run successfully", func() { - kc, err := kubeclient.New(namespace) + kc, err := kubeclient.NewFromLocal(namespace) ExpectWithOffset(1, err).NotTo(HaveOccurred()) By("validating that the controller-manager pod is running as expected") @@ -94,25 +94,32 @@ var _ = Describe("controller", Ordered, func() { BeforeAll(func() { By("ensuring AWS credentials are set") - kc, err = kubeclient.New(namespace) + kc, err = kubeclient.NewFromLocal(namespace) ExpectWithOffset(2, err).NotTo(HaveOccurred()) ExpectWithOffset(2, 
kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) }) AfterAll(func() { - // TODO: Purge the AWS resources + // Purge the AWS resources, the AfterAll for the controller will + // clean up the management cluster. }) It("should work with an AWS provider", func() { By("using the aws-standalone-cp template") - ExpectWithOffset(2, utils.ConfigureDeploymentConfig()).To(Succeed()) + clusterName, err := utils.ConfigureDeploymentConfig(utils.ProviderAWS, utils.AWSStandaloneCPTemplate) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) cmd := exec.Command("make", "dev-aws-apply") - _, err := utils.Run(cmd) + _, err = utils.Run(cmd) ExpectWithOffset(2, err).NotTo(HaveOccurred()) EventuallyWithOffset(2, func() error { - return verifyProviderDeployed(context.Background(), kc, "aws-dev") + return verifyProviderDeployed(context.Background(), kc, clusterName) }(), 30*time.Minute, 10*time.Second).Should(Succeed()) + + By("using the aws-hosted-cp template") + // TODO: Use the standalone control plane resources to craft a hosted + // control plane and test it. + }) }) }) diff --git a/test/e2e/validate_provider.go b/test/e2e/validate_provider.go index cf0aca32a..171ad67fe 100644 --- a/test/e2e/validate_provider.go +++ b/test/e2e/validate_provider.go @@ -22,22 +22,34 @@ import ( "github.com/Mirantis/hmc/test/kubeclient" "github.com/Mirantis/hmc/test/utils" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/apimachinery/pkg/util/intstr" ) +// resourceValidationFunc is intended to validate a specific kubernetes +// resource. It is meant to be used in conjunction with an Eventually block, +// however, in some cases it may be necessary to end the Eventually block early +// if the resource will never reach a ready state, in these instances Ginkgo's +// Fail function should be used. type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error // verifyProviderDeployed is a provider-agnostic verification that checks for -// the presence of cluster, machine and k0scontrolplane resources and their -// underlying status conditions. It is meant to be used in conjunction with -// an Eventually block. +// the presence of specific resources in the cluster using +// resourceValidationFuncs and clusterValidationFuncs. func verifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + // Sequentially validate each resource type, only returning the first error + // as to not move on to the next resource type until the first is resolved. for _, resourceValidator := range []resourceValidationFunc{ validateClusters, validateMachines, validateK0sControlPlanes, + validateCSIDriver, + validateCCM, } { // XXX: Once we validate for the first time should we move the // validation out and consider it "done"? 
Or is there a possibility @@ -81,7 +93,7 @@ func validateNameAndStatus(ctx context.Context, kc *kubeclient.KubeClient, for _, item := range list.Items { phase, _, err := unstructured.NestedString(item.Object, "status", "phase") if err != nil { - Fail(fmt.Sprintf("failed to get phase for %s: %v", item.GetName(), err)) + return fmt.Errorf("failed to get status.phase for %s: %v", item.GetName(), err) } if phase == "Deleting" { @@ -100,14 +112,6 @@ func validateNameAndStatus(ctx context.Context, kc *kubeclient.KubeClient, return nil } -type k0smotronControlPlaneStatus struct { - // Ready denotes that the control plane is ready - Ready bool `json:"ready"` - ControlPlaneReady bool `json:"controlPlaneReady"` - Inititalized bool `json:"initialized"` - ExternalManagedControlPlane bool `json:"externalManagedControlPlane"` -} - func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { k0sControlPlaneClient, err := kc.GetDynamicClient(schema.GroupVersionResource{ Group: "controlplane.cluster.x-k8s.io", @@ -130,9 +134,9 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl objKind, objName := utils.ObjKindName(&controlPlane) - // k0smotron does not use the metav1.Condition type for status + // k0s does not use the metav1.Condition type for status // conditions, instead it uses a custom type so we can't use - // ValidateConditionsTrue here. + // ValidateConditionsTrue here, instead we'll check for "ready: true". conditions, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status", "conditions") if !found { return fmt.Errorf("no status conditions found for %s: %s", objKind, objName) @@ -141,15 +145,113 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) } - c, ok := conditions.(k0smotronControlPlaneStatus) + c, ok := conditions.(map[string]interface{}) if !ok { - return fmt.Errorf("expected K0sControlPlane condition to be type K0smotronControlPlaneStatus, got: %T", conditions) + return fmt.Errorf("expected K0sControlPlane condition to be type map[string]interface{}, got: %T", conditions) } - if !c.Ready { - return fmt.Errorf("K0sControlPlane %s is not ready, status: %+v", controlPlane.GetName(), c) + if c["ready"] != "true" { + return fmt.Errorf("K0sControlPlane %s is not ready, status: %v", controlPlane.GetName(), conditions) } } return nil } + +// validateCSIDriver validates that the provider CSI driver is functioning +// by creating a PVC and verifying it enters "Bound" status. +func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) + if err != nil { + Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) + } + + pvcName := clusterName + "-test-pvc" + + _, err = clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). 
+ Create(ctx, &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{ + corev1.ReadWriteOnce, + }, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + // Since these resourceValidationFuncs are intended to be used in + // Eventually we should ensure a follow-up PVCreate is a no-op. + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("failed to create test PVC: %w", err) + } + } + + // Verify the PVC enters "Bound" status. + pvc, err := clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). + Get(ctx, pvcName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get test PVC: %w", err) + } + + if pvc.Status.Phase == corev1.ClaimBound { + return nil + } + + return fmt.Errorf("%s PersistentVolume not yet 'Bound', current phase: %q", pvcName, pvc.Status.Phase) +} + +// validateCCM validates that the provider's cloud controller manager is +// functional by creating a LoadBalancer service and verifying it is assigned +// an external IP. +func validateCCM(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) + if err != nil { + Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) + } + + _, err = clusterKC.Client.CoreV1().Services(clusterKC.Namespace).Create(ctx, &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: clusterName + "-test-service", + }, + Spec: corev1.ServiceSpec{ + Selector: map[string]string{ + "some": "selector", + }, + Ports: []corev1.ServicePort{ + { + Port: 8765, + TargetPort: intstr.FromInt(9376), + }, + }, + Type: corev1.ServiceTypeLoadBalancer, + }, + }, metav1.CreateOptions{}) + if err != nil { + // Since these resourceValidationFuncs are intended to be used in + // Eventually we should ensure a follow-up ServiceCreate is a no-op. + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("failed to create test Service: %w", err) + } + } + + // Verify the Service is assigned an external IP. + service, err := clusterKC.Client.CoreV1().Services(clusterKC.Namespace). + Get(ctx, clusterName+"-test-service", metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get test Service: %w", err) + } + + for _, i := range service.Status.LoadBalancer.Ingress { + if i.Hostname != "" { + return nil + } + } + + return fmt.Errorf("%s Service does not yet have an external hostname", service.Name) +} diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index 0c66e1d33..f10d4dd2c 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -31,8 +31,36 @@ type KubeClient struct { Config *rest.Config } -// getKubeConfig returns the kubeconfig file content. -func getKubeConfig() ([]byte, error) { +// NewFromLocal creates a new instance of KubeClient from a given namespace +// using the locally found kubeconfig. +func NewFromLocal(namespace string) (*KubeClient, error) { + configBytes, err := getLocalKubeConfig() + if err != nil { + return nil, fmt.Errorf("failed to get local kubeconfig: %w", err) + } + + return new(configBytes, namespace) +} + +// NewFromCluster creates a new KubeClient using the kubeconfig stored in the +// secret affiliated with the given clusterName. 
Since it relies on fetching +// the kubeconfig from secret it needs an existing kubeclient. +func (kc *KubeClient) NewFromCluster(ctx context.Context, namespace, clusterName string) (*KubeClient, error) { + secret, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, clusterName+"-kubeconfig", metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get cluster: %q kubeconfig secret: %w", clusterName, err) + } + + secretData, ok := secret.Data["value"] + if !ok { + return nil, fmt.Errorf("kubeconfig secret %q has no 'value' key", clusterName) + } + + return new(secretData, namespace) +} + +// getLocalKubeConfig returns the kubeconfig file content. +func getLocalKubeConfig() ([]byte, error) { homeDir, err := os.UserHomeDir() if err != nil { return nil, fmt.Errorf("failed to get user home directory: %w", err) @@ -53,13 +81,9 @@ func getKubeConfig() ([]byte, error) { return configBytes, nil } -// New creates a new instance of KubeClient from a given namespace. -func New(namespace string) (*KubeClient, error) { - configBytes, err := getKubeConfig() - if err != nil { - return nil, fmt.Errorf("failed to get kubeconfig: %w", err) - } - +// new creates a new instance of KubeClient from a given namespace using +// the local kubeconfig. +func new(configBytes []byte, namespace string) (*KubeClient, error) { config, err := clientcmd.RESTConfigFromKubeConfig(configBytes) if err != nil { return nil, fmt.Errorf("failed to parse kubeconfig: %w", err) diff --git a/test/utils/deployment.go b/test/utils/deployment.go new file mode 100644 index 000000000..e375dc5a9 --- /dev/null +++ b/test/utils/deployment.go @@ -0,0 +1,127 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package utils + +import ( + "encoding/json" + "fmt" + "os" + "os/exec" + + "github.com/google/uuid" + "gopkg.in/yaml.v3" +) + +type ProviderType string + +const ( + ProviderAWS ProviderType = "aws" +) + +type Template string + +const ( + AWSStandaloneCPTemplate Template = "aws-standalone-cp" + AWSHostedCPTemplate Template = "aws-hosted-cp" +) + +// ConfigureDeploymentConfig modifies the ./config/dev/deployment.yaml for +// use in test and returns the generated cluster name. +func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (string, error) { + generatedName := uuid.NewString()[:8] + "-e2e-test" + + deploymentConfigBytes, err := os.ReadFile("./config/dev/deployment.yaml") + if err != nil { + return "", fmt.Errorf("failed to read deployment config: %w", err) + } + + var deploymentConfig map[string]interface{} + + err = yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig) + if err != nil { + return "", fmt.Errorf("failed to unmarshal deployment config: %w", err) + } + + switch provider { + case ProviderAWS: + // XXX: Maybe we should just use automatic AMI selection here. 
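+		// (When amiID is left unset, CAPA can resolve a default AMI for the
+		// target Kubernetes version on its own, which would make this lookup
+		// unnecessary.)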
+ amiID, err := getAWSAMI() + if err != nil { + return "", fmt.Errorf("failed to get AWS AMI: %w", err) + } + + awsRegion := os.Getenv("AWS_REGION") + + // Modify the existing ./config/dev/deployment.yaml file to use the + // AMI we just found and our AWS_REGION. + if spec, ok := deploymentConfig["spec"].(map[string]interface{}); ok { + if config, ok := spec["config"].(map[string]interface{}); ok { + if awsRegion != "" { + config["region"] = awsRegion + } + + if worker, ok := config["worker"].(map[string]interface{}); ok { + worker["amiID"] = amiID + } + + if controlPlane, ok := config["controlPlane"].(map[string]interface{}); ok { + controlPlane["amiID"] = amiID + } + } + } + + deploymentConfigBytes, err = yaml.Marshal(deploymentConfig) + if err != nil { + return "", fmt.Errorf("failed to marshal deployment config: %w", err) + } + + return generatedName, os.WriteFile("./config/dev/deployment.yaml", deploymentConfigBytes, 0644) + default: + return "", fmt.Errorf("unsupported provider: %s", provider) + } +} + +// getAWSAMI returns an AWS AMI ID to use for test. +func getAWSAMI() (string, error) { + // For now we'll just use the latest Kubernetes version for ubuntu 20.04, + // but we could potentially pin the Kube version and specify that here. + cmd := exec.Command("./bin/clusterawsadm", "ami", "list", "--os=ubuntu-20.04", "-o", "json") + output, err := Run(cmd) + if err != nil { + return "", fmt.Errorf("failed to list AMIs: %w", err) + } + + var amiList map[string]interface{} + + if err := json.Unmarshal(output, &amiList); err != nil { + return "", fmt.Errorf("failed to unmarshal AMI list: %w", err) + } + + // ami list returns a sorted list of AMIs by kube version, just get the + // first one. + for _, item := range amiList["items"].([]interface{}) { + spec := item.(map[string]interface{})["spec"].(map[string]interface{}) + if imageID, ok := spec["imageID"]; ok { + ami, ok := imageID.(string) + if !ok { + continue + } + + return ami, nil + } + } + + return "", fmt.Errorf("no AMIs found") +} diff --git a/test/utils/utils.go b/test/utils/utils.go index c8f257a00..04d9273be 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -15,7 +15,6 @@ package utils import ( - "encoding/json" "errors" "fmt" "os" @@ -23,7 +22,6 @@ import ( "strings" . "github.com/onsi/ginkgo/v2" //nolint:golint,revive - "gopkg.in/yaml.v2" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) @@ -100,88 +98,6 @@ func GetProjectDir() (string, error) { return wd, nil } -// getAWSAMI returns an AWS AMI ID to use for test. -func getAWSAMI() (string, error) { - // For now we'll just use the latest Kubernetes version for ubuntu 20.04, - // but we could potentially pin the Kube version and specify that here. - cmd := exec.Command("./bin/clusterawsadm", "ami", "list", "--os=ubuntu-20.04", "-o", "json") - output, err := Run(cmd) - if err != nil { - return "", fmt.Errorf("failed to list AMIs: %w", err) - } - - var amiList map[string]interface{} - - if err := json.Unmarshal(output, &amiList); err != nil { - return "", fmt.Errorf("failed to unmarshal AMI list: %w", err) - } - - // ami list returns a sorted list of AMIs by kube version, just get the - // first one. 
-	for _, item := range amiList["items"].([]interface{}) {
-		spec := item.(map[string]interface{})["spec"].(map[string]interface{})
-		if imageID, ok := spec["imageID"]; ok {
-			ami, ok := imageID.(string)
-			if !ok {
-				continue
-			}
-
-			return ami, nil
-		}
-	}
-
-	return "", fmt.Errorf("no AMIs found")
-}
-
-// ConfigureDeploymentConfig modifies the ./config/dev/deployment.yaml for
-// use in tests. For now we modify only the AWS region and the AMI IDs, but
-// in the future this may mean more complex configuration.
-func ConfigureDeploymentConfig() error {
-	amiID, err := getAWSAMI()
-	if err != nil {
-		return fmt.Errorf("failed to get AWS AMI: %w", err)
-	}
-
-	deploymentConfigBytes, err := os.ReadFile("./config/dev/deployment.yaml")
-	if err != nil {
-		return fmt.Errorf("failed to read deployment config: %w", err)
-	}
-
-	var deploymentConfig map[string]interface{}
-
-	err = yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig)
-	if err != nil {
-		return fmt.Errorf("failed to unmarshal deployment config: %w", err)
-	}
-
-	awsRegion := os.Getenv("AWS_REGION")
-
-	// Modify the existing ./config/dev/deployment.yaml file to use the
-	// AMI we just found and our AWS_REGION.
-	// The paths below follow the manifest layout, i.e. spec.config.region,
-	// spec.config.worker.amiID and spec.config.controlPlane.amiID; keys that
-	// are missing or of an unexpected type are simply skipped.
-	if spec, ok := deploymentConfig["spec"].(map[interface{}]interface{}); ok {
-		if config, ok := spec["config"].(map[interface{}]interface{}); ok {
-			if awsRegion != "" {
-				config["region"] = awsRegion
-			}
-
-			if worker, ok := config["worker"].(map[interface{}]interface{}); ok {
-				worker["amiID"] = amiID
-			}
-
-			if controlPlane, ok := config["controlPlane"].(map[interface{}]interface{}); ok {
-				controlPlane["amiID"] = amiID
-			}
-		}
-	}
-
-	deploymentConfigBytes, err = yaml.Marshal(deploymentConfig)
-	if err != nil {
-		return fmt.Errorf("failed to marshal deployment config: %w", err)
-	}
-
-	return os.WriteFile("./config/dev/deployment.yaml", deploymentConfigBytes, 0644)
-}
-
 // ValidateConditionsTrue iterates over the conditions of the given
 // unstructured object and returns an error if any of the conditions are not
 // true. Conditions are expected to be of type metav1.Condition.

From e45bf5b4e535b3e41eb6c92c9cc258b180749fc6 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Wed, 21 Aug 2024 17:02:27 -0700
Subject: [PATCH 04/35] Add 'make dev-aws-nuke' target, add support for nuking AWS resources in test

Instead of using cloud-nuke as a library, add a cloud-nuke Makefile
target so users can take advantage of cloud-nuke filtering for dev work
outside of testing.
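
For example, to nuke only the resources of a single cluster (the name
'foo' is illustrative):

    CLUSTER_NAME=foo make dev-aws-nuke
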
Signed-off-by: Kyle Squizzato
---
 Makefile                           |  19 +-
 config/dev/aws-managedcluster.yaml |  14 +-
 config/dev/cloud_nuke.yaml.tpl     | 340 +++++++++++++++++++++++++++++
 test/e2e/e2e_test.go               |   3 +
 test/kubeclient/kubeclient.go      |  14 ++
 test/utils/deployment.go           |   7 +
 6 files changed, 387 insertions(+), 10 deletions(-)
 create mode 100644 config/dev/cloud_nuke.yaml.tpl

diff --git a/Makefile b/Makefile
index 10f924c05..2e03a56fe 100644
--- a/Makefile
+++ b/Makefile
@@ -192,6 +192,7 @@ REGISTRY_NAME ?= hmc-local-registry
 REGISTRY_PORT ?= 5001
 REGISTRY_REPO ?= oci://127.0.0.1:$(REGISTRY_PORT)/charts
 DEV_PROVIDER ?= aws
+CLUSTER_NAME ?= $(shell $(YQ) '.metadata.name' ./config/dev/deployment.yaml)
 
 AWS_CREDENTIALS=${AWS_B64ENCODED_CREDENTIALS}
 
@@ -279,8 +280,6 @@ dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates
 .PHONY: dev-destroy
 dev-destroy: kind-undeploy registry-undeploy
 
-.PHONY: dev-creds-apply
-dev-creds-apply: dev-$(DEV_PROVIDER)-creds
 
 .PHONY: dev-provider-apply
 dev-provider-apply: envsubst
@@ -290,8 +289,14 @@ dev-provider-apply: envsubst
 dev-provider-delete: envsubst
 	@NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-managedcluster.yaml | $(KUBECTL) delete -f -
 
+.PHONY: dev-aws-nuke
+dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'dev-aws-apply'; set CLUSTER_NAME to nuke a specific cluster.
+	@CLUSTER_NAME=$(CLUSTER_NAME) envsubst < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml
+	$(CLOUDNUKE) aws --region us-west-2 --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,internet-gateway,network-interface,security-group
+	@rm config/dev/cloud_nuke.yaml
+
 .PHONY: cli-install
-cli-install: clusterawsadm clusterctl
+cli-install: clusterawsadm clusterctl cloud-nuke
 
 ##@ Dependencies
 
@@ -320,6 +325,7 @@ KIND ?= $(LOCALBIN)/kind-$(KIND_VERSION)
 YQ ?= $(LOCALBIN)/yq-$(YQ_VERSION)
 CLUSTERAWSADM ?= $(LOCALBIN)/clusterawsadm
 CLUSTERCTL ?= $(LOCALBIN)/clusterctl
+CLOUDNUKE ?= $(LOCALBIN)/cloud-nuke
 ADDLICENSE ?= $(LOCALBIN)/addlicense-$(ADDLICENSE_VERSION)
 ENVSUBST ?= $(LOCALBIN)/envsubst-$(ENVSUBST_VERSION)
 
@@ -330,6 +336,7 @@ GOLANGCI_LINT_VERSION ?= v1.60.1
 HELM_VERSION ?= v3.15.1
 KIND_VERSION ?= v0.23.0
 YQ_VERSION ?= v4.44.2
+CLOUDNUKE_VERSION ?= v0.37.1
 CLUSTERAWSADM_VERSION ?= v2.5.2
 CLUSTERCTL_VERSION ?= v1.7.3
 ADDLICENSE_VERSION ?= v1.1.1
@@ -382,6 +389,12 @@ yq: $(YQ) ## Download yq locally if necessary.
 $(YQ): | $(LOCALBIN)
 	$(call go-install-tool,$(YQ),github.com/mikefarah/yq/v4,${YQ_VERSION})
 
+.PHONY: cloud-nuke
+cloud-nuke: $(CLOUDNUKE) ## Download cloud-nuke locally if necessary.
+$(CLOUDNUKE): | $(LOCALBIN)
+	curl -sL https://github.com/gruntwork-io/cloud-nuke/releases/download/$(CLOUDNUKE_VERSION)/cloud-nuke_$(OS)_$(ARCH) -o $(CLOUDNUKE)
+	chmod +x $(CLOUDNUKE)
+
 .PHONY: clusterawsadm
 clusterawsadm: $(CLUSTERAWSADM) ## Download clusterawsadm locally if necessary.
$(CLUSTERAWSADM): | $(LOCALBIN) diff --git a/config/dev/aws-managedcluster.yaml b/config/dev/aws-managedcluster.yaml index 6ed56f2de..d1130bab3 100644 --- a/config/dev/aws-managedcluster.yaml +++ b/config/dev/aws-managedcluster.yaml @@ -4,15 +4,15 @@ metadata: name: aws-dev namespace: ${NAMESPACE} spec: + template: aws-standalone-cp config: + region: us-east-2 + publicIP: true + controlPlaneNumber: 1 + workersNumber: 1 controlPlane: - amiID: ami-0989c067ff3da4b27 + amiID: ami-02f3416038bdb17fb instanceType: t3.small - controlPlaneNumber: 1 - publicIP: true - region: us-west-2 worker: - amiID: ami-0989c067ff3da4b27 + amiID: ami-02f3416038bdb17fb instanceType: t3.small - workersNumber: 1 - template: aws-standalone-cp diff --git a/config/dev/cloud_nuke.yaml.tpl b/config/dev/cloud_nuke.yaml.tpl new file mode 100644 index 000000000..1888fe965 --- /dev/null +++ b/config/dev/cloud_nuke.yaml.tpl @@ -0,0 +1,340 @@ +# This config file is used by cloud-nuke to clean up named resources associated +# with a specific managed cluster across an AWS account. CLUSTER_NAME is +# typically the metadata.name of the Deployment. +# The resources listed here are ALL of the potential resources that can be +# filtered by cloud-nuke, except for IAM resources since we'll never touch those. +# See: https://github.com/gruntwork-io/cloud-nuke?tab=readme-ov-file#whats-supported +# +# Usage: +# - 'make aws-dev-nuke' will nuke resources affiliated with config/dev/deployment.yaml +# - 'CLUSTER_NAME=foo make aws-dev-nuke' will nuke resources affiliated with an AWS cluster named 'foo' +# Check cluster names with 'kubectl get clusters -n hmc-system' + +ACM: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +APIGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +APIGatewayV2: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +AccessAnalyzer: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +AutoScalingGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +AppRunnerService: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +BackupVault: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudWatchAlarm: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudWatchDashboard: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudWatchLogGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudtrailTrail: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CodeDeployApplications: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ConfigServiceRecorder: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ConfigServiceRule: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DataSyncTask: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DynamoDB: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EBSVolume: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticBeanstalk: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2DedicatedHosts: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2KeyPairs: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAM: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAMPool: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAMResourceDiscovery: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAMScope: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2PlacementGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2Subnet: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2Endpoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ECRRepository: + 
include: + names_regex: + - '^${CLUSTER_NAME}.*' +ECSCluster: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ECSService: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EKSCluster: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ELBv1: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ELBv2: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticFileSystem: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticIP: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Elasticache: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticacheParameterGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticacheSubnetGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +InternetGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EgressOnlyInternetGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +LambdaFunction: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +LaunchConfiguration: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +LaunchTemplate: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +MSKCluster: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NatGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkACL: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkInterface: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +OIDCProvider: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +OpenSearchDomain: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Redshift: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DBClusters: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DBInstances: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +RdsParameterGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DBSubnetGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +RDSProxy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +s3: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +s3AccessPoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +S3ObjectLambdaAccessPoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +S3MultiRegionAccessPoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SecurityGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesConfigurationset: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesEmailTemplates: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesIdentity: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesReceiptRuleSet: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesReceiptFilter: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SNS: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SQS: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SageMakerNotebook: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SecretsManager: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +VPC: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Route53HostedZone: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Route53CIDRCollection: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Route53TrafficPolicy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewall: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallPolicy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallRuleGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallTLSConfig: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallResourcePolicy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +VPCLatticeService: + include: + names_regex: + - '^${CLUSTER_NAME}.*' 
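+# For example, rendering this template with CLUSTER_NAME=bba1743d-e2e-test turns every names_regex entry into '^bba1743d-e2e-test.*', so cloud-nuke only matches resources whose names begin with that cluster name.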
+VPCLatticeServiceNetwork: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +VPCLatticeTargetGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 7aa7b81f5..13ed9be72 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -102,6 +102,9 @@ var _ = Describe("controller", Ordered, func() { AfterAll(func() { // Purge the AWS resources, the AfterAll for the controller will // clean up the management cluster. + cmd := exec.Command("make", "dev-aws-nuke") + _, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) }) It("should work with an AWS provider", func() { diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index f10d4dd2c..f2a954dda 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -1,3 +1,17 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + package kubeclient import ( diff --git a/test/utils/deployment.go b/test/utils/deployment.go index e375dc5a9..875668479 100644 --- a/test/utils/deployment.go +++ b/test/utils/deployment.go @@ -66,6 +66,13 @@ func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (st // Modify the existing ./config/dev/deployment.yaml file to use the // AMI we just found and our AWS_REGION. + if metadata, ok := deploymentConfig["metadata"].(map[string]interface{}); ok { + metadata["name"] = generatedName + } else { + // Ensure we always have a metadata.name field populated. 
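+		// For illustration, assuming an otherwise empty config this yields: {"metadata": {"name": "<8-char-uuid>-e2e-test"}}.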
+ deploymentConfig["metadata"] = map[string]interface{}{"name": generatedName} + } + if spec, ok := deploymentConfig["spec"].(map[string]interface{}); ok { if config, ok := spec["config"].(map[string]interface{}); ok { if awsRegion != "" { From c5ce23852a9e59889d835e55ea029b8d38bbab0c Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Wed, 21 Aug 2024 17:23:51 -0700 Subject: [PATCH 05/35] Re-enable Before/AfterAll, fix linting, use KUBECONFIG before fallback Signed-off-by: Kyle Squizzato --- test/e2e/e2e_test.go | 26 +++++++++++++------------- test/kubeclient/kubeclient.go | 10 +++++----- test/utils/utils.go | 4 ---- 3 files changed, 18 insertions(+), 22 deletions(-) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 13ed9be72..be59d7656 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -32,19 +32,19 @@ import ( const namespace = "hmc-system" var _ = Describe("controller", Ordered, func() { - // BeforeAll(func() { - // By("building and deploying the controller-manager") - // cmd := exec.Command("make", "dev-apply") - // _, err := utils.Run(cmd) - // Expect(err).NotTo(HaveOccurred()) - // }) - - // AfterAll(func() { - // By("removing the controller-manager") - // cmd := exec.Command("make", "dev-destroy") - // _, err := utils.Run(cmd) - // Expect(err).NotTo(HaveOccurred()) - // }) + BeforeAll(func() { + By("building and deploying the controller-manager") + cmd := exec.Command("make", "dev-apply") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + }) + + AfterAll(func() { + By("removing the controller-manager") + cmd := exec.Command("make", "dev-destroy") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + }) Context("Operator", func() { It("should run successfully", func() { diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index f2a954dda..eb922777f 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -75,15 +75,15 @@ func (kc *KubeClient) NewFromCluster(ctx context.Context, namespace, clusterName // getLocalKubeConfig returns the kubeconfig file content. func getLocalKubeConfig() ([]byte, error) { - homeDir, err := os.UserHomeDir() - if err != nil { - return nil, fmt.Errorf("failed to get user home directory: %w", err) - } - // Use the KUBECONFIG environment variable if it is set, otherwise use the // default path. 
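+	// This matches kubectl's own precedence: an explicit KUBECONFIG always wins, and ~/.kube/config is only the fallback.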
kubeConfig, ok := os.LookupEnv("KUBECONFIG") if !ok { + homeDir, err := os.UserHomeDir() + if err != nil { + return nil, fmt.Errorf("failed to get user home directory: %w", err) + } + kubeConfig = filepath.Join(homeDir, ".kube", "config") } diff --git a/test/utils/utils.go b/test/utils/utils.go index 04d9273be..bfb7a6c46 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -26,10 +26,6 @@ import ( "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) -func warnError(err error) { - _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) -} - // Run executes the provided command within this context func Run(cmd *exec.Cmd) ([]byte, error) { dir, _ := GetProjectDir() From 255fe19acc8d26f9a0d8c64b3cd6b885decfbe23 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 22 Aug 2024 17:12:36 -0700 Subject: [PATCH 06/35] Fix various issues with validating deployment Signed-off-by: Kyle Squizzato --- .github/workflows/test.yml | 2 +- Makefile | 14 +- config/dev/aws-managedcluster.yaml | 14 +- test/e2e/e2e_test.go | 63 ++++---- ...ate_provider.go => validate_deployment.go} | 145 ++++++++++++------ test/utils/deployment.go | 15 +- test/utils/utils.go | 28 +++- 7 files changed, 183 insertions(+), 98 deletions(-) rename test/e2e/{validate_provider.go => validate_deployment.go} (68%) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c258f06bc..6c6ab64f3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -71,4 +71,4 @@ jobs: uses: azure/setup-kubectl@v4 - name: Run E2E tests run: | - make e2e-test + make test-e2e diff --git a/Makefile b/Makefile index 2e03a56fe..41f21c0e1 100644 --- a/Makefile +++ b/Makefile @@ -106,7 +106,7 @@ test: generate-all fmt vet envtest tidy external-crd ## Run tests. # Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. .PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. test-e2e: cli-install - KIND_CLUSTER_NAME=$(KIND_CLUSTER_NAME) KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v + KIND_CLUSTER_NAME="hmc-test" KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v -timeout=2h .PHONY: lint lint: golangci-lint ## Run golangci-lint linter & yamllint @@ -278,7 +278,7 @@ dev-azure-creds: envsubst dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates .PHONY: dev-destroy -dev-destroy: kind-undeploy registry-undeploy +dev-destroy: kind-undeploy registry-undeploy ## Destroy the development environment by deleting the kind cluster and local registry. .PHONY: dev-provider-apply @@ -292,11 +292,17 @@ dev-provider-delete: envsubst .PHONY: dev-aws-nuke dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'dev-aws-apply', prefix with CLUSTER_NAME to nuke a specific cluster. 
@CLUSTER_NAME=$(CLUSTER_NAME) envsubst < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml - $(CLOUDNUKE) aws --region us-west-2 --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,internet-gateway,network-interface,security-group + DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group @rm config/dev/cloud_nuke.yaml +.PHONY: test-apply +test-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates + +.PHONY: test-destroy +test-destroy: kind-undeploy registry-undeploy + .PHONY: cli-install -cli-install: clusterawsadm clusterctl cloud-nuke +cli-install: clusterawsadm clusterctl cloud-nuke yq ## Install the necessary CLI tools for deployment, development and testing. ##@ Dependencies diff --git a/config/dev/aws-managedcluster.yaml b/config/dev/aws-managedcluster.yaml index d1130bab3..6ed56f2de 100644 --- a/config/dev/aws-managedcluster.yaml +++ b/config/dev/aws-managedcluster.yaml @@ -4,15 +4,15 @@ metadata: name: aws-dev namespace: ${NAMESPACE} spec: - template: aws-standalone-cp config: - region: us-east-2 - publicIP: true - controlPlaneNumber: 1 - workersNumber: 1 controlPlane: - amiID: ami-02f3416038bdb17fb + amiID: ami-0989c067ff3da4b27 instanceType: t3.small + controlPlaneNumber: 1 + publicIP: true + region: us-west-2 worker: - amiID: ami-02f3416038bdb17fb + amiID: ami-0989c067ff3da4b27 instanceType: t3.small + workersNumber: 1 + template: aws-standalone-cp diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index be59d7656..ffbde6a09 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -32,19 +32,19 @@ import ( const namespace = "hmc-system" var _ = Describe("controller", Ordered, func() { - BeforeAll(func() { - By("building and deploying the controller-manager") - cmd := exec.Command("make", "dev-apply") - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred()) - }) - - AfterAll(func() { - By("removing the controller-manager") - cmd := exec.Command("make", "dev-destroy") - _, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred()) - }) + // BeforeAll(func() { + // By("building and deploying the controller-manager") + // cmd := exec.Command("make", "test-apply") + // _, err := utils.Run(cmd) + // Expect(err).NotTo(HaveOccurred()) + // }) + + // AfterAll(func() { + // By("removing the controller-manager") + // cmd := exec.Command("make", "test-destroy") + // _, err := utils.Run(cmd) + // Expect(err).NotTo(HaveOccurred()) + // }) Context("Operator", func() { It("should run successfully", func() { @@ -54,7 +54,7 @@ var _ = Describe("controller", Ordered, func() { By("validating that the controller-manager pod is running as expected") verifyControllerUp := func() error { // Ensure only one controller pod is running. 
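+				// The label selector below restricts the count to the controller-manager pods, so unrelated pods in the namespace don't skew the check.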
- podList, err := kc.Client.CoreV1().Pods(namespace).List(context.Background(), metav1.ListOptions{ + podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ LabelSelector: "control-plane=controller-manager,app.kubernetes.io/name=cluster-api", }) if err != nil { @@ -81,8 +81,16 @@ var _ = Describe("controller", Ordered, func() { } return nil - }() - EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed()) + } + EventuallyWithOffset(1, func() error { + err := verifyControllerUp() + if err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Controller pod validation failed: %v\n", err) + return err + } + + return nil + }, 5*time.Minute, time.Second).Should(Succeed()) }) }) @@ -100,29 +108,30 @@ var _ = Describe("controller", Ordered, func() { }) AfterAll(func() { - // Purge the AWS resources, the AfterAll for the controller will - // clean up the management cluster. - cmd := exec.Command("make", "dev-aws-nuke") - _, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) + // // Purge the AWS resources, the AfterAll for the controller will + // // clean up the management cluster. + // cmd := exec.Command("make", "dev-aws-nuke") + // _, err := utils.Run(cmd) + // ExpectWithOffset(2, err).NotTo(HaveOccurred()) }) It("should work with an AWS provider", func() { By("using the aws-standalone-cp template") - clusterName, err := utils.ConfigureDeploymentConfig(utils.ProviderAWS, utils.AWSStandaloneCPTemplate) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) + //clusterName, err := utils.ConfigureDeploymentConfig(utils.ProviderAWS, utils.TemplateAWSStandaloneCP) + //ExpectWithOffset(1, err).NotTo(HaveOccurred()) + + clusterName := "bba1743d-e2e-test" cmd := exec.Command("make", "dev-aws-apply") _, err = utils.Run(cmd) ExpectWithOffset(2, err).NotTo(HaveOccurred()) - EventuallyWithOffset(2, func() error { + _, _ = fmt.Fprintf(GinkgoWriter, "Waiting for resource validation to succeed\n") + Eventually(func() error { return verifyProviderDeployed(context.Background(), kc, clusterName) - }(), 30*time.Minute, 10*time.Second).Should(Succeed()) - + }).WithTimeout(30 * time.Minute).WithPolling(5 * time.Second).Should(Succeed()) By("using the aws-hosted-cp template") // TODO: Use the standalone control plane resources to craft a hosted // control plane and test it. - }) }) }) diff --git a/test/e2e/validate_provider.go b/test/e2e/validate_deployment.go similarity index 68% rename from test/e2e/validate_provider.go rename to test/e2e/validate_deployment.go index 171ad67fe..5d19f69b7 100644 --- a/test/e2e/validate_provider.go +++ b/test/e2e/validate_deployment.go @@ -18,10 +18,9 @@ import ( "context" "fmt" - . "github.com/onsi/ginkgo/v2" - "github.com/Mirantis/hmc/test/kubeclient" "github.com/Mirantis/hmc/test/utils" + . "github.com/onsi/ginkgo/v2" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -32,79 +31,109 @@ import ( // resourceValidationFunc is intended to validate a specific kubernetes -// resource. It is meant to be used in conjunction with an Eventually block, -// however, in some cases it may be necessary to end the Eventually block early -// if the resource will never reach a ready state, in these instances Ginkgo's -// Fail function should be used. +// resource. 
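+// Implementations receive the management cluster client and the generated
+// cluster name, and should keep returning an error until the resource they
+// check is ready.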
type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error +var resourceValidators = map[string]resourceValidationFunc{ + "clusters": validateClusters, + "machines": validateMachines, + "control-planes": validateK0sControlPlanes, + "csi-driver": validateCSIDriver, + "ccm": validateCCM, +} + // verifyProviderDeployed is a provider-agnostic verification that checks for // the presence of specific resources in the cluster using -// resourceValidationFuncs and clusterValidationFuncs. +// resourceValidationFuncs and clusterValidationFuncs. It is meant to be used +// in conjunction with an Eventually block. In some cases it may be necessary +// to end the Eventually block early if the resource will never reach a ready +// state, in these instances Ginkgo's Fail function should be used. func verifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { // Sequentially validate each resource type, only returning the first error // as to not move on to the next resource type until the first is resolved. - for _, resourceValidator := range []resourceValidationFunc{ - validateClusters, - validateMachines, - validateK0sControlPlanes, - validateCSIDriver, - validateCCM, - } { + for _, name := range []string{"clusters", "machines", "control-planes", "csi-driver", "ccm"} { + validator, ok := resourceValidators[name] + if !ok { + continue + } + + if err := validator(ctx, kc, clusterName); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation error: %v\n", name, err) + return err + } + + _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) // XXX: Once we validate for the first time should we move the // validation out and consider it "done"? Or is there a possibility // that the resources could enter a non-ready state later? 
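+		// For now, a validator that has succeeded once is treated as done: it is deleted from the map below and not re-run on later polls.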
- if err := resourceValidator(ctx, kc, clusterName); err != nil { - return err - } + delete(resourceValidators, name) } return nil } func validateClusters(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - return validateNameAndStatus(ctx, kc, clusterName, schema.GroupVersionResource{ + gvr := schema.GroupVersionResource{ Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "clusters", - }) + } + + client, err := kc.GetDynamicClient(gvr) + if err != nil { + Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) + } + + cluster, err := client.Get(ctx, clusterName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get %s %s: %v", gvr.Resource, clusterName, err) + } + + phase, _, err := unstructured.NestedString(cluster.Object, "status", "phase") + if err != nil { + return fmt.Errorf("failed to get status.phase for %s: %v", cluster.GetName(), err) + } + + if phase == "Deleting" { + Fail(fmt.Sprintf("%s is in 'Deleting' phase", cluster.GetName())) + } + + if err := utils.ValidateObjectNamePrefix(cluster, clusterName); err != nil { + Fail(err.Error()) + } + + if err := utils.ValidateConditionsTrue(cluster); err != nil { + return err + } + + return nil } func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - return validateNameAndStatus(ctx, kc, clusterName, schema.GroupVersionResource{ + gvr := schema.GroupVersionResource{ Group: "cluster.x-k8s.io", Version: "v1beta1", Resource: "machines", - }) -} + } -func validateNameAndStatus(ctx context.Context, kc *kubeclient.KubeClient, - clusterName string, gvr schema.GroupVersionResource) error { client, err := kc.GetDynamicClient(gvr) if err != nil { Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) } - list, err := client.List(ctx, metav1.ListOptions{}) + machines, err := client.List(ctx, metav1.ListOptions{ + LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, + }) if err != nil { - Fail(fmt.Sprintf("failed to list %s: %v", gvr.Resource, err)) + return fmt.Errorf("failed to list %s: %v", gvr.Resource, err) } - for _, item := range list.Items { - phase, _, err := unstructured.NestedString(item.Object, "status", "phase") - if err != nil { - return fmt.Errorf("failed to get status.phase for %s: %v", item.GetName(), err) - } - - if phase == "Deleting" { - Fail(fmt.Sprintf("%s is in 'Deleting' phase", item.GetName())) - } - - if err := utils.ValidateObjectNamePrefix(&item, clusterName); err != nil { + for _, machine := range machines.Items { + if err := utils.ValidateObjectNamePrefix(&machine, clusterName); err != nil { Fail(err.Error()) } - if err := utils.ValidateConditionsTrue(&item); err != nil { + if err := utils.ValidateConditionsTrue(&machine); err != nil { return err } } @@ -116,13 +145,15 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl k0sControlPlaneClient, err := kc.GetDynamicClient(schema.GroupVersionResource{ Group: "controlplane.cluster.x-k8s.io", Version: "v1beta1", - Resource: "K0sControlPlane", + Resource: "k0scontrolplanes", }) if err != nil { return fmt.Errorf("failed to get K0sControlPlane client: %w", err) } - controlPlanes, err := k0sControlPlaneClient.List(ctx, metav1.ListOptions{}) + controlPlanes, err := k0sControlPlaneClient.List(ctx, metav1.ListOptions{ + LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, + }) if err != nil { return fmt.Errorf("failed to list K0sControlPlanes: %w", err) } @@ -134,30 +165,46 @@ func validateK0sControlPlanes(ctx 
context.Context, kc *kubeclient.KubeClient, cl objKind, objName := utils.ObjKindName(&controlPlane) - // k0s does not use the metav1.Condition type for status - // conditions, instead it uses a custom type so we can't use + // k0s does not use the metav1.Condition type for status.conditions, + // instead it uses a custom type so we can't use // ValidateConditionsTrue here, instead we'll check for "ready: true". - conditions, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status", "conditions") + status, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status") if !found { - return fmt.Errorf("no status conditions found for %s: %s", objKind, objName) + return fmt.Errorf("no status found for %s: %s", objKind, objName) } if err != nil { return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) } - c, ok := conditions.(map[string]interface{}) + st, ok := status.(map[string]interface{}) if !ok { - return fmt.Errorf("expected K0sControlPlane condition to be type map[string]interface{}, got: %T", conditions) + return fmt.Errorf("expected K0sControlPlane condition to be type map[string]interface{}, got: %T", status) } - if c["ready"] != "true" { - return fmt.Errorf("K0sControlPlane %s is not ready, status: %v", controlPlane.GetName(), conditions) + if !st["ready"].(bool) { + return fmt.Errorf("K0sControlPlane %s is not ready, status: %+v", controlPlane.GetName(), status) } } return nil } +// apiVersion: v1 +// kind: Pod +// metadata: +// name: test-pvc-pod +// spec: +// volumes: +// - name: test-pvc-vol +// persistentVolumeClaim: +// claimName: pvcName +// containers: +// - name: test-pvc-container +// image: nginx +// volumeMounts: +// - mountPath: "/storage" +// name: task-pv-storage + // validateCSIDriver validates that the provider CSI driver is functioning // by creating a PVC and verifying it enters "Bound" status. func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { @@ -166,7 +213,7 @@ func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterNa Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) } - pvcName := clusterName + "-test-pvc" + pvcName := clusterName + "-csi-test-pvc" _, err = clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). Create(ctx, &corev1.PersistentVolumeClaim{ diff --git a/test/utils/deployment.go b/test/utils/deployment.go index 875668479..1f088bde8 100644 --- a/test/utils/deployment.go +++ b/test/utils/deployment.go @@ -21,6 +21,7 @@ import ( "os/exec" "github.com/google/uuid" + . "github.com/onsi/ginkgo/v2" "gopkg.in/yaml.v3" ) @@ -33,16 +34,18 @@ const ( type Template string const ( - AWSStandaloneCPTemplate Template = "aws-standalone-cp" - AWSHostedCPTemplate Template = "aws-hosted-cp" + TemplateAWSStandaloneCP Template = "aws-standalone-cp" + TemplateAWSHostedCP Template = "aws-hosted-cp" + + deploymentConfigFile = "./config/dev/deployment.yaml" ) -// ConfigureDeploymentConfig modifies the ./config/dev/deployment.yaml for +// ConfigureDeploymentConfig modifies the config/dev/deployment.yaml for // use in test and returns the generated cluster name. 
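+// e.g. a hypothetical call ConfigureDeploymentConfig(ProviderAWS, TemplateAWSStandaloneCP) rewrites that file in place and returns a name like "1a2b3c4d-e2e-test".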
func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (string, error) { generatedName := uuid.NewString()[:8] + "-e2e-test" - deploymentConfigBytes, err := os.ReadFile("./config/dev/deployment.yaml") + deploymentConfigBytes, err := os.ReadFile(deploymentConfigFile) if err != nil { return "", fmt.Errorf("failed to read deployment config: %w", err) } @@ -94,7 +97,9 @@ func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (st return "", fmt.Errorf("failed to marshal deployment config: %w", err) } - return generatedName, os.WriteFile("./config/dev/deployment.yaml", deploymentConfigBytes, 0644) + _, _ = fmt.Fprintf(GinkgoWriter, "Generated AWS cluster name: %q\n", generatedName) + + return generatedName, os.WriteFile(deploymentConfigFile, deploymentConfigBytes, 0644) default: return "", fmt.Errorf("unsupported provider: %s", provider) } diff --git a/test/utils/utils.go b/test/utils/utils.go index bfb7a6c46..613ac9605 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -24,6 +24,7 @@ import ( . "github.com/onsi/ginkgo/v2" //nolint:golint,revive metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" ) // Run executes the provided command within this context @@ -110,18 +111,35 @@ func ValidateConditionsTrue(unstrObj *unstructured.Unstructured) error { return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) } + var errs error + for _, condition := range conditions { - condition, ok := condition.(metav1.Condition) + conditionMap, ok := condition.(map[string]interface{}) if !ok { - return fmt.Errorf("expected %s: %s condition to be type metav1.Condition, got: %T", - objKind, objName, condition) + return fmt.Errorf("expected %s: %s condition to be type map[string]interface{}, got: %T", + objKind, objName, conditionMap) + } + + var c *metav1.Condition + + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(conditionMap, &c); err != nil { + return fmt.Errorf("failed to convert condition map to metav1.Condition: %w", err) } - if condition.Status == metav1.ConditionTrue { + if c.Status == metav1.ConditionTrue { continue } - return fmt.Errorf("%s %s condition %s is not ready: %s", objKind, objName, condition.Type, condition.Message) + errorStr := fmt.Sprintf("%s: %s", c.Type, c.Reason) + if c.Message != "" { + errorStr = fmt.Sprintf("%s: %s", errorStr, c.Message) + } + + errs = errors.Join(fmt.Errorf(errorStr), errs) + } + + if errs != nil { + return fmt.Errorf("%s %s is not ready with conditions: %w", objKind, objName, errs) } return nil From b585416e9b78a9902ee20912a84a698c25e92fbf Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Fri, 23 Aug 2024 12:46:45 -0700 Subject: [PATCH 07/35] Polish deployment creation, add deletion validation Signed-off-by: Kyle Squizzato --- test/{utils => deployment}/deployment.go | 75 +++++---- test/deployment/resources/deployment.yaml.tpl | 17 +++ test/deployment/validate_deleted.go | 116 ++++++++++++++ .../validate_deployed.go} | 144 +++++++++--------- test/e2e/e2e_test.go | 70 +++++---- test/kubeclient/kubeclient.go | 105 +++++++++++++ 6 files changed, 387 insertions(+), 140 deletions(-) rename test/{utils => deployment}/deployment.go (61%) create mode 100644 test/deployment/resources/deployment.yaml.tpl create mode 100644 test/deployment/validate_deleted.go rename test/{e2e/validate_deployment.go => deployment/validate_deployed.go} (71%) diff --git a/test/utils/deployment.go 
b/test/deployment/deployment.go similarity index 61% rename from test/utils/deployment.go rename to test/deployment/deployment.go index 1f088bde8..5faedb169 100644 --- a/test/utils/deployment.go +++ b/test/deployment/deployment.go @@ -12,17 +12,21 @@ // See the License for the specific language governing permissions and // limitations under the License. -package utils +package deployment import ( + _ "embed" "encoding/json" "fmt" "os" "os/exec" + "github.com/Mirantis/hmc/test/utils" "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" "gopkg.in/yaml.v3" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) type ProviderType string @@ -36,39 +40,32 @@ type Template string const ( TemplateAWSStandaloneCP Template = "aws-standalone-cp" TemplateAWSHostedCP Template = "aws-hosted-cp" - - deploymentConfigFile = "./config/dev/deployment.yaml" ) -// ConfigureDeploymentConfig modifies the config/dev/deployment.yaml for -// use in test and returns the generated cluster name. -func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (string, error) { - generatedName := uuid.NewString()[:8] + "-e2e-test" +//go:embed resources/deployment.yaml.tpl +var deploymentConfigBytes []byte - deploymentConfigBytes, err := os.ReadFile(deploymentConfigFile) - if err != nil { - return "", fmt.Errorf("failed to read deployment config: %w", err) - } +// GetUnstructuredDeployment returns an unstructured deployment object based on +// the provider and template. +func GetUnstructuredDeployment(provider ProviderType, templateName Template) *unstructured.Unstructured { + GinkgoHelper() + + generatedName := uuid.New().String()[:8] + "-e2e-test" + _, _ = fmt.Fprintf(GinkgoWriter, "Generated AWS cluster name: %q\n", generatedName) var deploymentConfig map[string]interface{} - err = yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig) - if err != nil { - return "", fmt.Errorf("failed to unmarshal deployment config: %w", err) - } + err := yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig) + Expect(err).NotTo(HaveOccurred(), "failed to unmarshal deployment config") switch provider { case ProviderAWS: // XXX: Maybe we should just use automatic AMI selection here. - amiID, err := getAWSAMI() - if err != nil { - return "", fmt.Errorf("failed to get AWS AMI: %w", err) - } - + amiID := getAWSAMI() awsRegion := os.Getenv("AWS_REGION") - // Modify the existing ./config/dev/deployment.yaml file to use the - // AMI we just found and our AWS_REGION. + // Modify the deployment config to use the generated name and the AMI. + // TODO: This should be modified to use go templating. if metadata, ok := deploymentConfig["metadata"].(map[string]interface{}); ok { metadata["name"] = generatedName } else { @@ -92,34 +89,28 @@ func ConfigureDeploymentConfig(provider ProviderType, templateName Template) (st } } - deploymentConfigBytes, err = yaml.Marshal(deploymentConfig) - if err != nil { - return "", fmt.Errorf("failed to marshal deployment config: %w", err) - } - - _, _ = fmt.Fprintf(GinkgoWriter, "Generated AWS cluster name: %q\n", generatedName) - - return generatedName, os.WriteFile(deploymentConfigFile, deploymentConfigBytes, 0644) + return &unstructured.Unstructured{Object: deploymentConfig} default: - return "", fmt.Errorf("unsupported provider: %s", provider) + Fail(fmt.Sprintf("unsupported provider: %s", provider)) } + + return nil } // getAWSAMI returns an AWS AMI ID to use for test. 
-func getAWSAMI() (string, error) { +func getAWSAMI() string { + GinkgoHelper() + // For now we'll just use the latest Kubernetes version for ubuntu 20.04, // but we could potentially pin the Kube version and specify that here. cmd := exec.Command("./bin/clusterawsadm", "ami", "list", "--os=ubuntu-20.04", "-o", "json") - output, err := Run(cmd) - if err != nil { - return "", fmt.Errorf("failed to list AMIs: %w", err) - } + output, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "failed to list AMIs") var amiList map[string]interface{} - if err := json.Unmarshal(output, &amiList); err != nil { - return "", fmt.Errorf("failed to unmarshal AMI list: %w", err) - } + err = json.Unmarshal(output, &amiList) + Expect(err).NotTo(HaveOccurred(), "failed to unmarshal AMI list") // ami list returns a sorted list of AMIs by kube version, just get the // first one. @@ -131,9 +122,11 @@ func getAWSAMI() (string, error) { continue } - return ami, nil + return ami } } - return "", fmt.Errorf("no AMIs found") + Fail("no AMIs found") + + return "" } diff --git a/test/deployment/resources/deployment.yaml.tpl b/test/deployment/resources/deployment.yaml.tpl new file mode 100644 index 000000000..372003b58 --- /dev/null +++ b/test/deployment/resources/deployment.yaml.tpl @@ -0,0 +1,17 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: Deployment +metadata: + name: ${DEPLOYMENT_NAME} +spec: + config: + controlPlane: + amiID: ${AMI_ID} + instanceType: ${INSTANCE_TYPE} + controlPlaneNumber: ${CONTROL_PLANE_NUMBER} + publicIP: ${PUBLIC_IP} + region: ${AWS_REGION} + worker: + amiID: ${AMI_ID} + instanceType: ${INSTANCE_TYPE} + workersNumber: ${WORKERS_NUMBER} + template: ${TEMPLATE_NAME} diff --git a/test/deployment/validate_deleted.go b/test/deployment/validate_deleted.go new file mode 100644 index 000000000..6be7b5969 --- /dev/null +++ b/test/deployment/validate_deleted.go @@ -0,0 +1,116 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package deployment + +import ( + "context" + "fmt" + + "github.com/Mirantis/hmc/test/kubeclient" + . "github.com/onsi/ginkgo/v2" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +var deletionValidators = map[string]resourceValidationFunc{ + "clusters": validateClusterDeleted, + "machinedeployments": validateMachineDeploymentsDeleted, + "control-planes": validateK0sControlPlanesDeleted, +} + +// VerifyProviderDeleted is a provider-agnostic verification that checks +// to ensure generic resources managed by the provider have been deleted. +// It is intended to be used in conjunction with an Eventually block. +func VerifyProviderDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + // Sequentially validate each resource type, only returning the first error + // as to not move on to the next resource type until the first is resolved. + // We use []string here since order is important. 
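+	// Order is children-first: the control planes and machine deployments should be gone before the owning Cluster object itself disappears.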
+ for _, name := range []string{"control-planes", "machinedeployments", "clusters"} { + validator, ok := resourceValidators[name] + if !ok { + continue + } + + if err := validator(ctx, kc, clusterName); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation error: %v\n", name, err) + return err + } + + _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) + delete(deletionValidators, name) + } + + return nil +} + +// validateClusterDeleted validates that the Cluster resource has been deleted. +func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + // Validate that the Cluster resource has been deleted + cluster, err := kc.GetCluster(ctx, clusterName) + if err != nil { + return err + } + + var inPhase string + + if cluster != nil { + phase, _, _ := unstructured.NestedString(cluster.Object, "status", "phase") + if phase != "" { + inPhase = ", in phase: " + phase + } + + return fmt.Errorf("cluster %q still exists%s", clusterName, inPhase) + } + + return nil +} + +// validateMachineDeploymentsDeleted validates that all MachineDeployments have +// been deleted. +func validateMachineDeploymentsDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + machineDeployments, err := kc.ListMachineDeployments(ctx, clusterName) + if err != nil { + return err + } + + if len(machineDeployments) > 0 { + var mdNames []string + for _, md := range machineDeployments { + mdNames = append(mdNames, md.GetName()) + } + + return fmt.Errorf("machine deployments still exist: %s", mdNames) + } + + return nil +} + +// validateK0sControlPlanesDeleted validates that all k0scontrolplanes have +// been deleted. +func validateK0sControlPlanesDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + controlPlanes, err := kc.ListK0sControlPlanes(ctx, clusterName) + if err != nil { + return err + } + + if len(controlPlanes) > 0 { + var cpNames []string + for _, cp := range controlPlanes { + cpNames = append(cpNames, cp.GetName()) + } + + return fmt.Errorf("k0s control planes still exist: %s", cpNames) + } + + return nil +} diff --git a/test/e2e/validate_deployment.go b/test/deployment/validate_deployed.go similarity index 71% rename from test/e2e/validate_deployment.go rename to test/deployment/validate_deployed.go index 5d19f69b7..8aebc458c 100644 --- a/test/e2e/validate_deployment.go +++ b/test/deployment/validate_deployed.go @@ -12,11 +12,12 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package e2e +package deployment import ( "context" "fmt" + "strings" "github.com/Mirantis/hmc/test/kubeclient" "github.com/Mirantis/hmc/test/utils" @@ -26,7 +27,6 @@ import ( "k8s.io/apimachinery/pkg/api/resource" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/apimachinery/pkg/util/intstr" ) @@ -35,22 +35,24 @@ import ( type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error var resourceValidators = map[string]resourceValidationFunc{ - "clusters": validateClusters, + "clusters": validateCluster, "machines": validateMachines, "control-planes": validateK0sControlPlanes, "csi-driver": validateCSIDriver, "ccm": validateCCM, } -// verifyProviderDeployed is a provider-agnostic verification that checks for +// VerifyProviderDeployed is a provider-agnostic verification that checks for // the presence of specific resources in the cluster using -// resourceValidationFuncs and clusterValidationFuncs. It is meant to be used -// in conjunction with an Eventually block. In some cases it may be necessary -// to end the Eventually block early if the resource will never reach a ready -// state, in these instances Ginkgo's Fail function should be used. -func verifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { +// resourceValidationFuncs. It is meant to be used in conjunction with an +// Eventually block. +// In some cases it may be necessary to end the Eventually block early if the +// resource will never reach a ready state, in these instances Ginkgo's Fail +// should be used to end the spec early. +func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { // Sequentially validate each resource type, only returning the first error // as to not move on to the next resource type until the first is resolved. + // We use []string here since order is important. for _, name := range []string{"clusters", "machines", "control-planes", "csi-driver", "ccm"} { validator, ok := resourceValidators[name] if !ok { @@ -63,30 +65,16 @@ func verifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clus } _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) - // XXX: Once we validate for the first time should we move the - // validation out and consider it "done"? Or is there a possibility - // that the resources could enter a non-ready state later? 
delete(resourceValidators, name) } return nil } -func validateClusters(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - gvr := schema.GroupVersionResource{ - Group: "cluster.x-k8s.io", - Version: "v1beta1", - Resource: "clusters", - } - - client, err := kc.GetDynamicClient(gvr) - if err != nil { - Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) - } - - cluster, err := client.Get(ctx, clusterName, metav1.GetOptions{}) +func validateCluster(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + cluster, err := kc.GetCluster(ctx, clusterName) if err != nil { - return fmt.Errorf("failed to get %s %s: %v", gvr.Resource, clusterName, err) + return err } phase, _, err := unstructured.NestedString(cluster.Object, "status", "phase") @@ -110,25 +98,16 @@ func validateClusters(ctx context.Context, kc *kubeclient.KubeClient, clusterNam } func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - gvr := schema.GroupVersionResource{ - Group: "cluster.x-k8s.io", - Version: "v1beta1", - Resource: "machines", - } - - client, err := kc.GetDynamicClient(gvr) + machines, err := kc.ListMachines(ctx, clusterName) if err != nil { - Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) + return fmt.Errorf("failed to list machines: %w", err) } - machines, err := client.List(ctx, metav1.ListOptions{ - LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, - }) if err != nil { - return fmt.Errorf("failed to list %s: %v", gvr.Resource, err) + return fmt.Errorf("failed to list Machines: %w", err) } - for _, machine := range machines.Items { + for _, machine := range machines { if err := utils.ValidateObjectNamePrefix(&machine, clusterName); err != nil { Fail(err.Error()) } @@ -142,23 +121,12 @@ func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterNam } func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - k0sControlPlaneClient, err := kc.GetDynamicClient(schema.GroupVersionResource{ - Group: "controlplane.cluster.x-k8s.io", - Version: "v1beta1", - Resource: "k0scontrolplanes", - }) - if err != nil { - return fmt.Errorf("failed to get K0sControlPlane client: %w", err) - } - - controlPlanes, err := k0sControlPlaneClient.List(ctx, metav1.ListOptions{ - LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, - }) + controlPlanes, err := kc.ListK0sControlPlanes(ctx, clusterName) if err != nil { return fmt.Errorf("failed to list K0sControlPlanes: %w", err) } - for _, controlPlane := range controlPlanes.Items { + for _, controlPlane := range controlPlanes { if err := utils.ValidateObjectNamePrefix(&controlPlane, clusterName); err != nil { Fail(err.Error()) } @@ -181,30 +149,18 @@ func validateK0sControlPlanes(ctx context.Context, kc *kubeclient.KubeClient, cl return fmt.Errorf("expected K0sControlPlane condition to be type map[string]interface{}, got: %T", status) } + if _, ok := st["ready"]; !ok { + return fmt.Errorf("%s %s has no 'ready' status", objKind, objName) + } + if !st["ready"].(bool) { - return fmt.Errorf("K0sControlPlane %s is not ready, status: %+v", controlPlane.GetName(), status) + return fmt.Errorf("%s %s is not ready, status: %+v", objKind, objName, st) } } return nil } -// apiVersion: v1 -// kind: Pod -// metadata: -// name: test-pvc-pod -// spec: -// volumes: -// - name: test-pvc-vol -// persistentVolumeClaim: -// claimName: pvcName -// containers: -// - name: test-pvc-container -// image: 
nginx -// volumeMounts: -// - mountPath: "/storage" -// name: task-pv-storage - // validateCSIDriver validates that the provider CSI driver is functioning // by creating a PVC and verifying it enters "Bound" status. func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { @@ -235,22 +191,64 @@ func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterNa // Since these resourceValidationFuncs are intended to be used in // Eventually we should ensure a follow-up PVCreate is a no-op. if !apierrors.IsAlreadyExists(err) { + // XXX: Maybe we should Fail here? return fmt.Errorf("failed to create test PVC: %w", err) } } - // Verify the PVC enters "Bound" status. + // Create a pod that uses the PVC so that the PVC enters "Bound" status. + _, err = clusterKC.Client.CoreV1().Pods(clusterKC.Namespace).Create(ctx, &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName + "-pod", + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "test-pvc-vol", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "test-pvc-container", + Image: "nginx", + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: "/storage", + Name: "test-pvc-vol", + }, + }, + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("failed to create test Pod: %w", err) + } + } + + // Verify the PVC enters "Bound" status and inherits the CSI driver + // storageClass without us having to specify it. pvc, err := clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). Get(ctx, pvcName, metav1.GetOptions{}) if err != nil { return fmt.Errorf("failed to get test PVC: %w", err) } - if pvc.Status.Phase == corev1.ClaimBound { - return nil + if !strings.Contains(*pvc.Spec.StorageClassName, "csi") { + Fail(fmt.Sprintf("%s PersistentVolumeClaim does not have a CSI driver storageClass", pvcName)) + } + + if pvc.Status.Phase != corev1.ClaimBound { + return fmt.Errorf("%s PersistentVolume not yet 'Bound', current phase: %q", pvcName, pvc.Status.Phase) } - return fmt.Errorf("%s PersistentVolume not yet 'Bound', current phase: %q", pvcName, pvc.Status.Phase) + return nil } // validateCCM validates that the provider's cloud controller manager is diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ffbde6a09..9645b1e72 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -17,6 +17,7 @@ package e2e import ( "context" "fmt" + "os" "os/exec" "strings" "time" @@ -25,6 +26,7 @@ import ( . 
"github.com/onsi/gomega" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "github.com/Mirantis/hmc/test/deployment" "github.com/Mirantis/hmc/test/kubeclient" "github.com/Mirantis/hmc/test/utils" ) @@ -32,12 +34,12 @@ import ( const namespace = "hmc-system" var _ = Describe("controller", Ordered, func() { - // BeforeAll(func() { - // By("building and deploying the controller-manager") - // cmd := exec.Command("make", "test-apply") - // _, err := utils.Run(cmd) - // Expect(err).NotTo(HaveOccurred()) - // }) + BeforeAll(func() { + By("building and deploying the controller-manager") + cmd := exec.Command("make", "test-apply") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + }) // AfterAll(func() { // By("removing the controller-manager") @@ -96,8 +98,10 @@ var _ = Describe("controller", Ordered, func() { Context("AWS Templates", func() { var ( - kc *kubeclient.KubeClient - err error + kc *kubeclient.KubeClient + deleteDeploymentFunc func() error + clusterName string + err error ) BeforeAll(func() { @@ -108,30 +112,44 @@ var _ = Describe("controller", Ordered, func() { }) AfterAll(func() { - // // Purge the AWS resources, the AfterAll for the controller will - // // clean up the management cluster. - // cmd := exec.Command("make", "dev-aws-nuke") - // _, err := utils.Run(cmd) - // ExpectWithOffset(2, err).NotTo(HaveOccurred()) + // Delete the deployment if it was created. + if deleteDeploymentFunc != nil { + err = deleteDeploymentFunc() + Expect(err).NotTo(HaveOccurred()) + } + + // Purge the AWS resources, the AfterAll for the controller will + // clean up the management cluster. + err = os.Setenv("CLUSTER_NAME", clusterName) + Expect(err).NotTo(HaveOccurred()) + cmd := exec.Command("make", "dev-aws-nuke") + _, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) }) It("should work with an AWS provider", func() { - By("using the aws-standalone-cp template") - //clusterName, err := utils.ConfigureDeploymentConfig(utils.ProviderAWS, utils.TemplateAWSStandaloneCP) - //ExpectWithOffset(1, err).NotTo(HaveOccurred()) + By("creating a Deployment with aws-standalone-cp template") + d := deployment.GetUnstructuredDeployment(deployment.ProviderAWS, deployment.TemplateAWSStandaloneCP) + clusterName = d.GetName() - clusterName := "bba1743d-e2e-test" + deleteDeploymentFunc, err = kc.CreateDeployment(context.Background(), d) + Expect(err).NotTo(HaveOccurred()) - cmd := exec.Command("make", "dev-aws-apply") - _, err = utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - _, _ = fmt.Fprintf(GinkgoWriter, "Waiting for resource validation to succeed\n") + By("waiting for infrastructure providers to deploy successfully") Eventually(func() error { - return verifyProviderDeployed(context.Background(), kc, clusterName) - }).WithTimeout(30 * time.Minute).WithPolling(5 * time.Second).Should(Succeed()) - By("using the aws-hosted-cp template") - // TODO: Use the standalone control plane resources to craft a hosted - // control plane and test it. 
+ return deployment.VerifyProviderDeployed(context.Background(), kc, clusterName) + }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + By("verifying the deployment deletes successfully") + err = deleteDeploymentFunc() + Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + return deployment.VerifyProviderDeleted(context.Background(), kc, clusterName) + }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + By("creating a Deployment with aws-hosted-cp template") + // TODO: Use the standalone control plane resources to craft a + // hosted control plane and test it. }) }) }) diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index eb922777f..d2521291d 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -22,10 +22,12 @@ import ( "path/filepath" "github.com/Mirantis/hmc/test/utils" + . "github.com/onsi/ginkgo/v2" corev1 "k8s.io/api/core/v1" apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/runtime/schema" "k8s.io/client-go/dynamic" "k8s.io/client-go/kubernetes" @@ -162,3 +164,106 @@ func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) (dynamic return client.Resource(gvr).Namespace(kc.Namespace), nil } + +// CreateDeployment creates a deployment.hmc.mirantis.com in the given +// namespace and returns a DeleteFunc to clean up the deployment. +// The DeleteFunc is a no-op if the deployment has already been deleted. +func (kc *KubeClient) CreateDeployment( + ctx context.Context, deployment *unstructured.Unstructured) (func() error, error) { + kind := deployment.GetKind() + + if kind != "Deployment" { + return nil, fmt.Errorf("expected kind Deployment, got: %s", kind) + } + + client, err := kc.GetDynamicClient(schema.GroupVersionResource{ + Group: "hmc.mirantis.com", + Version: "v1alpha1", + Resource: "deployments", + }) + if err != nil { + return nil, fmt.Errorf("failed to get dynamic client: %w", err) + } + + _, err = client.Create(ctx, deployment, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create Deployment: %w", err) + } + + return func() error { + err := client.Delete(ctx, deployment.GetName(), metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + return err + }, nil +} + +// GetCluster returns a Cluster resource by name. +func (kc *KubeClient) GetCluster(ctx context.Context, clusterName string) (*unstructured.Unstructured, error) { + gvr := schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "clusters", + } + + client, err := kc.GetDynamicClient(gvr) + if err != nil { + Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) + } + + cluster, err := client.Get(ctx, clusterName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get %s %s: %w", gvr.Resource, clusterName, err) + } + + return cluster, nil +} + +// listResource returns a list of resources for the given GroupVersionResource +// affiliated with the given clusterName. 
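+// Affiliation here means carrying the CAPI label
+// cluster.x-k8s.io/cluster-name=<clusterName>, which is what the list call
+// below selects on.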
+func (kc *KubeClient) listResource( + ctx context.Context, gvr schema.GroupVersionResource, clusterName string) ([]unstructured.Unstructured, error) { + client, err := kc.GetDynamicClient(gvr) + if err != nil { + Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) + } + + resources, err := client.List(ctx, metav1.ListOptions{ + LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, + }) + if err != nil { + return nil, fmt.Errorf("failed to list %s: %w", gvr.Resource, err) + } + + return resources.Items, nil +} + +// ListMachines returns a list of Machine resources for the given cluster. +func (kc *KubeClient) ListMachines(ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + return kc.listResource(ctx, schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "machines", + }, clusterName) +} + +// ListMachineDeployments returns a list of MachineDeployment resources for the +// given cluster. +func (kc *KubeClient) ListMachineDeployments( + ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + return kc.listResource(ctx, schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "machinedeployments", + }, clusterName) +} + +// ListK0sControlPlanes returns a list of K0sControlPlane resources for the +// given cluster. +func (kc *KubeClient) ListK0sControlPlanes( + ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + return kc.listResource(ctx, schema.GroupVersionResource{ + Group: "controlplane.cluster.x-k8s.io", + Version: "v1beta1", + Resource: "k0scontrolplanes", + }, clusterName) +} From 7316869a0f55b05003d2e4e6bca091b624febba6 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Fri, 23 Aug 2024 13:03:59 -0700 Subject: [PATCH 08/35] Use envsubst to templatize deployment yaml Signed-off-by: Kyle Squizzato --- .golangci.yml | 6 +-- config/dev/deployment.yaml | 17 ++++++++ go.mod | 1 + go.sum | 2 + test/deployment/deployment.go | 40 +++++-------------- test/deployment/resources/deployment.yaml.tpl | 15 +++---- test/deployment/validate_deleted.go | 2 +- test/deployment/validate_deployed.go | 5 +-- test/e2e/e2e_test.go | 6 +-- test/kubeclient/kubeclient.go | 2 +- test/utils/utils.go | 6 +-- 11 files changed, 52 insertions(+), 50 deletions(-) create mode 100644 config/dev/deployment.yaml diff --git a/.golangci.yml b/.golangci.yml index ca69a11f6..a6ffbedab 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -9,10 +9,10 @@ issues: # restore some of the defaults # (fill in the rest as needed) exclude-rules: - - path: "api/*" + - path: 'api/*' linters: - lll - - path: "internal/*" + - path: 'internal/*' linters: - dupl - lll @@ -21,7 +21,7 @@ linters: enable: - dupl - errcheck - - exportloopref + - copyloopvar - goconst - gocyclo - gofmt diff --git a/config/dev/deployment.yaml b/config/dev/deployment.yaml new file mode 100644 index 000000000..58ba18e32 --- /dev/null +++ b/config/dev/deployment.yaml @@ -0,0 +1,17 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: Deployment +metadata: + name: aws-dev +spec: + template: aws-standalone-cp + config: + region: us-east-2 + publicIP: true + controlPlaneNumber: 1 + workersNumber: 1 + controlPlane: + amiID: ami-02f3416038bdb17fb + instanceType: t3.small + worker: + amiID: ami-02f3416038bdb17fb + instanceType: t3.small diff --git a/go.mod b/go.mod index 8e9af40d1..5f23ba966 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/Mirantis/hmc go 1.22.0 require ( + github.com/a8m/envsubst v1.4.2 github.com/cert-manager/cert-manager v1.15.3 
github.com/fluxcd/helm-controller/api v1.0.1 github.com/fluxcd/pkg/apis/meta v1.6.0 diff --git a/go.sum b/go.sum index 64c3ef962..1a361f17d 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7 github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d h1:UrqY+r/OJnIp5u0s1SbQ8dVfLCZJsnvazdBP5hS4iRs= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= +github.com/a8m/envsubst v1.4.2 h1:4yWIHXOLEJHQEFd4UjrWDrYeYlV7ncFWJOCBRLOZHQg= +github.com/a8m/envsubst v1.4.2/go.mod h1:MVUTQNGQ3tsjOOtKCNd+fl8RzhsXcDvvAEzkhGtlsbY= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= diff --git a/test/deployment/deployment.go b/test/deployment/deployment.go index 5faedb169..3cbfcb361 100644 --- a/test/deployment/deployment.go +++ b/test/deployment/deployment.go @@ -22,6 +22,7 @@ import ( "os/exec" "github.com/Mirantis/hmc/test/utils" + "github.com/a8m/envsubst" "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" @@ -43,7 +44,7 @@ const ( ) //go:embed resources/deployment.yaml.tpl -var deploymentConfigBytes []byte +var deploymentTemplateBytes []byte // GetUnstructuredDeployment returns an unstructured deployment object based on // the provider and template. @@ -53,41 +54,22 @@ func GetUnstructuredDeployment(provider ProviderType, templateName Template) *un generatedName := uuid.New().String()[:8] + "-e2e-test" _, _ = fmt.Fprintf(GinkgoWriter, "Generated AWS cluster name: %q\n", generatedName) - var deploymentConfig map[string]interface{} - - err := yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig) - Expect(err).NotTo(HaveOccurred(), "failed to unmarshal deployment config") - switch provider { case ProviderAWS: // XXX: Maybe we should just use automatic AMI selection here. amiID := getAWSAMI() - awsRegion := os.Getenv("AWS_REGION") - - // Modify the deployment config to use the generated name and the AMI. - // TODO: This should be modified to use go templating. - if metadata, ok := deploymentConfig["metadata"].(map[string]interface{}); ok { - metadata["name"] = generatedName - } else { - // Ensure we always have a metadata.name field populated. 
- deploymentConfig["metadata"] = map[string]interface{}{"name": generatedName} - } - if spec, ok := deploymentConfig["spec"].(map[string]interface{}); ok { - if config, ok := spec["config"].(map[string]interface{}); ok { - if awsRegion != "" { - config["region"] = awsRegion - } + Expect(os.Setenv("AMI_ID", amiID)).NotTo(HaveOccurred()) + Expect(os.Setenv("DEPLOYMENT_NAME", generatedName)).NotTo(HaveOccurred()) + Expect(os.Setenv("TEMPLATE_NAME", string(templateName))).NotTo(HaveOccurred()) - if worker, ok := config["worker"].(map[string]interface{}); ok { - worker["amiID"] = amiID - } + deploymentConfigBytes, err := envsubst.Bytes(deploymentTemplateBytes) + Expect(err).NotTo(HaveOccurred(), "failed to substitute environment variables") - if controlPlane, ok := config["controlPlane"].(map[string]interface{}); ok { - controlPlane["amiID"] = amiID - } - } - } + var deploymentConfig map[string]interface{} + + err = yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig) + Expect(err).NotTo(HaveOccurred(), "failed to unmarshal deployment config") return &unstructured.Unstructured{Object: deploymentConfig} default: diff --git a/test/deployment/resources/deployment.yaml.tpl b/test/deployment/resources/deployment.yaml.tpl index 372003b58..ac10f9f84 100644 --- a/test/deployment/resources/deployment.yaml.tpl +++ b/test/deployment/resources/deployment.yaml.tpl @@ -3,15 +3,16 @@ kind: Deployment metadata: name: ${DEPLOYMENT_NAME} spec: + template: ${TEMPLATE_NAME} config: + region: ${AWS_REGION} + publicIP: ${PUBLIC_IP:=true} + controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} + workersNumber: ${WORKERS_NUMBER:=1} controlPlane: amiID: ${AMI_ID} - instanceType: ${INSTANCE_TYPE} - controlPlaneNumber: ${CONTROL_PLANE_NUMBER} - publicIP: ${PUBLIC_IP} - region: ${AWS_REGION} + instanceType: ${INSTANCE_TYPE:=t3.small} worker: amiID: ${AMI_ID} - instanceType: ${INSTANCE_TYPE} - workersNumber: ${WORKERS_NUMBER} - template: ${TEMPLATE_NAME} + instanceType: ${INSTANCE_TYPE:=t3.small} + diff --git a/test/deployment/validate_deleted.go b/test/deployment/validate_deleted.go index 6be7b5969..ae0497939 100644 --- a/test/deployment/validate_deleted.go +++ b/test/deployment/validate_deleted.go @@ -36,7 +36,7 @@ func VerifyProviderDeleted(ctx context.Context, kc *kubeclient.KubeClient, clust // as to not move on to the next resource type until the first is resolved. // We use []string here since order is important. for _, name := range []string{"control-planes", "machinedeployments", "clusters"} { - validator, ok := resourceValidators[name] + validator, ok := deletionValidators[name] if !ok { continue } diff --git a/test/deployment/validate_deployed.go b/test/deployment/validate_deployed.go index 8aebc458c..66b3d7709 100644 --- a/test/deployment/validate_deployed.go +++ b/test/deployment/validate_deployed.go @@ -191,8 +191,7 @@ func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterNa // Since these resourceValidationFuncs are intended to be used in // Eventually we should ensure a follow-up PVCreate is a no-op. if !apierrors.IsAlreadyExists(err) { - // XXX: Maybe we should Fail here? 
- return fmt.Errorf("failed to create test PVC: %w", err) + Fail(fmt.Sprintf("failed to create test PVC: %v", err)) } } @@ -228,7 +227,7 @@ func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterNa }, metav1.CreateOptions{}) if err != nil { if !apierrors.IsAlreadyExists(err) { - return fmt.Errorf("failed to create test Pod: %w", err) + Fail(fmt.Sprintf("failed to create test Pod: %v", err)) } } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 9645b1e72..ebec00019 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -64,7 +64,7 @@ var _ = Describe("controller", Ordered, func() { } if len(podList.Items) != 1 { - return fmt.Errorf("expected 1 controller pod, got %d", len(podList.Items)) + return fmt.Errorf("expected 1 cluster-api-controller pod, got %d", len(podList.Items)) } controllerPod := podList.Items[0] @@ -84,7 +84,7 @@ var _ = Describe("controller", Ordered, func() { return nil } - EventuallyWithOffset(1, func() error { + Eventually(func() error { err := verifyControllerUp() if err != nil { _, _ = fmt.Fprintf(GinkgoWriter, "Controller pod validation failed: %v\n", err) @@ -92,7 +92,7 @@ var _ = Describe("controller", Ordered, func() { } return nil - }(), 5*time.Minute, time.Second).Should(Succeed()) + }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) }) }) diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index d2521291d..dd8a7a5d1 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -262,7 +262,7 @@ func (kc *KubeClient) ListMachineDeployments( func (kc *KubeClient) ListK0sControlPlanes( ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { return kc.listResource(ctx, schema.GroupVersionResource{ - Group: "control-plane.cluster.x-k8s.io", + Group: "controlplane.cluster.x-k8s.io", Version: "v1beta1", Resource: "k0scontrolplanes", }, clusterName) diff --git a/test/utils/utils.go b/test/utils/utils.go index 613ac9605..d4cc82587 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -130,16 +130,16 @@ func ValidateConditionsTrue(unstrObj *unstructured.Unstructured) error { continue } - errorStr := fmt.Sprintf("%s: %s", c.Type, c.Reason) + errorStr := fmt.Sprintf("%s - Reason: %s", c.Type, c.Reason) if c.Message != "" { errorStr = fmt.Sprintf("%s: %s", errorStr, c.Message) } - errs = errors.Join(fmt.Errorf(errorStr), errs) + errs = errors.Join(errors.New(errorStr), errs) } if errs != nil { - return fmt.Errorf("%s %s is not ready with conditions: %w", objKind, objName, errs) + return fmt.Errorf("%s %s is not ready with conditions:\n%w", objKind, objName, errs) } return nil From ccc0ef1ec3390014c1f7fd847b5ea19abdb74def Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Fri, 23 Aug 2024 15:41:10 -0700 Subject: [PATCH 09/35] Do not attempt mv on bin if it already exists, ignore cloud-nuke configs When GitHub Actions runs the 'make cli-install' target it fails because the GOBIN is already set to ./bin. GitHub appears to use a 'mv' that by default does not wish to clobber the file that now exists (even with -f specified), so place a check in front of the 'mv' to prevent it from ever running if the file exists there. This does not change the existing behavior of the make target as it does not run if a file exists anyways. 
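
For illustration, the guard boils down to this shell sketch (the tool name
and version shown are hypothetical):

    # Skip the clobbering 'mv' entirely when the target already exists,
    # making re-runs (e.g. in CI) a no-op.
    if [ ! -f bin/yq ]; then mv -f bin/yq-v4.44.2 bin/yq; fi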
Signed-off-by: Kyle Squizzato
---
 .gitignore | 3 +++
 Makefile   | 2 +-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index 884169b67..e089f3734 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,6 +14,9 @@ dist
 go.work
 go.work.sum
 
+# cloud nuke config
+*cloud_nuke.yaml
+
 # editors
 .idea
 *.swp
diff --git a/Makefile b/Makefile
index 41f21c0e1..cee53f988 100644
--- a/Makefile
+++ b/Makefile
@@ -432,6 +432,6 @@ set -e; \
 package=$(2)@$(3) ;\
 echo "Downloading $${package}" ;\
 GOBIN=$(LOCALBIN) go install $${package} ;\
-mv "$$(echo "$(1)" | sed "s/-$(3)$$//")" $(1) ;\
+if [ ! -f $(1) ]; then mv -f "$$(echo "$(1)" | sed "s/-$(3)$$//")" $(1); fi ;\
 }
 endef

From d89dd5eee660d48d80d34004ee52d052854afbe0 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Mon, 26 Aug 2024 16:56:26 -0700
Subject: [PATCH 10/35] Add hosted template, break out test contexts, add log
 artifacting

Signed-off-by: Kyle Squizzato
---
 .github/workflows/test.yml                    |  10 ++
 .gitignore                                    |   7 +-
 config/dev/deployment.yaml                    |   6 +-
 go.mod                                        |   2 +-
 test/deployment/deployment.go                 |  23 +++-
 .../resources/aws-hosted-cp.yaml.tpl          |  17 +++
 .../resources/aws-standalone-cp.yaml.tpl      |  19 +++
 test/deployment/resources/deployment.yaml.tpl |  18 ---
 test/deployment/validate_deployed.go          |   4 -
 test/e2e/e2e_test.go                          | 117 +++++++++++++-----
 10 files changed, 161 insertions(+), 62 deletions(-)
 create mode 100644 test/deployment/resources/aws-hosted-cp.yaml.tpl
 create mode 100644 test/deployment/resources/aws-standalone-cp.yaml.tpl
 delete mode 100644 test/deployment/resources/deployment.yaml.tpl

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 6c6ab64f3..dcb9508fa 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -72,3 +72,13 @@ jobs:
       - name: Run E2E tests
         run: |
           make test-e2e
+      - name: Get test logs
+        run: |
+          kubectl logs -n hmc-system -l app=e2e-test > test/e2e/e2e-test.log
+          ./bin/clusterctl describe cluster --show-conditions=all > test/e2e/clusterctl.log
+      - name: Archive test results
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-logs
+          path: |
+            test/e2e/*.log
diff --git a/.gitignore b/.gitignore
index e089f3734..0656b090b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,10 +14,13 @@ dist
 go.work
 go.work.sum
 
-# cloud nuke config
+# cloud-nuke config
 *cloud_nuke.yaml
 
+# Test artifacts
+test/e2e/*.log
+
 # editors
 .idea
 *.swp
 *.swo
diff --git a/config/dev/deployment.yaml b/config/dev/deployment.yaml
index 58ba18e32..a24db4c3e 100644
--- a/config/dev/deployment.yaml
+++ b/config/dev/deployment.yaml
@@ -5,13 +5,13 @@ metadata:
 spec:
   template: aws-standalone-cp
   config:
-    region: us-east-2
+    region: us-west-2
     publicIP: true
     controlPlaneNumber: 1
     workersNumber: 1
     controlPlane:
-      amiID: ami-02f3416038bdb17fb
+      amiID: ami-0989c067ff3da4b27
       instanceType: t3.small
     worker:
-      amiID: ami-02f3416038bdb17fb
+      amiID: ami-0989c067ff3da4b27
       instanceType: t3.small
diff --git a/go.mod b/go.mod
index 5f23ba966..2d6b1ec6c 100644
--- a/go.mod
+++ b/go.mod
@@ -22,6 +22,7 @@ require (
 	k8s.io/apiextensions-apiserver v0.31.0
 	k8s.io/apimachinery v0.31.0
 	k8s.io/client-go v0.31.0
+	k8s.io/utils v0.0.0-20240711033017-18e509b52bc8
 	sigs.k8s.io/cluster-api v1.8.1
 	sigs.k8s.io/controller-runtime v0.19.0
 )
@@ -166,7 +167,6 @@ require (
 	k8s.io/klog/v2 v2.130.1 // indirect
 	k8s.io/kube-openapi v0.0.0-20240430033511-f0e62f92d13f // indirect
 	k8s.io/kubectl v0.31.0 // indirect
-	k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect
 	oras.land/oras-go v1.2.5 // indirect
sigs.k8s.io/gateway-api v1.1.0 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/test/deployment/deployment.go b/test/deployment/deployment.go index 3cbfcb361..337089621 100644 --- a/test/deployment/deployment.go +++ b/test/deployment/deployment.go @@ -43,8 +43,11 @@ const ( TemplateAWSHostedCP Template = "aws-hosted-cp" ) -//go:embed resources/deployment.yaml.tpl -var deploymentTemplateBytes []byte +//go:embed resources/aws-standalone-cp.yaml.tpl +var awsStandaloneCPDeploymentTemplateBytes []byte + +//go:embed resources/aws-hosted-cp.yaml.tpl +var awsHostedCPDeploymentTemplateBytes []byte // GetUnstructuredDeployment returns an unstructured deployment object based on // the provider and template. @@ -52,16 +55,24 @@ func GetUnstructuredDeployment(provider ProviderType, templateName Template) *un GinkgoHelper() generatedName := uuid.New().String()[:8] + "-e2e-test" - _, _ = fmt.Fprintf(GinkgoWriter, "Generated AWS cluster name: %q\n", generatedName) + _, _ = fmt.Fprintf(GinkgoWriter, "Generated cluster name: %q\n", generatedName) switch provider { case ProviderAWS: // XXX: Maybe we should just use automatic AMI selection here. amiID := getAWSAMI() - - Expect(os.Setenv("AMI_ID", amiID)).NotTo(HaveOccurred()) + Expect(os.Setenv("AWS_AMI_ID", amiID)).NotTo(HaveOccurred()) Expect(os.Setenv("DEPLOYMENT_NAME", generatedName)).NotTo(HaveOccurred()) - Expect(os.Setenv("TEMPLATE_NAME", string(templateName))).NotTo(HaveOccurred()) + + var deploymentTemplateBytes []byte + switch templateName { + case TemplateAWSStandaloneCP: + deploymentTemplateBytes = awsStandaloneCPDeploymentTemplateBytes + case TemplateAWSHostedCP: + deploymentTemplateBytes = awsHostedCPDeploymentTemplateBytes + default: + Fail(fmt.Sprintf("unsupported AWS template: %s", templateName)) + } deploymentConfigBytes, err := envsubst.Bytes(deploymentTemplateBytes) Expect(err).NotTo(HaveOccurred(), "failed to substitute environment variables") diff --git a/test/deployment/resources/aws-hosted-cp.yaml.tpl b/test/deployment/resources/aws-hosted-cp.yaml.tpl new file mode 100644 index 000000000..2606b64f9 --- /dev/null +++ b/test/deployment/resources/aws-hosted-cp.yaml.tpl @@ -0,0 +1,17 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: Deployment +metadata: + name: ${DEPLOYMENT_NAME} +spec: + template: aws-hosted-cp + config: + vpcID: ${AWS_VPC_ID} + region: ${AWS_REGION} + publicIP: ${PUBLIC_IP:=true} + subnets: + - id: ${AWS_SUBNET_ID} + availabilityZone: ${AWS_SUBNET_AVAILABILITY_ZONE} + amiID: ${AWS_AMI_ID} + instanceType: ${INSTANCE_TYPE:=t3.medium} + securityGroupIDs: + - ${AWS_SG_ID} diff --git a/test/deployment/resources/aws-standalone-cp.yaml.tpl b/test/deployment/resources/aws-standalone-cp.yaml.tpl new file mode 100644 index 000000000..220fa600b --- /dev/null +++ b/test/deployment/resources/aws-standalone-cp.yaml.tpl @@ -0,0 +1,19 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: Deployment +metadata: + name: ${DEPLOYMENT_NAME} +spec: + template: aws-standalone-cp + config: + region: ${AWS_REGION} + publicIP: ${PUBLIC_IP:=true} + controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} + workersNumber: ${WORKERS_NUMBER:=1} + controlPlane: + amiID: ${AWS_AMI_ID} + instanceType: ${INSTANCE_TYPE:=t3.small} + worker: + amiID: ${AWS_AMI_ID} + instanceType: ${INSTANCE_TYPE:=t3.small} + + diff --git a/test/deployment/resources/deployment.yaml.tpl b/test/deployment/resources/deployment.yaml.tpl deleted file mode 100644 index ac10f9f84..000000000 --- a/test/deployment/resources/deployment.yaml.tpl +++ /dev/null 
@@ -1,18 +0,0 @@ -apiVersion: hmc.mirantis.com/v1alpha1 -kind: Deployment -metadata: - name: ${DEPLOYMENT_NAME} -spec: - template: ${TEMPLATE_NAME} - config: - region: ${AWS_REGION} - publicIP: ${PUBLIC_IP:=true} - controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} - workersNumber: ${WORKERS_NUMBER:=1} - controlPlane: - amiID: ${AMI_ID} - instanceType: ${INSTANCE_TYPE:=t3.small} - worker: - amiID: ${AMI_ID} - instanceType: ${INSTANCE_TYPE:=t3.small} - diff --git a/test/deployment/validate_deployed.go b/test/deployment/validate_deployed.go index 66b3d7709..e7e1239ff 100644 --- a/test/deployment/validate_deployed.go +++ b/test/deployment/validate_deployed.go @@ -103,10 +103,6 @@ func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterNam return fmt.Errorf("failed to list machines: %w", err) } - if err != nil { - return fmt.Errorf("failed to list Machines: %w", err) - } - for _, machine := range machines { if err := utils.ValidateObjectNamePrefix(&machine, clusterName); err != nil { Fail(err.Error()) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ebec00019..ca0e73abb 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -15,16 +15,20 @@ package e2e import ( + "bufio" "context" "fmt" "os" "os/exec" + "path/filepath" "strings" "time" . "github.com/onsi/ginkgo/v2" . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "github.com/Mirantis/hmc/test/deployment" "github.com/Mirantis/hmc/test/kubeclient" @@ -98,10 +102,10 @@ var _ = Describe("controller", Ordered, func() { Context("AWS Templates", func() { var ( - kc *kubeclient.KubeClient - deleteDeploymentFunc func() error - clusterName string - err error + kc *kubeclient.KubeClient + deleteFunc func() error + clusterName string + err error ) BeforeAll(func() { @@ -111,10 +115,17 @@ var _ = Describe("controller", Ordered, func() { ExpectWithOffset(2, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) }) - AfterAll(func() { - // Delete the deployment if it was created. - if deleteDeploymentFunc != nil { - err = deleteDeploymentFunc() + AfterEach(func() { + // If we failed collect logs from each of the affiliated controllers + // as well as the output of clusterctl to store as artifacts. + if CurrentSpecReport().Failed() { + By("collecting failure logs from controllers") + collectLogArtifacts(kc, clusterName, deployment.ProviderAWS) + } + + // Delete the deployments if they were created. + if deleteFunc != nil { + err = deleteFunc() Expect(err).NotTo(HaveOccurred()) } @@ -127,29 +138,79 @@ var _ = Describe("controller", Ordered, func() { ExpectWithOffset(2, err).NotTo(HaveOccurred()) }) - It("should work with an AWS provider", func() { - By("creating a Deployment with aws-standalone-cp template") - d := deployment.GetUnstructuredDeployment(deployment.ProviderAWS, deployment.TemplateAWSStandaloneCP) - clusterName = d.GetName() + for _, template := range []deployment.Template{deployment.TemplateAWSStandaloneCP, deployment.TemplateAWSHostedCP} { + It(fmt.Sprintf("should work with an AWS provider and %s template", template), func() { + if template == deployment.TemplateAWSHostedCP { + // TODO: Create AWS resources for hosted control plane. 
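+					// The aws-hosted-cp template (aws-hosted-cp.yaml.tpl) expects
+					// an existing VPC, subnet and security group supplied via
+					// AWS_VPC_ID, AWS_SUBNET_ID, AWS_SUBNET_AVAILABILITY_ZONE and
+					// AWS_SG_ID; these could plausibly be reused from the
+					// standalone cluster created above.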
+					Skip("AWS hosted control plane not yet implemented")
+				}
 
-			deleteDeploymentFunc, err = kc.CreateDeployment(context.Background(), d)
-			Expect(err).NotTo(HaveOccurred())
+				By("creating a Deployment")
+				d := deployment.GetUnstructuredDeployment(deployment.ProviderAWS, template)
+				clusterName = d.GetName()
 
-			By("waiting for infrastructure providers to deploy successfully")
-			Eventually(func() error {
-				return deployment.VerifyProviderDeployed(context.Background(), kc, clusterName)
-			}).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed())
+				deleteFunc, err = kc.CreateDeployment(context.Background(), d)
+				Expect(err).NotTo(HaveOccurred())
 
-			By("verifying the deployment deletes successfully")
-			err = deleteDeploymentFunc()
-			Expect(err).NotTo(HaveOccurred())
-			Eventually(func() error {
-				return deployment.VerifyProviderDeleted(context.Background(), kc, clusterName)
-			}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed())
+				By("waiting for infrastructure providers to deploy successfully")
+				Eventually(func() error {
+					return deployment.VerifyProviderDeployed(context.Background(), kc, clusterName)
+				}).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed())
 
-			By("creating a Deployment with aws-hosted-cp template")
-			// TODO: Use the standalone control plane resources to craft a
-			// hosted control plane and test it.
-		})
+				By("verifying the deployment deletes successfully")
+				err = deleteFunc()
+				Expect(err).NotTo(HaveOccurred())
+				Eventually(func() error {
+					return deployment.VerifyProviderDeleted(context.Background(), kc, clusterName)
+				}).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed())
+			})
+		}
 	})
 })
+
+// collectLogArtifacts collects log output from each of the HMC controller,
+// CAPI controller and the provider controller as well as output from clusterctl
+// and stores them in the test/e2e directory as artifacts.
+// We could do this at the end or we could use Kubernetes' CopyPodLogs from
+// https://github.com/kubernetes/kubernetes/blob/v1.31.0/test/e2e/storage/podlogs/podlogs.go#L88
+// to stream the logs to GinkgoWriter during the test.
+func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerType deployment.ProviderType) { + GinkgoHelper() + + filterLabels := []string{ + "app.kubernetes.io/name=hmc-controller-manager", + "app.kubernetes.io/name=cluster-api", + fmt.Sprintf("app.kubernetes.io/name=cluster-api-provider-%s", providerType), + } + + for _, label := range filterLabels { + pods, _ := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: label, + }) + + for _, pod := range pods.Items { + req := kc.Client.CoreV1().Pods(kc.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ + TailLines: ptr.To(int64(1000)), + }) + podLogs, err := req.Stream(context.Background()) + Expect(err).NotTo(HaveOccurred(), "failed to get log stream for pod %s", pod.Name) + DeferCleanup(Expect(podLogs.Close()).NotTo(HaveOccurred())) + + output, err := os.Create(fmt.Sprintf("test/e2e/%s.log", pod.Name)) + Expect(err).NotTo(HaveOccurred(), "failed to create log file for pod %s", pod.Name) + DeferCleanup(Expect(output.Close()).NotTo(HaveOccurred())) + + r := bufio.NewReader(podLogs) + _, err = r.WriteTo(output) + Expect(err).NotTo(HaveOccurred(), "failed to write log file for pod %s", pod.Name) + } + } + + cmd := exec.Command("./bin/clusterctl", + "describe", "cluster", clusterName, "--show-conditions=all") + output, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "failed to get clusterctl log") + + err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) + Expect(err).NotTo(HaveOccurred(), "failed to write clusterctl log") +} From 159bf40fe3bd7631b23789265b94a9a0ca6580ac Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 03:26:51 -0700 Subject: [PATCH 11/35] Update helm-push targets to support pushing to non-OCI Signed-off-by: Kyle Squizzato --- Makefile | 17 ++++++++++++++--- config/dev/deployment.yaml | 6 +++--- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/Makefile b/Makefile index cee53f988..98b8dae71 100644 --- a/Makefile +++ b/Makefile @@ -249,12 +249,23 @@ helm-push: helm-package chart_version=$$(echo $$base | grep -o "v\{0,1\}[0-9]\+\.[0-9]\+\.[0-9].*"); \ chart_name="$${base%-"$$chart_version"}"; \ echo "Verifying if chart $$chart_name, version $$chart_version already exists in $(REGISTRY_REPO)"; \ - chart_exists=$$($(HELM) pull $(REGISTRY_REPO)/$$chart_name --version $$chart_version --destination /tmp 2>&1 | grep "not found" || true); \ + chart_exists=$$($(HELM) pull --repo $(REGISTRY_REPO) $$chart_name --version $$chart_version --destination /tmp 2>&1 | grep "not found" || true); \ if [ -z "$$chart_exists" ]; then \ echo "Chart $$chart_name version $$chart_version already exists in the repository."; \ else \ - echo "Pushing $$chart to $(REGISTRY_REPO)"; \ - $(HELM) push "$$chart" $(REGISTRY_REPO); \ + if [ $(REGISTRY_REPO) == "oci://*" ]; then \ + echo "Pushing $$chart to $(REGISTRY_REPO)"; \ + $(HELM) push "$$chart" $(REGISTRY_REPO); \ + else \ + if [ ! $$REGISTRY_USERNAME ] && [ ! 
$$REGISTRY_PASSWORD ]; then \ + echo "REGISTRY_USERNAME and REGISTRY_PASSWORD must be populated to push the chart to an HTTPS repository"; \ + exit 1; \ + else \ + $(HELM) repo add hmc $(REGISTRY_REPO); \ + echo "Pushing $$chart to $(REGISTRY_REPO)"; \ + $(HELM) cm-push "$$chart" $(REGISTRY_REPO) --username $$REGISTRY_USERNAME --password $$REGISTRY_PASSWORD; \ + fi; \ + fi; \ fi; \ done diff --git a/config/dev/deployment.yaml b/config/dev/deployment.yaml index a24db4c3e..58ba18e32 100644 --- a/config/dev/deployment.yaml +++ b/config/dev/deployment.yaml @@ -5,13 +5,13 @@ metadata: spec: template: aws-standalone-cp config: - region: us-west-2 + region: us-east-2 publicIP: true controlPlaneNumber: 1 workersNumber: 1 controlPlane: - amiID: ami-0989c067ff3da4b27 + amiID: ami-02f3416038bdb17fb instanceType: t3.small worker: - amiID: ami-0989c067ff3da4b27 + amiID: ami-02f3416038bdb17fb instanceType: t3.small From c7071659e1a3cb814b015d8e31ff290f6f57eb4d Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 12:34:26 -0700 Subject: [PATCH 12/35] Re-enable AfterAll Signed-off-by: Kyle Squizzato --- test/e2e/e2e_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ca0e73abb..ca87f3df0 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -45,12 +45,12 @@ var _ = Describe("controller", Ordered, func() { Expect(err).NotTo(HaveOccurred()) }) - // AfterAll(func() { - // By("removing the controller-manager") - // cmd := exec.Command("make", "test-destroy") - // _, err := utils.Run(cmd) - // Expect(err).NotTo(HaveOccurred()) - // }) + AfterAll(func() { + By("removing the controller-manager") + cmd := exec.Command("make", "test-destroy") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) + }) Context("Operator", func() { It("should run successfully", func() { From 59aad06c417c1442c6b304a73b84169021289716 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 13:21:36 -0700 Subject: [PATCH 13/35] Update controller pod tests to look for CAP providers Signed-off-by: Kyle Squizzato --- test/deployment/deployment.go | 10 +++- test/e2e/e2e_test.go | 87 ++++++++++++++++++++++------------- 2 files changed, 63 insertions(+), 34 deletions(-) diff --git a/test/deployment/deployment.go b/test/deployment/deployment.go index 337089621..a207397d9 100644 --- a/test/deployment/deployment.go +++ b/test/deployment/deployment.go @@ -33,7 +33,11 @@ import ( type ProviderType string const ( - ProviderAWS ProviderType = "aws" + ProviderCAPI ProviderType = "cluster-api" + ProviderAWS ProviderType = "infrastructure-aws" + ProviderAzure ProviderType = "infrastructure-azure" + + providerLabel = "cluster.x-k8s.io/provider" ) type Template string @@ -49,6 +53,10 @@ var awsStandaloneCPDeploymentTemplateBytes []byte //go:embed resources/aws-hosted-cp.yaml.tpl var awsHostedCPDeploymentTemplateBytes []byte +func GetProviderLabel(provider ProviderType) string { + return fmt.Sprintf("%s=%s", providerLabel, provider) +} + // GetUnstructuredDeployment returns an unstructured deployment object based on // the provider and template. 
func GetUnstructuredDeployment(provider ProviderType, templateName Template) *unstructured.Unstructured { diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ca87f3df0..500d7d558 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -35,7 +35,10 @@ import ( "github.com/Mirantis/hmc/test/utils" ) -const namespace = "hmc-system" +const ( + namespace = "hmc-system" + hmcControllerLabel = "app.kubernetes.io/name=hmc" +) var _ = Describe("controller", Ordered, func() { BeforeAll(func() { @@ -57,39 +60,35 @@ var _ = Describe("controller", Ordered, func() { kc, err := kubeclient.NewFromLocal(namespace) ExpectWithOffset(1, err).NotTo(HaveOccurred()) - By("validating that the controller-manager pod is running as expected") - verifyControllerUp := func() error { - // Ensure only one controller pod is running. + By("validating that the hmc-controller and capi provider controllers are running") + verifyControllersUp := func() error { + for _, provider := range []deployment.ProviderType{ + deployment.ProviderCAPI, + deployment.ProviderAWS, + deployment.ProviderAzure, + } { + // Ensure only one controller pod is running. + podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: deployment.GetProviderLabel(provider), + }) + if err != nil { + return err + } + + verifyControllerUp(podList, string(provider)) + } + podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ - LabelSelector: "control-plane=controller-manager,app.kubernetes.io/name=cluster-api", + LabelSelector: hmcControllerLabel, }) if err != nil { return err } - if len(podList.Items) != 1 { - return fmt.Errorf("expected 1 cluster-api-controller pod, got %d", len(podList.Items)) - } - - controllerPod := podList.Items[0] - - // Ensure the pod is not being deleted. - if controllerPod.DeletionTimestamp != nil { - return fmt.Errorf("deletion timestamp should be nil, got: %v", controllerPod) - } - - // Ensure the pod is running and has the expected name. - if !strings.Contains(controllerPod.Name, "controller-manager") { - return fmt.Errorf("controller pod name %s does not contain 'controller-manager'", controllerPod.Name) - } - if controllerPod.Status.Phase != "Running" { - return fmt.Errorf("controller pod in %s status", controllerPod.Status.Phase) - } - - return nil + return verifyControllerUp(podList, "hmc-controller-manager") } Eventually(func() error { - err := verifyControllerUp() + err := verifyControllersUp() if err != nil { _, _ = fmt.Fprintf(GinkgoWriter, "Controller pod validation failed: %v\n", err) return err @@ -120,7 +119,7 @@ var _ = Describe("controller", Ordered, func() { // as well as the output of clusterctl to store as artifacts. if CurrentSpecReport().Failed() { By("collecting failure logs from controllers") - collectLogArtifacts(kc, clusterName, deployment.ProviderAWS) + collectLogArtifacts(kc, clusterName, deployment.ProviderAWS, deployment.ProviderCAPI) } // Delete the deployments if they were created. @@ -168,19 +167,41 @@ var _ = Describe("controller", Ordered, func() { }) }) +func verifyControllerUp(podList *corev1.PodList, name string) error { + if len(podList.Items) != 1 { + return fmt.Errorf("expected 1 %s controller pod, got %d", name, len(podList.Items)) + } + + controllerPod := podList.Items[0] + + // Ensure the pod is not being deleted. 
+	if controllerPod.DeletionTimestamp != nil {
+		return fmt.Errorf("deletion timestamp should be nil, got: %v", controllerPod)
+	}
+	// Ensure the pod is running and has the expected name.
+	if !strings.Contains(controllerPod.Name, "controller-manager") {
+		return fmt.Errorf("controller pod name %s does not contain 'controller-manager'", controllerPod.Name)
+	}
+	if controllerPod.Status.Phase != "Running" {
+		return fmt.Errorf("controller pod in %s status", controllerPod.Status.Phase)
+	}
+
+	return nil
+}
+
 // collectLogArtifacts collects log output from each of the HMC controller,
-// CAPI controller and the provider controller as well as output from clusterctl
+// CAPI controller and the provider controller(s) as well as output from clusterctl
 // and stores them in the test/e2e directory as artifacts.
 // We could do this at the end or we could use Kubernetes' CopyPodLogs from
 // https://github.com/kubernetes/kubernetes/blob/v1.31.0/test/e2e/storage/podlogs/podlogs.go#L88
 // to stream the logs to GinkgoWriter during the test.
-func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerType deployment.ProviderType) {
+func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerTypes ...deployment.ProviderType) {
 	GinkgoHelper()
 
-	filterLabels := []string{
-		"app.kubernetes.io/name=hmc-controller-manager",
-		"app.kubernetes.io/name=cluster-api",
-		fmt.Sprintf("app.kubernetes.io/name=cluster-api-provider-%s", providerType),
+	filterLabels := []string{hmcControllerLabel}
+
+	for _, providerType := range providerTypes {
+		filterLabels = append(filterLabels, deployment.GetProviderLabel(providerType))
 	}
 
 	for _, label := range filterLabels {

From 37a82944cee4d56d5d992f2deee2459f21a1ddbd Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Thu, 29 Aug 2024 13:35:32 -0700
Subject: [PATCH 14/35] Fix linting issues

Signed-off-by: Kyle Squizzato
---
 test/e2e/e2e_test.go | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 500d7d558..f4dd4fee5 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -75,7 +75,9 @@ var _ = Describe("controller", Ordered, func() {
 				return err
 			}
 
-			verifyControllerUp(podList, string(provider))
+			if err := verifyControllerUp(podList, string(provider)); err != nil {
+				return err
+			}
 		}
 
 		podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{

From c57623b21ef23c44800d31b6ab6981896caa295a Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Thu, 29 Aug 2024 14:06:41 -0700
Subject: [PATCH 15/35] Increase the timeout wait for provider controllers

Signed-off-by: Kyle Squizzato
---
 test/e2e/e2e_test.go          | 37 ++++++++++++++++-------------------
 test/kubeclient/kubeclient.go |  2 +-
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index f4dd4fee5..6f104fb51 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -62,32 +62,22 @@ var _ = Describe("controller", Ordered, func() {
 
 		By("validating that the hmc-controller and capi provider controllers are running")
 		verifyControllersUp := func() error {
+			if err := verifyControllerUp(kc, hmcControllerLabel, "hmc-controller-manager"); err != nil {
+				return err
+			}
+
 			for _, provider := range []deployment.ProviderType{
 				deployment.ProviderCAPI,
 				deployment.ProviderAWS,
 				deployment.ProviderAzure,
 			} {
 				// Ensure only one controller pod is running.
- podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ - LabelSelector: deployment.GetProviderLabel(provider), - }) - if err != nil { + if err := verifyControllerUp(kc, deployment.GetProviderLabel(provider), string(provider)); err != nil { return err } - - if err := verifyControllerUp(podList, string(provider)); err != nil { - return err - } - } - - podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ - LabelSelector: hmcControllerLabel, - }) - if err != nil { - return err } - return verifyControllerUp(podList, "hmc-controller-manager") + return nil } Eventually(func() error { err := verifyControllersUp() @@ -97,7 +87,7 @@ var _ = Describe("controller", Ordered, func() { } return nil - }).WithTimeout(5 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + }).WithTimeout(15 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) }) }) @@ -169,7 +159,14 @@ var _ = Describe("controller", Ordered, func() { }) }) -func verifyControllerUp(podList *corev1.PodList, name string) error { +func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name string) error { + podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return fmt.Errorf("failed to list %s controller pods: %v", name, err) + } + if len(podList.Items) != 1 { return fmt.Errorf("expected 1 %s controller pod, got %d", name, len(podList.Items)) } @@ -217,11 +214,11 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider }) podLogs, err := req.Stream(context.Background()) Expect(err).NotTo(HaveOccurred(), "failed to get log stream for pod %s", pod.Name) - DeferCleanup(Expect(podLogs.Close()).NotTo(HaveOccurred())) + defer Expect(podLogs.Close()).NotTo(HaveOccurred()) output, err := os.Create(fmt.Sprintf("test/e2e/%s.log", pod.Name)) Expect(err).NotTo(HaveOccurred(), "failed to create log file for pod %s", pod.Name) - DeferCleanup(Expect(output.Close()).NotTo(HaveOccurred())) + defer Expect(output.Close()).NotTo(HaveOccurred()) r := bufio.NewReader(podLogs) _, err = r.WriteTo(output) diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index dd8a7a5d1..8c4c5d112 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -36,7 +36,7 @@ import ( ) const ( - awsCredentialsSecretName = "aws-credentials" + awsCredentialsSecretName = "aws-variables" ) type KubeClient struct { From 45c5f8f4c417cdc6c99576f7c38c3b25f40c1385 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 14:57:08 -0700 Subject: [PATCH 16/35] Fix issues with helm-push target Signed-off-by: Kyle Squizzato --- Makefile | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 98b8dae71..c7a8a0c75 100644 --- a/Makefile +++ b/Makefile @@ -192,6 +192,7 @@ REGISTRY_NAME ?= hmc-local-registry REGISTRY_PORT ?= 5001 REGISTRY_REPO ?= oci://127.0.0.1:$(REGISTRY_PORT)/charts DEV_PROVIDER ?= aws +REGISTRY_IS_OCI = $(shell echo $(REGISTRY_REPO) | grep -q oci && echo true || echo false) CLUSTER_NAME ?= $(shell $(YQ) '.metadata.name' ./config/dev/deployment.yaml) AWS_CREDENTIALS=${AWS_B64ENCODED_CREDENTIALS} @@ -244,16 +245,19 @@ dev-undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/c .PHONY: helm-push helm-push: helm-package - @for chart in $(CHARTS_PACKAGE_DIR)/*.tgz; do \ + if [ ! 
$(REGISTRY_IS_OCI) ]; then \ + repo_flag="--repo"; \ + fi; \ + for chart in $(CHARTS_PACKAGE_DIR)/*.tgz; do \ base=$$(basename $$chart .tgz); \ chart_version=$$(echo $$base | grep -o "v\{0,1\}[0-9]\+\.[0-9]\+\.[0-9].*"); \ chart_name="$${base%-"$$chart_version"}"; \ echo "Verifying if chart $$chart_name, version $$chart_version already exists in $(REGISTRY_REPO)"; \ - chart_exists=$$($(HELM) pull --repo $(REGISTRY_REPO) $$chart_name --version $$chart_version --destination /tmp 2>&1 | grep "not found" || true); \ + chart_exists=$$($(HELM) pull $$repo_flag $(REGISTRY_REPO) $$chart_name --version $$chart_version --destination /tmp 2>&1 | grep "not found" || true); \ if [ -z "$$chart_exists" ]; then \ echo "Chart $$chart_name version $$chart_version already exists in the repository."; \ else \ - if [ $(REGISTRY_REPO) == "oci://*" ]; then \ + if $(REGISTRY_IS_OCI); then \ echo "Pushing $$chart to $(REGISTRY_REPO)"; \ $(HELM) push "$$chart" $(REGISTRY_REPO); \ else \ From 1704e93d7590a4cf61536d3475b604d2a9902091 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 15:13:35 -0700 Subject: [PATCH 17/35] Create aws credential in controller test, remove old test artifacts actions Signed-off-by: Kyle Squizzato --- .github/workflows/test.yml | 4 ---- Makefile | 6 +++++- test/e2e/e2e_test.go | 5 +++-- test/kubeclient/kubeclient.go | 9 ++++----- test/utils/utils.go | 2 +- 5 files changed, 13 insertions(+), 13 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index dcb9508fa..c5e10974d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,10 +72,6 @@ jobs: - name: Run E2E tests run: | make test-e2e - - name: Get test logs - run: | - kubectl logs -n hmc-system -l app=e2e-test > test/e2e/e2e-test.log - ./bin/clusterctl describe cluster --show-conditions=all > test/e2e/clusterctl.log - name: Archive test results uses: actions/upload-artifact@v4 with: diff --git a/Makefile b/Makefile index c7a8a0c75..372d7cfa1 100644 --- a/Makefile +++ b/Makefile @@ -245,7 +245,7 @@ dev-undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/c .PHONY: helm-push helm-push: helm-package - if [ ! $(REGISTRY_IS_OCI) ]; then \ + @if [ ! $(REGISTRY_IS_OCI) ]; then \ repo_flag="--repo"; \ fi; \ for chart in $(CHARTS_PACKAGE_DIR)/*.tgz; do \ @@ -316,6 +316,10 @@ test-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates .PHONY: test-destroy test-destroy: kind-undeploy registry-undeploy +.PHONY: get-local-bin +get-local-bin: + $(shell pwd)/bin + .PHONY: cli-install cli-install: clusterawsadm clusterctl cloud-nuke yq ## Install the necessary CLI tools for deployment, development and testing. diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 6f104fb51..7dd3112a7 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -59,6 +59,7 @@ var _ = Describe("controller", Ordered, func() { It("should run successfully", func() { kc, err := kubeclient.NewFromLocal(namespace) ExpectWithOffset(1, err).NotTo(HaveOccurred()) + ExpectWithOffset(1, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) By("validating that the hmc-controller and capi provider controllers are running") verifyControllersUp := func() error { @@ -175,14 +176,14 @@ func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name st // Ensure the pod is not being deleted. 
if controllerPod.DeletionTimestamp != nil { - return fmt.Errorf("deletion timestamp should be nil, got: %v", controllerPod) + return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", controllerPod.Name, controllerPod) } // Ensure the pod is running and has the expected name. if !strings.Contains(controllerPod.Name, "controller-manager") { return fmt.Errorf("controller pod name %s does not contain 'controller-manager'", controllerPod.Name) } if controllerPod.Status.Phase != "Running" { - return fmt.Errorf("controller pod in %s status", controllerPod.Status.Phase) + return fmt.Errorf("controller pod: %s in %s status", controllerPod.Name, controllerPod.Status.Phase) } return nil diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go index 8c4c5d112..bac6a2909 100644 --- a/test/kubeclient/kubeclient.go +++ b/test/kubeclient/kubeclient.go @@ -124,16 +124,15 @@ func new(configBytes []byte, namespace string) (*KubeClient, error) { } // CreateAWSCredentialsKubeSecret uses clusterawsadm to encode existing AWS -// credentials and create a secret named 'aws-credentials' in the given -// namespace if one does not already exist. +// credentials and create a secret in the given namespace if one does not +// already exist. func (kc *KubeClient) CreateAWSCredentialsKubeSecret(ctx context.Context) error { _, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, awsCredentialsSecretName, metav1.GetOptions{}) if !apierrors.IsNotFound(err) { return nil } - cmd := exec.Command("./bin/clusterawsadm", - "bootstrap", "credentials", "encode-as-profile", "--output", "rawSharedConfig") + cmd := exec.Command("./bin/clusterawsadm", "bootstrap", "credentials", "encode-as-profile") output, err := utils.Run(cmd) if err != nil { return fmt.Errorf("failed to encode AWS credentials with clusterawsadm: %w", err) @@ -144,7 +143,7 @@ func (kc *KubeClient) CreateAWSCredentialsKubeSecret(ctx context.Context) error Name: awsCredentialsSecretName, }, Data: map[string][]byte{ - "credentials": output, + "AWS_B64ENCODED_CREDENTIALS": output, }, Type: corev1.SecretTypeOpaque, }, metav1.CreateOptions{}) diff --git a/test/utils/utils.go b/test/utils/utils.go index d4cc82587..e4f561915 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -45,7 +45,7 @@ func Run(cmd *exec.Cmd) ([]byte, error) { var exitError *exec.ExitError if errors.As(err, &exitError) { - return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) + return output, fmt.Errorf("%s failed with error: (%v): %s", command, err, string(exitError.Stderr)) } } From f6b7130ec73ff7cdb0d903dbc1af8d1dd4a7a92c Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 17:01:06 -0700 Subject: [PATCH 18/35] Move AWS env under specific run action Signed-off-by: Kyle Squizzato --- .github/workflows/test.yml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index c5e10974d..b70f7108d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -56,10 +56,6 @@ jobs: name: E2E Tests runs-on: ubuntu-latest needs: build - env: - AWS_REGION: us-west-2 - AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} steps: - name: Checkout repository uses: actions/checkout@v4 @@ -70,6 +66,10 @@ jobs: - name: Setup kubectl uses: azure/setup-kubectl@v4 - name: Run E2E tests + env: + AWS_REGION: us-west-2 + AWS_ACCESS_KEY_ID: ${{ 
secrets.CI_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} run: | make test-e2e - name: Archive test results From 5bcf3b8b2d7a374980775b6fdc9850d6b35badb3 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 23:29:27 -0700 Subject: [PATCH 19/35] Ensure CCM validation produces a valid service, add logging for cleanup steps Signed-off-by: Kyle Squizzato --- test/deployment/validate_deployed.go | 6 ++++-- test/e2e/e2e_test.go | 2 ++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/test/deployment/validate_deployed.go b/test/deployment/validate_deployed.go index e7e1239ff..956370933 100644 --- a/test/deployment/validate_deployed.go +++ b/test/deployment/validate_deployed.go @@ -255,9 +255,11 @@ func validateCCM(ctx context.Context, kc *kubeclient.KubeClient, clusterName str Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) } + createdServiceName := "loadbalancer-" + clusterName + _, err = clusterKC.Client.CoreV1().Services(clusterKC.Namespace).Create(ctx, &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: clusterName + "-test-service", + Name: createdServiceName, }, Spec: corev1.ServiceSpec{ Selector: map[string]string{ @@ -282,7 +284,7 @@ func validateCCM(ctx context.Context, kc *kubeclient.KubeClient, clusterName str // Verify the Service is assigned an external IP. service, err := clusterKC.Client.CoreV1().Services(clusterKC.Namespace). - Get(ctx, clusterName+"-test-service", metav1.GetOptions{}) + Get(ctx, createdServiceName, metav1.GetOptions{}) if err != nil { return fmt.Errorf("failed to get test Service: %w", err) } diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 7dd3112a7..1c7e76bb0 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -117,12 +117,14 @@ var _ = Describe("controller", Ordered, func() { // Delete the deployments if they were created. if deleteFunc != nil { + By("deleting the deployment") err = deleteFunc() Expect(err).NotTo(HaveOccurred()) } // Purge the AWS resources, the AfterAll for the controller will // clean up the management cluster. + By("nuking remaining AWS resources") err = os.Setenv("CLUSTER_NAME", clusterName) Expect(err).NotTo(HaveOccurred()) cmd := exec.Command("make", "dev-aws-nuke") From 5e243b172e23a1d303f82005b5aa51a53ded85d8 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 29 Aug 2024 23:32:19 -0700 Subject: [PATCH 20/35] Re-enable nuking of resources Signed-off-by: Kyle Squizzato --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 372d7cfa1..d788eb9c8 100644 --- a/Makefile +++ b/Makefile @@ -307,7 +307,7 @@ dev-provider-delete: envsubst .PHONY: dev-aws-nuke dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'dev-aws-apply', prefix with CLUSTER_NAME to nuke a specific cluster. 
@CLUSTER_NAME=$(CLUSTER_NAME) envsubst < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml - DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group + DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group @rm config/dev/cloud_nuke.yaml .PHONY: test-apply From cb7400672ff5d17290cf49fd522fe8e01dd9ed7e Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Fri, 30 Aug 2024 08:59:57 -0700 Subject: [PATCH 21/35] Try to add more debugging info to deletion failures Signed-off-by: Kyle Squizzato --- test/deployment/validate_deleted.go | 42 ++++++++----------- test/deployment/validate_deployed.go | 25 ++++++++---- test/utils/utils.go | 61 ++++++++++++++++++---------- 3 files changed, 74 insertions(+), 54 deletions(-) diff --git a/test/deployment/validate_deleted.go b/test/deployment/validate_deleted.go index ae0497939..10fafd170 100644 --- a/test/deployment/validate_deleted.go +++ b/test/deployment/validate_deleted.go @@ -15,9 +15,11 @@ package deployment import ( "context" + "errors" "fmt" "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/utils" . "github.com/onsi/ginkgo/v2" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" ) @@ -32,25 +34,8 @@ var deletionValidators = map[string]resourceValidationFunc{ // to ensure generic resources managed by the provider have been deleted. // It is intended to be used in conjunction with an Eventually block. func VerifyProviderDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { - // Sequentially validate each resource type, only returning the first error - // as to not move on to the next resource type until the first is resolved. - // We use []string here since order is important. - for _, name := range []string{"control-planes", "machinedeployments", "clusters"} { - validator, ok := deletionValidators[name] - if !ok { - continue - } - - if err := validator(ctx, kc, clusterName); err != nil { - _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation error: %v\n", name, err) - return err - } - - _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) - delete(resourceValidators, name) - } - - return nil + return verifyProviderAction(ctx, kc, clusterName, deletionValidators, + []string{"clusters", "machinedeployments", "control-planes"}) } // validateClusterDeleted validates that the Cluster resource has been deleted. 
@@ -61,15 +46,24 @@ func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clus
 		return err
 	}
 
-	var inPhase string
-
 	if cluster != nil {
 		phase, _, _ := unstructured.NestedString(cluster.Object, "status", "phase")
-		if phase != "" {
-			inPhase = ", in phase: " + phase
+		if phase != "Deleting" {
+			Fail(fmt.Sprintf("cluster %q exists, but is not in 'Deleting' phase", clusterName))
+		}
+
+		conditions, err := utils.GetConditionsFromUnstructured(cluster)
+		if err != nil {
+			return fmt.Errorf("failed to get conditions from unstructured object: %w", err)
+		}
+
+		var errs error
+
+		for _, c := range conditions {
+			errs = errors.Join(errors.New(utils.ConvertConditionsToString(c)), errs)
 		}
 
-		return fmt.Errorf("cluster %q still exists%s", clusterName, inPhase)
+		return fmt.Errorf("cluster %q still in 'Deleting' phase with conditions:\n%w", clusterName, errs)
 	}
 
 	return nil
diff --git a/test/deployment/validate_deployed.go b/test/deployment/validate_deployed.go
index 956370933..498b3853f 100644
--- a/test/deployment/validate_deployed.go
+++ b/test/deployment/validate_deployed.go
@@ -42,19 +42,28 @@ var resourceValidators = map[string]resourceValidationFunc{
 	"ccm": validateCCM,
 }
 
-// VerifyProviderDeployed is a provider-agnostic verification that checks for
-// the presence of specific resources in the cluster using
-// resourceValidationFuncs. It is meant to be used in conjunction with an
-// Eventually block.
+// VerifyProviderDeployed is a provider-agnostic verification that checks
+// to ensure generic resources managed by the provider have been deployed.
+// It is intended to be used in conjunction with an Eventually block.
+func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error {
+	return verifyProviderAction(ctx, kc, clusterName, resourceValidators,
+		[]string{"clusters", "machines", "control-planes", "csi-driver", "ccm"})
+}
+
+// verifyProviderAction is a provider-agnostic verification that checks for
+// a specific set of resources and either validates their readiness or
+// their deletion depending on the passed map of resourceValidationFuncs and
+// desired order.
+// It is meant to be used in conjunction with an Eventually block.
 // In some cases it may be necessary to end the Eventually block early if the
 // resource will never reach a ready state, in these instances Ginkgo's Fail
 // should be used to end the spec early.
-func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error {
+func verifyProviderAction(ctx context.Context, kc *kubeclient.KubeClient, clusterName string, resourcesToValidate map[string]resourceValidationFunc, order []string) error {
 	// Sequentially validate each resource type, only returning the first error
 	// as to not move on to the next resource type until the first is resolved.
 	// We use []string here since order is important.
- for _, name := range []string{"clusters", "machines", "control-planes", "csi-driver", "ccm"} { - validator, ok := resourceValidators[name] + for _, name := range order { + validator, ok := resourcesToValidate[name] if !ok { continue } @@ -65,7 +74,7 @@ func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clus } _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) - delete(resourceValidators, name) + delete(resourcesToValidate, name) } return nil diff --git a/test/utils/utils.go b/test/utils/utils.go index e4f561915..98c757767 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -101,48 +101,65 @@ func GetProjectDir() (string, error) { func ValidateConditionsTrue(unstrObj *unstructured.Unstructured) error { objKind, objName := ObjKindName(unstrObj) + conditions, err := GetConditionsFromUnstructured(unstrObj) + if err != nil { + return fmt.Errorf("failed to get conditions from unstructured object: %w", err) + } + + var errs error + + for _, c := range conditions { + if c.Status == metav1.ConditionTrue { + continue + } + + errs = errors.Join(errors.New(ConvertConditionsToString(c)), errs) + } + + if errs != nil { + return fmt.Errorf("%s %s is not ready with conditions:\n%w", objKind, objName, errs) + } + + return nil +} + +func ConvertConditionsToString(condition metav1.Condition) string { + return fmt.Sprintf("Type: %s, Status: %s, Reason: %s, Message: %s", + condition.Type, condition.Status, condition.Reason, condition.Message) +} + +func GetConditionsFromUnstructured(unstrObj *unstructured.Unstructured) ([]metav1.Condition, error) { + objKind, objName := ObjKindName(unstrObj) + // Iterate the status conditions and ensure each condition reports a "Ready" // status. - conditions, found, err := unstructured.NestedSlice(unstrObj.Object, "status", "conditions") + unstrConditions, found, err := unstructured.NestedSlice(unstrObj.Object, "status", "conditions") if !found { - return fmt.Errorf("no status conditions found for %s: %s", objKind, objName) + return nil, fmt.Errorf("no status conditions found for %s: %s", objKind, objName) } if err != nil { - return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) + return nil, fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) } - var errs error + var conditions []metav1.Condition - for _, condition := range conditions { + for _, condition := range unstrConditions { conditionMap, ok := condition.(map[string]interface{}) if !ok { - return fmt.Errorf("expected %s: %s condition to be type map[string]interface{}, got: %T", + return nil, fmt.Errorf("expected %s: %s condition to be type map[string]interface{}, got: %T", objKind, objName, conditionMap) } var c *metav1.Condition if err := runtime.DefaultUnstructuredConverter.FromUnstructured(conditionMap, &c); err != nil { - return fmt.Errorf("failed to convert condition map to metav1.Condition: %w", err) + return nil, fmt.Errorf("failed to convert condition map to metav1.Condition: %w", err) } - if c.Status == metav1.ConditionTrue { - continue - } - - errorStr := fmt.Sprintf("%s - Reason: %s", c.Type, c.Reason) - if c.Message != "" { - errorStr = fmt.Sprintf("%s: %s", errorStr, c.Message) - } - - errs = errors.Join(errors.New(errorStr), errs) - } - - if errs != nil { - return fmt.Errorf("%s %s is not ready with conditions:\n%w", objKind, objName, errs) + conditions = append(conditions, *c) } - return nil + return conditions, nil } // ValidateObjectNamePrefix checks if the given object name has 
the given prefix.

From dfc30b9ce15771b97c863b356294c1e74f656b57 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Fri, 30 Aug 2024 11:08:26 -0700
Subject: [PATCH 22/35] DRY validation implementation, add nuke docs

* Simplify GitHub Actions workflow to try to get secrets working.
* Update permissions in workflow.

Signed-off-by: Kyle Squizzato
---
 .github/workflows/test.yml           | 21 ++++++++-------------
 docs/aws/nuke.md                     |  7 +++++++
 docs/dev.md                          |  1 +
 test/deployment/validate_deleted.go  |  7 +++++--
 test/deployment/validate_deployed.go |  4 +++-
 test/e2e/e2e_test.go                 | 21 +++++++++++++-------
 test/utils/utils.go                  |  2 +-
 7 files changed, 38 insertions(+), 25 deletions(-)
 create mode 100644 docs/aws/nuke.md

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index b70f7108d..72bc3f2c0 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -4,6 +4,10 @@ concurrency:
   group: test-${{ github.head_ref || github.run_id }}
   cancel-in-progress: true

+permissions:
+  id-token: write
+  contents: write
+
 on:
   push:
     branches:
@@ -28,7 +32,7 @@ env:

 jobs:
   build:
-    name: Build
+    name: Build and Test
     runs-on: ubuntu-latest
     steps:
       - name: Checkout repository
@@ -52,25 +56,16 @@ jobs:
       - name: Image build
         run: |
           make docker-build
-  e2etest:
-    name: E2E Tests
-    runs-on: ubuntu-latest
-    needs: build
-    steps:
-      - name: Checkout repository
-        uses: actions/checkout@v4
-      - name: Setup Go
-        uses: actions/setup-go@v5
-        with:
-          go-version: ${{ env.GO_VERSION }}
       - name: Setup kubectl
         uses: azure/setup-kubectl@v4
       - name: Run E2E tests
         env:
-          AWS_REGION: us-west-2
           AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }}
           AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}
+          SOME_TEST_SECRET: ${{ secrets.CI_TEST_SECRET }}
+          AWS_REGION: "us-west-2"
         run: |
+          echo $SOME_TEST_SECRET
           make test-e2e
       - name: Archive test results
         uses: actions/upload-artifact@v4
diff --git a/docs/aws/nuke.md b/docs/aws/nuke.md
new file mode 100644
index 000000000..55a46c33d
--- /dev/null
+++ b/docs/aws/nuke.md
@@ -0,0 +1,7 @@
+# Nuking AWS resources
+If you'd like to forcefully clean up all AWS resources created by HMC, you can
+use the following command:
+
+```
+CLUSTER_NAME= make dev-aws-nuke
+```
diff --git a/docs/dev.md b/docs/dev.md
index e66b38c61..848959399 100644
--- a/docs/dev.md
+++ b/docs/dev.md
@@ -82,3 +82,4 @@ export KUBECONFIG=~/.kube/config
 ```
 kubectl --kubeconfig ~/.kube/config get secret -n hmc-system <cluster-name>-kubeconfig -o=jsonpath={.data.value} | base64 -d > kubeconfig
 ```
+
diff --git a/test/deployment/validate_deleted.go b/test/deployment/validate_deleted.go
index 10fafd170..cca0bdaeb 100644
--- a/test/deployment/validate_deleted.go
+++ b/test/deployment/validate_deleted.go
@@ -20,7 +20,6 @@ import (

 	"github.com/Mirantis/hmc/test/kubeclient"
 	"github.com/Mirantis/hmc/test/utils"
-	. "github.com/onsi/ginkgo/v2"
 	"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
 )

@@ -49,7 +48,11 @@ func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clus
 	if cluster != nil {
 		phase, _, _ := unstructured.NestedString(cluster.Object, "status", "phase")
 		if phase != "Deleting" {
-			Fail(fmt.Sprintf("cluster %q exists, but is not in 'Deleting' phase", clusterName))
+			// TODO: We should have a threshold error system for situations
+			// like this; we probably don't want to wait the full Eventually
+			// for something like this, but we can't immediately fail the test
+			// either.
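+			// A sketch of one possible shape for this (illustrative only; the
+			// names below are hypothetical and not implemented anywhere yet):
+			//
+			//   if deletingMisses++; deletingMisses > maxDeletingMisses {
+			//       return fmt.Errorf("cluster %q stuck outside 'Deleting' phase", clusterName)
+			//   }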
+			return fmt.Errorf("cluster %q exists, but is not in 'Deleting' phase", clusterName)
 		}

 		conditions, err := utils.GetConditionsFromUnstructured(cluster)
diff --git a/test/deployment/validate_deployed.go b/test/deployment/validate_deployed.go
index 498b3853f..f7e6f14a7 100644
--- a/test/deployment/validate_deployed.go
+++ b/test/deployment/validate_deployed.go
@@ -58,7 +58,9 @@ func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clus
 // In some cases it may be necessary to end the Eventually block early if the
 // resource will never reach a ready state; in these instances Ginkgo's Fail
 // should be used to end the spec early.
-func verifyProviderAction(ctx context.Context, kc *kubeclient.KubeClient, clusterName string, resourcesToValidate map[string]resourceValidationFunc, order []string) error {
+func verifyProviderAction(
+	ctx context.Context, kc *kubeclient.KubeClient, clusterName string,
+	resourcesToValidate map[string]resourceValidationFunc, order []string) error {
 	// Sequentially validate each resource type, only returning the first error
 	// so as not to move on to the next resource type until the first is resolved.
 	// We use []string here since order is important.
diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 1c7e76bb0..4eb59c82f 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -193,7 +193,8 @@ func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name st

 // collectLogArtifacts collects log output from each of the HMC controller,
 // CAPI controller and the provider controller(s) as well as output from clusterctl
-// and stores them in the test/e2e directory as artifacts.
+// and stores them in the test/e2e directory as artifacts. If it fails it
+// produces a warning message to the GinkgoWriter, but does not fail the test.
 // We could do this at the end or we could use Kubernetes' CopyPodLogs from
 // https://github.com/kubernetes/kubernetes/blob/v1.31.0/test/e2e/storage/podlogs/podlogs.go#L88
 // to stream the logs to GinkgoWriter during the test.
@@ -216,24 +217,28 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider TailLines: ptr.To(int64(1000)), }) podLogs, err := req.Stream(context.Background()) - Expect(err).NotTo(HaveOccurred(), "failed to get log stream for pod %s", pod.Name) - defer Expect(podLogs.Close()).NotTo(HaveOccurred()) + warnError(fmt.Errorf("failed to get log stream for pod %s: %w", pod.Name, err)) + defer podLogs.Close() //nolint:errcheck output, err := os.Create(fmt.Sprintf("test/e2e/%s.log", pod.Name)) - Expect(err).NotTo(HaveOccurred(), "failed to create log file for pod %s", pod.Name) - defer Expect(output.Close()).NotTo(HaveOccurred()) + warnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err)) + defer output.Close() //nolint:errcheck r := bufio.NewReader(podLogs) _, err = r.WriteTo(output) - Expect(err).NotTo(HaveOccurred(), "failed to write log file for pod %s", pod.Name) + warnError(fmt.Errorf("failed to write log file for pod %s: %w", pod.Name, err)) } } cmd := exec.Command("./bin/clusterctl", "describe", "cluster", clusterName, "--show-conditions=all") output, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "failed to get clusterctl log") + warnError(fmt.Errorf("failed to get clusterctl log: %w", err)) err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) - Expect(err).NotTo(HaveOccurred(), "failed to write clusterctl log") + warnError(fmt.Errorf("failed to write clusterctl log: %w", err)) +} + +func warnError(err error) { + _, _ = fmt.Fprintf(GinkgoWriter, "Warning: %v\n", err) } diff --git a/test/utils/utils.go b/test/utils/utils.go index 98c757767..e6283fa74 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -141,7 +141,7 @@ func GetConditionsFromUnstructured(unstrObj *unstructured.Unstructured) ([]metav return nil, fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) } - var conditions []metav1.Condition + conditions := make([]metav1.Condition, 0, len(unstrConditions)) for _, condition := range unstrConditions { conditionMap, ok := condition.(map[string]interface{}) From 929bf999b59df797d502bf75052aef3586831cd9 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Fri, 30 Aug 2024 18:01:56 -0700 Subject: [PATCH 23/35] Remove getAWSAMI in favor of automatic AMI selection Signed-off-by: Kyle Squizzato --- test/deployment/deployment.go | 40 ------------------- .../resources/aws-hosted-cp.yaml.tpl | 1 - .../resources/aws-standalone-cp.yaml.tpl | 2 - 3 files changed, 43 deletions(-) diff --git a/test/deployment/deployment.go b/test/deployment/deployment.go index a207397d9..102f65812 100644 --- a/test/deployment/deployment.go +++ b/test/deployment/deployment.go @@ -16,12 +16,9 @@ package deployment import ( _ "embed" - "encoding/json" "fmt" "os" - "os/exec" - "github.com/Mirantis/hmc/test/utils" "github.com/a8m/envsubst" "github.com/google/uuid" . "github.com/onsi/ginkgo/v2" @@ -67,9 +64,6 @@ func GetUnstructuredDeployment(provider ProviderType, templateName Template) *un switch provider { case ProviderAWS: - // XXX: Maybe we should just use automatic AMI selection here. - amiID := getAWSAMI() - Expect(os.Setenv("AWS_AMI_ID", amiID)).NotTo(HaveOccurred()) Expect(os.Setenv("DEPLOYMENT_NAME", generatedName)).NotTo(HaveOccurred()) var deploymentTemplateBytes []byte @@ -97,37 +91,3 @@ func GetUnstructuredDeployment(provider ProviderType, templateName Template) *un return nil } - -// getAWSAMI returns an AWS AMI ID to use for test. 
-func getAWSAMI() string { - GinkgoHelper() - - // For now we'll just use the latest Kubernetes version for ubuntu 20.04, - // but we could potentially pin the Kube version and specify that here. - cmd := exec.Command("./bin/clusterawsadm", "ami", "list", "--os=ubuntu-20.04", "-o", "json") - output, err := utils.Run(cmd) - Expect(err).NotTo(HaveOccurred(), "failed to list AMIs") - - var amiList map[string]interface{} - - err = json.Unmarshal(output, &amiList) - Expect(err).NotTo(HaveOccurred(), "failed to unmarshal AMI list") - - // ami list returns a sorted list of AMIs by kube version, just get the - // first one. - for _, item := range amiList["items"].([]interface{}) { - spec := item.(map[string]interface{})["spec"].(map[string]interface{}) - if imageID, ok := spec["imageID"]; ok { - ami, ok := imageID.(string) - if !ok { - continue - } - - return ami - } - } - - Fail("no AMIs found") - - return "" -} diff --git a/test/deployment/resources/aws-hosted-cp.yaml.tpl b/test/deployment/resources/aws-hosted-cp.yaml.tpl index 2606b64f9..a3ce562e8 100644 --- a/test/deployment/resources/aws-hosted-cp.yaml.tpl +++ b/test/deployment/resources/aws-hosted-cp.yaml.tpl @@ -11,7 +11,6 @@ spec: subnets: - id: ${AWS_SUBNET_ID} availabilityZone: ${AWS_SUBNET_AVAILABILITY_ZONE} - amiID: ${AWS_AMI_ID} instanceType: ${INSTANCE_TYPE:=t3.medium} securityGroupIDs: - ${AWS_SG_ID} diff --git a/test/deployment/resources/aws-standalone-cp.yaml.tpl b/test/deployment/resources/aws-standalone-cp.yaml.tpl index 220fa600b..5f0776e29 100644 --- a/test/deployment/resources/aws-standalone-cp.yaml.tpl +++ b/test/deployment/resources/aws-standalone-cp.yaml.tpl @@ -10,10 +10,8 @@ spec: controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} workersNumber: ${WORKERS_NUMBER:=1} controlPlane: - amiID: ${AWS_AMI_ID} instanceType: ${INSTANCE_TYPE:=t3.small} worker: - amiID: ${AWS_AMI_ID} instanceType: ${INSTANCE_TYPE:=t3.small} From cbeafbbd098167c45a987de355a123ef122e76f0 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Fri, 30 Aug 2024 18:26:51 -0700 Subject: [PATCH 24/35] Use pull_request_target in actions Signed-off-by: Kyle Squizzato --- .github/workflows/test.yml | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 72bc3f2c0..9b8300402 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -17,12 +17,14 @@ on: - '*' paths-ignore: - '**.md' - pull_request: + pull_request_target: + types: + - opened + - synchronize + - reopened branches: - main - release-* - tags: - - '*' paths-ignore: - 'config/**' - '**.md' @@ -63,7 +65,7 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} SOME_TEST_SECRET: ${{ secrets.CI_TEST_SECRET }} - AWS_REGION: "us-west-2" + AWS_REGION: 'us-west-2' run: | echo $SOME_TEST_SECRET make test-e2e From 4ca842e29a1f485503dbb353b6e6707d1b8b7c1b Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Tue, 3 Sep 2024 09:31:56 -0700 Subject: [PATCH 25/35] Break out E2E tests into a pull_request_target workflow The workflow will require we label the PR with "test-e2e". 
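
For reference, the gate is the workflow condition shown in the diff below
(if: contains(github.event.pull_request.labels.*.name, 'test-e2e')). One
illustrative way to add the label is with the GitHub CLI:

    gh pr edit <pr-number> --add-label test-e2e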
Signed-off-by: Kyle Squizzato --- .github/workflows/build.yml | 52 +++++++++++++++++++++++++++++++++ .github/workflows/test.yml | 58 +++++++++---------------------------- 2 files changed, 65 insertions(+), 45 deletions(-) create mode 100644 .github/workflows/build.yml diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 000000000..43566691e --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,52 @@ +name: Build and Unit Test + +concurrency: + group: test-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +on: + push: + branches: + - main + - release-* + tags: + - '*' + paths-ignore: + - '**.md' + pull_request: + branches: + - main + - release-* + paths-ignore: + - 'config/**' + - '**.md' + +env: + GO_VERSION: '1.22' + +jobs: + build: + name: Build and Unit Test + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + - name: Lint + uses: golangci/golangci-lint-action@v6 + with: + args: --timeout 10m0s + - name: Verify all generated pieces are up-to-date + run: make generate-all && git add -N . && git diff --exit-code + - name: Unit tests + run: | + make test + - name: Build + run: | + make build + - name: Image build + run: | + make docker-build diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9b8300402..91214ced8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,77 +1,45 @@ -name: Build and Test +name: E2E Tests concurrency: group: test-${{ github.head_ref || github.run_id }} cancel-in-progress: true -permissions: - id-token: write - contents: write - on: - push: - branches: - - main - - release-* - tags: - - '*' - paths-ignore: - - '**.md' pull_request_target: - types: - - opened - - synchronize - - reopened + types: [labeled] branches: - main - release-* paths-ignore: - 'config/**' - '**.md' - env: GO_VERSION: '1.22' jobs: - build: - name: Build and Test + e2etest: + name: E2E Tests runs-on: ubuntu-latest + if: contains(github.event.pull_request.labels.*.name, 'test-e2e') + env: + AWS_REGION: us-west-2 + AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} steps: - name: Checkout repository uses: actions/checkout@v4 - name: Setup Go uses: actions/setup-go@v5 with: - go-version: ${{ env.GO_VERSION }} - - name: Lint - uses: golangci/golangci-lint-action@v6 - with: - args: --timeout 10m0s - - name: Verify all generated pieces are up-to-date - run: make generate-all && git add -N . 
&& git diff --exit-code
-      - name: Unit tests
-        run: |
-          make test
-      - name: Build
-        run: |
-          make build
-      - name: Image build
-        run: |
-          make docker-build
+          go-version: ${{ env.GO_VERSION }}
       - name: Setup kubectl
         uses: azure/setup-kubectl@v4
       - name: Run E2E tests
-        env:
-          AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }}
-          AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }}
-          SOME_TEST_SECRET: ${{ secrets.CI_TEST_SECRET }}
-          AWS_REGION: 'us-west-2'
         run: |
-          echo $SOME_TEST_SECRET
           make test-e2e
       - name: Archive test results
         uses: actions/upload-artifact@v4
         with:
-          name: test-logs
-          path: |
-            test/e2e/*.log
+          name: test-logs
+          path: |
+            test/e2e/*.log

From 1826aab471ae2a3a3ca710b51756121df4d2485d Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Tue, 3 Sep 2024 09:40:18 -0700
Subject: [PATCH 26/35] Run dev-PROVIDER-creds on dev-apply

Signed-off-by: Kyle Squizzato
---
 Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Makefile b/Makefile
index d788eb9c8..b6355887a 100644
--- a/Makefile
+++ b/Makefile
@@ -291,6 +291,7 @@ dev-azure-creds: envsubst

 .PHONY: dev-apply
 dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates
+	make dev-$(DEV_PROVIDER)-creds

 .PHONY: dev-destroy
 dev-destroy: kind-undeploy registry-undeploy ## Destroy the development environment by deleting the kind cluster and local registry.

From 2c0cb6af568e6caf40bf2a645c4a1e860b5bc470 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Tue, 3 Sep 2024 12:16:48 -0700
Subject: [PATCH 27/35] Use custom script to find and delete AWS CCM created resources

Signed-off-by: Kyle Squizzato
---
 Makefile                |  1 +
 scripts/aws-nuke-ccm.sh | 52 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 53 insertions(+)
 create mode 100755 scripts/aws-nuke-ccm.sh

diff --git a/Makefile b/Makefile
index b6355887a..d8813c963 100644
--- a/Makefile
+++ b/Makefile
@@ -307,6 +307,7 @@ dev-provider-delete: envsubst

 .PHONY: dev-aws-nuke
 dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'dev-aws-apply', prefix with CLUSTER_NAME to nuke a specific cluster.
+	@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) bash -c ./scripts/aws-nuke-ccm.sh
 	@CLUSTER_NAME=$(CLUSTER_NAME) envsubst < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml
 	DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group
 	@rm config/dev/cloud_nuke.yaml

diff --git a/scripts/aws-nuke-ccm.sh b/scripts/aws-nuke-ccm.sh
new file mode 100755
index 000000000..cadc57597
--- /dev/null
+++ b/scripts/aws-nuke-ccm.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# Copyright 2024
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script will remove all resources affiliated with the AWS CCM, such as
+# ELB or CSI driver resources that cannot be filtered by cloud-nuke.
+# It should be run after running cloud-nuke to remove any remaining resources.
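+#
+# Typical invocation (illustrative; docs/aws/nuke.md documents the Makefile
+# wrapper for this script):
+#   CLUSTER_NAME=<your-cluster> make dev-aws-nuke
+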
+if [ -z $CLUSTER_NAME ]; then
+    echo "CLUSTER_NAME must be set"
+    exit 1
+fi
+
+if [ -z $YQ ]; then
+    YQ=$(which yq)
+fi
+
+echo "Checking for ELB with 'kubernetes.io/cluster/$CLUSTER_NAME' tag"
+for LOADBALANCER in $(aws elb describe-load-balancers --output yaml | yq '.LoadBalancerDescriptions[].LoadBalancerName');
+do
+    echo "Checking ELB: $LOADBALANCER for 'kubernetes.io/cluster/$CLUSTER_NAME' tag"
+    DESCRIBE_TAGS=$(aws elb describe-tags \
+        --load-balancer-names $LOADBALANCER \
+        --output yaml | yq '.TagDescriptions[].Tags.[]' | grep "kubernetes.io/cluster/$CLUSTER_NAME")
+    if [ ! -z "${DESCRIBE_TAGS}" ]; then
+        echo "Deleting ELB: $LOADBALANCER"
+        aws elb delete-load-balancer --load-balancer-name $LOADBALANCER
+    fi
+done
+
+echo "Checking for EBS Volumes with $CLUSTER_NAME within the 'kubernetes.io/created-for/pvc/name' tag"
+for VOLUME in $(aws ec2 describe-volumes --output yaml | yq '.Volumes[].VolumeId');
+do
+    echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim"
+    DESCRIBE_VOLUMES=$(aws ec2 describe-volumes \
+        --volume-id $VOLUME \
+        --output yaml | yq '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep $CLUSTER_NAME)
+    if [ ! -z "${DESCRIBE_VOLUMES}" ]; then
+        echo "Deleting EBS Volume: $VOLUME"
+        aws ec2 delete-volume --volume-id $VOLUME
+    fi
+done

From 0f187cf16409e9b3e9c5cdf1afcebadbd246ab49 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Tue, 3 Sep 2024 12:25:53 -0700
Subject: [PATCH 28/35] Fix rebase issues

Signed-off-by: Kyle Squizzato
---
 config/dev/deployment.yaml                       | 17 -----------------
 .../deployment/resources/aws-hosted-cp.yaml.tpl  |  1 +
 .../resources/aws-standalone-cp.yaml.tpl         |  1 +
 3 files changed, 2 insertions(+), 17 deletions(-)
 delete mode 100644 config/dev/deployment.yaml

diff --git a/config/dev/deployment.yaml b/config/dev/deployment.yaml
deleted file mode 100644
index 58ba18e32..000000000
--- a/config/dev/deployment.yaml
+++ /dev/null
@@ -1,17 +0,0 @@
-apiVersion: hmc.mirantis.com/v1alpha1
-kind: Deployment
-metadata:
-  name: aws-dev
-spec:
-  template: aws-standalone-cp
-  config:
-    region: us-east-2
-    publicIP: true
-    controlPlaneNumber: 1
-    workersNumber: 1
-    controlPlane:
-      amiID: ami-02f3416038bdb17fb
-      instanceType: t3.small
-    worker:
-      amiID: ami-02f3416038bdb17fb
-      instanceType: t3.small
diff --git a/test/deployment/resources/aws-hosted-cp.yaml.tpl b/test/deployment/resources/aws-hosted-cp.yaml.tpl
index a3ce562e8..5207aec9d 100644
--- a/test/deployment/resources/aws-hosted-cp.yaml.tpl
+++ b/test/deployment/resources/aws-hosted-cp.yaml.tpl
@@ -2,6 +2,7 @@ apiVersion: hmc.mirantis.com/v1alpha1
 kind: Deployment
 metadata:
   name: ${DEPLOYMENT_NAME}
+  namespace: {NAMESPACE:=default}
 spec:
   template: aws-hosted-cp
   config:
diff --git a/test/deployment/resources/aws-standalone-cp.yaml.tpl b/test/deployment/resources/aws-standalone-cp.yaml.tpl
index 5f0776e29..8b2f95622 100644
--- a/test/deployment/resources/aws-standalone-cp.yaml.tpl
+++ b/test/deployment/resources/aws-standalone-cp.yaml.tpl
@@ -2,6 +2,7 @@ apiVersion: hmc.mirantis.com/v1alpha1
 kind: Deployment
 metadata:
   name: ${DEPLOYMENT_NAME}
+  namespace: ${NAMESPACE:=default}
 spec:
   template: aws-standalone-cp
   config:

From 1991d3cf185c83971dfa46ff633f874a455f0398 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Tue, 3 Sep 2024 12:44:47 -0700
Subject: [PATCH 29/35] Do not create .log files if error is non-nil

Signed-off-by: Kyle Squizzato
---
 .../resources/aws-hosted-cp.yaml.tpl     | 1 -
 .../resources/aws-standalone-cp.yaml.tpl | 1 -
test/e2e/e2e_test.go | 30 ++++++++++++------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/test/deployment/resources/aws-hosted-cp.yaml.tpl b/test/deployment/resources/aws-hosted-cp.yaml.tpl index 5207aec9d..a3ce562e8 100644 --- a/test/deployment/resources/aws-hosted-cp.yaml.tpl +++ b/test/deployment/resources/aws-hosted-cp.yaml.tpl @@ -2,7 +2,6 @@ apiVersion: hmc.mirantis.com/v1alpha1 kind: Deployment metadata: name: ${DEPLOYMENT_NAME} - namespace: {NAMESPACE:=default} spec: template: aws-hosted-cp config: diff --git a/test/deployment/resources/aws-standalone-cp.yaml.tpl b/test/deployment/resources/aws-standalone-cp.yaml.tpl index 8b2f95622..5f0776e29 100644 --- a/test/deployment/resources/aws-standalone-cp.yaml.tpl +++ b/test/deployment/resources/aws-standalone-cp.yaml.tpl @@ -2,7 +2,6 @@ apiVersion: hmc.mirantis.com/v1alpha1 kind: Deployment metadata: name: ${DEPLOYMENT_NAME} - namespace: ${NAMESPACE:=default} spec: template: aws-standalone-cp config: diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 4eb59c82f..ddfa9eb76 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -195,9 +195,6 @@ func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name st // CAPI controller and the provider controller(s) as well as output from clusterctl // and stores them in the test/e2e directory as artifacts. If it fails it // produces a warning message to the GinkgoWriter, but does not fail the test. -// We could do this at the end or we could use Kubernetes' CopyPodLogs from -// https://github.com/kubernetes/kubernetes/blob/v1.31.0/test/e2e/storage/podlogs/podlogs.go#L88 -// to stream the logs to GinkgoWriter during the test. func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerTypes ...deployment.ProviderType) { GinkgoHelper() @@ -217,26 +214,39 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider TailLines: ptr.To(int64(1000)), }) podLogs, err := req.Stream(context.Background()) - warnError(fmt.Errorf("failed to get log stream for pod %s: %w", pod.Name, err)) + if err != nil { + warnError(fmt.Errorf("failed to get log stream for pod %s: %w", pod.Name, err)) + continue + } defer podLogs.Close() //nolint:errcheck - output, err := os.Create(fmt.Sprintf("test/e2e/%s.log", pod.Name)) - warnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err)) + output, err := os.Create(fmt.Sprintf("%s.log", pod.Name)) + if err != nil { + warnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err)) + continue + } defer output.Close() //nolint:errcheck r := bufio.NewReader(podLogs) _, err = r.WriteTo(output) - warnError(fmt.Errorf("failed to write log file for pod %s: %w", pod.Name, err)) + if err != nil { + warnError(fmt.Errorf("failed to write log file for pod %s: %w", pod.Name, err)) + } } } cmd := exec.Command("./bin/clusterctl", - "describe", "cluster", clusterName, "--show-conditions=all") + "describe", "cluster", clusterName, "--namespace", namespace, "--show-conditions=all") output, err := utils.Run(cmd) - warnError(fmt.Errorf("failed to get clusterctl log: %w", err)) + if err != nil { + warnError(fmt.Errorf("failed to get clusterctl log: %w", err)) + return + } err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) - warnError(fmt.Errorf("failed to write clusterctl log: %w", err)) + if err != nil { + warnError(fmt.Errorf("failed to write clusterctl log: %w", err)) + } } func warnError(err error) { From 
2fca0904fd4987bae5ea182a1c58b67592b198a5 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Tue, 3 Sep 2024 15:17:26 -0700 Subject: [PATCH 30/35] Fix rebase clobbering some things Signed-off-by: Kyle Squizzato --- Makefile | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d8813c963..6db2f8d08 100644 --- a/Makefile +++ b/Makefile @@ -290,13 +290,11 @@ dev-azure-creds: envsubst @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/azure-credentials.yaml | $(KUBECTL) apply -f - .PHONY: dev-apply -dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates - make dev-$(DEV_PROVIDER)-creds +dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates dev-creds-apply .PHONY: dev-destroy dev-destroy: kind-undeploy registry-undeploy ## Destroy the development environment by deleting the kind cluster and local registry. - .PHONY: dev-provider-apply dev-provider-apply: envsubst @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-managedcluster.yaml | $(KUBECTL) apply -f - @@ -305,8 +303,11 @@ dev-provider-apply: envsubst dev-provider-delete: envsubst @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-managedcluster.yaml | $(KUBECTL) delete -f - +.PHONY: dev-creds-apply +dev-creds-apply: dev-$(DEV_PROVIDER)-creds + .PHONY: dev-aws-nuke -dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'dev-aws-apply', prefix with CLUSTER_NAME to nuke a specific cluster. +dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster. @CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) bash -c ./scripts/aws-nuke-ccm.sh @CLUSTER_NAME=$(CLUSTER_NAME) envsubst < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group From 85ab09d5a8b71b7fed792e97097f344c79c50f6b Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Tue, 3 Sep 2024 16:27:09 -0700 Subject: [PATCH 31/35] Fix targets across Makefile, use Deployments for verifyControllerUp Signed-off-by: Kyle Squizzato --- Makefile | 28 ++++++++++++++++++---------- config/dev/aws-managedcluster.yaml | 2 -- scripts/aws-nuke-ccm.sh | 10 +++++++++- test/e2e/e2e_test.go | 26 +++++++++++++------------- 4 files changed, 40 insertions(+), 26 deletions(-) diff --git a/Makefile b/Makefile index 6db2f8d08..82902cbe2 100644 --- a/Makefile +++ b/Makefile @@ -290,14 +290,17 @@ dev-azure-creds: envsubst @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/azure-credentials.yaml | $(KUBECTL) apply -f - .PHONY: dev-apply -dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates dev-creds-apply +dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates .PHONY: dev-destroy dev-destroy: kind-undeploy registry-undeploy ## Destroy the development environment by deleting the kind cluster and local registry. 
.PHONY: dev-provider-apply dev-provider-apply: envsubst - @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-managedcluster.yaml | $(KUBECTL) apply -f - + @if [ $(DEV_PROVIDER) = "aws" ]; then \ + $(MAKE) dev-aws-creds; \ + fi + @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-deployment.yaml | $(KUBECTL) apply -f - .PHONY: dev-provider-delete dev-provider-delete: envsubst @@ -306,10 +309,10 @@ dev-provider-delete: envsubst .PHONY: dev-creds-apply dev-creds-apply: dev-$(DEV_PROVIDER)-creds -.PHONY: dev-aws-nuke +.PHONY: envsubst awscli dev-aws-nuke dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster. - @CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) bash -c ./scripts/aws-nuke-ccm.sh - @CLUSTER_NAME=$(CLUSTER_NAME) envsubst < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml + @CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c ./scripts/aws-nuke-ccm.sh + @CLUSTER_NAME=$(CLUSTER_NAME) $(ENVSUBST) < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group @rm config/dev/cloud_nuke.yaml @@ -319,12 +322,8 @@ test-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates .PHONY: test-destroy test-destroy: kind-undeploy registry-undeploy -.PHONY: get-local-bin -get-local-bin: - $(shell pwd)/bin - .PHONY: cli-install -cli-install: clusterawsadm clusterctl cloud-nuke yq ## Install the necessary CLI tools for deployment, development and testing. +cli-install: clusterawsadm clusterctl cloud-nuke yq awscli ## Install the necessary CLI tools for deployment, development and testing. ##@ Dependencies @@ -356,6 +355,7 @@ CLUSTERCTL ?= $(LOCALBIN)/clusterctl CLOUDNUKE ?= $(LOCALBIN)/cloud-nuke ADDLICENSE ?= $(LOCALBIN)/addlicense-$(ADDLICENSE_VERSION) ENVSUBST ?= $(LOCALBIN)/envsubst-$(ENVSUBST_VERSION) +AWSCLI ?= $(LOCALBIN)/aws ## Tool Versions CONTROLLER_TOOLS_VERSION ?= v0.14.0 @@ -369,6 +369,7 @@ CLUSTERAWSADM_VERSION ?= v2.5.2 CLUSTERCTL_VERSION ?= v1.7.3 ADDLICENSE_VERSION ?= v1.1.1 ENVSUBST_VERSION ?= v1.4.2 +AWSCLI_VERSION ?= 2.17.42 .PHONY: controller-gen controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. 
@@ -444,6 +445,13 @@ envsubst: $(ENVSUBST) $(ENVSUBST): | $(LOCALBIN) $(call go-install-tool,$(ENVSUBST),github.com/a8m/envsubst/cmd/envsubst,${ENVSUBST_VERSION}) +.PHONY: awscli +awscli: $(AWSCLI) +$(AWSCLI): | $(LOCALBIN) + curl "https://awscli.amazonaws.com/awscli-exe-$(OS)-$(shell uname -m)-$(AWSCLI_VERSION).zip" -o "/tmp/awscliv2.zip" + unzip /tmp/awscliv2.zip -d /tmp + /tmp/aws/install -i $(LOCALBIN)/aws-cli -b $(LOCALBIN) --update + # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary (ideally with version) # $2 - package url which can be installed diff --git a/config/dev/aws-managedcluster.yaml b/config/dev/aws-managedcluster.yaml index 6ed56f2de..43a8a9189 100644 --- a/config/dev/aws-managedcluster.yaml +++ b/config/dev/aws-managedcluster.yaml @@ -6,13 +6,11 @@ metadata: spec: config: controlPlane: - amiID: ami-0989c067ff3da4b27 instanceType: t3.small controlPlaneNumber: 1 publicIP: true region: us-west-2 worker: - amiID: ami-0989c067ff3da4b27 instanceType: t3.small workersNumber: 1 template: aws-standalone-cp diff --git a/scripts/aws-nuke-ccm.sh b/scripts/aws-nuke-ccm.sh index cadc57597..1d0a0bcdd 100755 --- a/scripts/aws-nuke-ccm.sh +++ b/scripts/aws-nuke-ccm.sh @@ -22,7 +22,15 @@ if [ -z $CLUSTER_NAME ]; then fi if [ -z $YQ ]; then - YQ=$(which yq) + echo "YQ must be set to the path of the yq binary" + echo "Use 'make dev-aws-nuke' instead of running this script directly" + exit 1 +fi + +if [ -z AWSCLI ]; then + echo "AWSCLI must be set to the path of the AWS CLI" + echo "Use 'make dev-aws-nuke' instead of running this script directly" + exit 1 fi echo "Checking for ELB with 'kubernetes.io/cluster/$CLUSTER_NAME' tag" diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ddfa9eb76..4c63e0844 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -163,29 +163,29 @@ var _ = Describe("controller", Ordered, func() { }) func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name string) error { - podList, err := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ + deployList, err := kc.Client.AppsV1().Deployments(kc.Namespace).List(context.Background(), metav1.ListOptions{ LabelSelector: labelSelector, }) if err != nil { - return fmt.Errorf("failed to list %s controller pods: %v", name, err) + return fmt.Errorf("failed to list %s controller deployments: %w", name, err) } - if len(podList.Items) != 1 { - return fmt.Errorf("expected 1 %s controller pod, got %d", name, len(podList.Items)) + if len(deployList.Items) < 1 { + return fmt.Errorf("expected at least 1 %s controller deployment, got %d", name, len(deployList.Items)) } - controllerPod := podList.Items[0] + deployment := deployList.Items[0] - // Ensure the pod is not being deleted. - if controllerPod.DeletionTimestamp != nil { - return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", controllerPod.Name, controllerPod) + // Ensure the deployment is not being deleted. + if deployment.DeletionTimestamp != nil { + return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", deployment.Name, deployment.DeletionTimestamp) } - // Ensure the pod is running and has the expected name. - if !strings.Contains(controllerPod.Name, "controller-manager") { - return fmt.Errorf("controller pod name %s does not contain 'controller-manager'", controllerPod.Name) + // Ensure the deployment is running and has the expected name. 
+	if !strings.Contains(deployment.Name, "controller-manager") {
+		return fmt.Errorf("controller deployment name %s does not contain 'controller-manager'", deployment.Name)
 	}

-	if controllerPod.Status.Phase != "Running" {
-		return fmt.Errorf("controller pod: %s in %s status", controllerPod.Name, controllerPod.Status.Phase)
+	if deployment.Status.ReadyReplicas < 1 {
+		return fmt.Errorf("controller deployment: %s does not yet have any ReadyReplicas", deployment.Name)
 	}

 	return nil

From c28b85617a7f98e0a417cc9f3870e15c20d23738 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Tue, 3 Sep 2024 16:49:28 -0700
Subject: [PATCH 32/35] Fix test artifact writing location, linting issues

Signed-off-by: Kyle Squizzato
---
 test/e2e/e2e_test.go | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 4c63e0844..7d27c93dd 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -171,14 +171,16 @@ func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name st
 	}

 	if len(deployList.Items) < 1 {
-		return fmt.Errorf("expected at least 1 %s controller deployment, got %d", name, len(deployList.Items))
+		return fmt.Errorf("expected at least 1 %s controller deployment, got %d",
+			name, len(deployList.Items))
 	}

 	deployment := deployList.Items[0]

 	// Ensure the deployment is not being deleted.
 	if deployment.DeletionTimestamp != nil {
-		return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", deployment.Name, deployment.DeletionTimestamp)
+		return fmt.Errorf("controller deployment: %s deletion timestamp should be nil, got: %v",
+			deployment.Name, deployment.DeletionTimestamp)
 	}
 	// Ensure the deployment is running and has the expected name.
 	if !strings.Contains(deployment.Name, "controller-manager") {
@@ -220,7 +222,7 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider
 	}
 	defer podLogs.Close() //nolint:errcheck

-	output, err := os.Create(fmt.Sprintf("%s.log", pod.Name))
+	output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", pod.Name))
 	if err != nil {
 		warnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err))
 		continue

From ca01d18f768a03300cf4c13450050c18649f1013 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Wed, 4 Sep 2024 11:46:23 -0700
Subject: [PATCH 33/35] Ensure CLI vars are used in script

Signed-off-by: Kyle Squizzato
---
 Makefile                |  2 +-
 scripts/aws-nuke-ccm.sh | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 82902cbe2..022216225 100644
--- a/Makefile
+++ b/Makefile
@@ -311,10 +311,10 @@ dev-creds-apply: dev-$(DEV_PROVIDER)-creds

 .PHONY: envsubst awscli dev-aws-nuke
 dev-aws-nuke: ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster.
-	@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c ./scripts/aws-nuke-ccm.sh
 	@CLUSTER_NAME=$(CLUSTER_NAME) $(ENVSUBST) < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml
 	DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group
 	@rm config/dev/cloud_nuke.yaml
+	@CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c ./scripts/aws-nuke-ccm.sh

 .PHONY: test-apply
 test-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates

diff --git a/scripts/aws-nuke-ccm.sh b/scripts/aws-nuke-ccm.sh
index 1d0a0bcdd..26e8a067c 100755
--- a/scripts/aws-nuke-ccm.sh
+++ b/scripts/aws-nuke-ccm.sh
@@ -27,34 +27,34 @@ if [ -z $YQ ]; then
     exit 1
 fi

-if [ -z AWSCLI ]; then
+if [ -z $AWSCLI ]; then
     echo "AWSCLI must be set to the path of the AWS CLI"
     echo "Use 'make dev-aws-nuke' instead of running this script directly"
     exit 1
 fi

 echo "Checking for ELB with 'kubernetes.io/cluster/$CLUSTER_NAME' tag"
-for LOADBALANCER in $(aws elb describe-load-balancers --output yaml | yq '.LoadBalancerDescriptions[].LoadBalancerName');
+for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName');
 do
     echo "Checking ELB: $LOADBALANCER for 'kubernetes.io/cluster/$CLUSTER_NAME' tag"
-    DESCRIBE_TAGS=$(aws elb describe-tags \
+    DESCRIBE_TAGS=$($AWSCLI elb describe-tags \
         --load-balancer-names $LOADBALANCER \
-        --output yaml | yq '.TagDescriptions[].Tags.[]' | grep "kubernetes.io/cluster/$CLUSTER_NAME")
+        --output yaml | $YQ '.TagDescriptions[].Tags.[]' | grep "kubernetes.io/cluster/$CLUSTER_NAME")
     if [ ! -z "${DESCRIBE_TAGS}" ]; then
         echo "Deleting ELB: $LOADBALANCER"
-        aws elb delete-load-balancer --load-balancer-name $LOADBALANCER
+        $AWSCLI elb delete-load-balancer --load-balancer-name $LOADBALANCER
     fi
 done

 echo "Checking for EBS Volumes with $CLUSTER_NAME within the 'kubernetes.io/created-for/pvc/name' tag"
-for VOLUME in $(aws ec2 describe-volumes --output yaml | yq '.Volumes[].VolumeId');
+for VOLUME in $($AWSCLI ec2 describe-volumes --output yaml | $YQ '.Volumes[].VolumeId');
 do
     echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim"
-    DESCRIBE_VOLUMES=$(aws ec2 describe-volumes \
+    DESCRIBE_VOLUMES=$($AWSCLI ec2 describe-volumes \
         --volume-id $VOLUME \
-        --output yaml | yq '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep $CLUSTER_NAME)
+        --output yaml | $YQ '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep $CLUSTER_NAME)
     if [ !
-z "${DESCRIBE_VOLUMES}" ]; then echo "Deleting EBS Volume: $VOLUME" - aws ec2 delete-volume --volume-id $VOLUME + $AWSCLI ec2 delete-volume --volume-id $VOLUME fi done From 1ae09c40c8c139ba9066f3a194155261ccdce583 Mon Sep 17 00:00:00 2001 From: Kyle Squizzato Date: Thu, 5 Sep 2024 08:06:25 -0700 Subject: [PATCH 34/35] Rename deployment package to managedcluster Signed-off-by: Kyle Squizzato --- test/e2e/e2e_test.go | 45 +++++++++---------- .../managedcluster.go} | 28 ++++++------ .../resources/aws-hosted-cp.yaml.tpl | 4 +- .../resources/aws-standalone-cp.yaml.tpl | 4 +- .../validate_deleted.go | 2 +- .../validate_deployed.go | 2 +- test/utils/utils.go | 4 ++ 7 files changed, 46 insertions(+), 43 deletions(-) rename test/{deployment/deployment.go => managedcluster/managedcluster.go} (69%) rename test/{deployment => managedcluster}/resources/aws-hosted-cp.yaml.tpl (87%) rename test/{deployment => managedcluster}/resources/aws-standalone-cp.yaml.tpl (87%) rename test/{deployment => managedcluster}/validate_deleted.go (99%) rename test/{deployment => managedcluster}/validate_deployed.go (99%) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 7d27c93dd..75f2e1ec2 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -30,8 +30,8 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/utils/ptr" - "github.com/Mirantis/hmc/test/deployment" "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/managedcluster" "github.com/Mirantis/hmc/test/utils" ) @@ -67,13 +67,13 @@ var _ = Describe("controller", Ordered, func() { return err } - for _, provider := range []deployment.ProviderType{ - deployment.ProviderCAPI, - deployment.ProviderAWS, - deployment.ProviderAzure, + for _, provider := range []managedcluster.ProviderType{ + managedcluster.ProviderCAPI, + managedcluster.ProviderAWS, + managedcluster.ProviderAzure, } { // Ensure only one controller pod is running. - if err := verifyControllerUp(kc, deployment.GetProviderLabel(provider), string(provider)); err != nil { + if err := verifyControllerUp(kc, managedcluster.GetProviderLabel(provider), string(provider)); err != nil { return err } } @@ -112,7 +112,7 @@ var _ = Describe("controller", Ordered, func() { // as well as the output of clusterctl to store as artifacts. if CurrentSpecReport().Failed() { By("collecting failure logs from controllers") - collectLogArtifacts(kc, clusterName, deployment.ProviderAWS, deployment.ProviderCAPI) + collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) } // Delete the deployments if they were created. @@ -132,15 +132,18 @@ var _ = Describe("controller", Ordered, func() { ExpectWithOffset(2, err).NotTo(HaveOccurred()) }) - for _, template := range []deployment.Template{deployment.TemplateAWSStandaloneCP, deployment.TemplateAWSHostedCP} { + for _, template := range []managedcluster.Template{ + managedcluster.TemplateAWSStandaloneCP, + managedcluster.TemplateAWSHostedCP, + } { It(fmt.Sprintf("should work with an AWS provider and %s template", template), func() { - if template == deployment.TemplateAWSHostedCP { + if template == managedcluster.TemplateAWSHostedCP { // TODO: Create AWS resources for hosted control plane. 
Skip("AWS hosted control plane not yet implemented") } By("creating a Deployment") - d := deployment.GetUnstructuredDeployment(deployment.ProviderAWS, template) + d := managedcluster.GetUnstructured(managedcluster.ProviderAWS, template) clusterName = d.GetName() deleteFunc, err = kc.CreateDeployment(context.Background(), d) @@ -148,14 +151,14 @@ var _ = Describe("controller", Ordered, func() { By("waiting for infrastructure providers to deploy successfully") Eventually(func() error { - return deployment.VerifyProviderDeployed(context.Background(), kc, clusterName) + return managedcluster.VerifyProviderDeployed(context.Background(), kc, clusterName) }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) By("verify the deployment deletes successfully") err = deleteFunc() Expect(err).NotTo(HaveOccurred()) Eventually(func() error { - return deployment.VerifyProviderDeleted(context.Background(), kc, clusterName) + return managedcluster.VerifyProviderDeleted(context.Background(), kc, clusterName) }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) }) } @@ -197,13 +200,13 @@ func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name st // CAPI controller and the provider controller(s) as well as output from clusterctl // and stores them in the test/e2e directory as artifacts. If it fails it // produces a warning message to the GinkgoWriter, but does not fail the test. -func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerTypes ...deployment.ProviderType) { +func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerTypes ...managedcluster.ProviderType) { GinkgoHelper() filterLabels := []string{hmcControllerLabel} for _, providerType := range providerTypes { - filterLabels = append(filterLabels, deployment.GetProviderLabel(providerType)) + filterLabels = append(filterLabels, managedcluster.GetProviderLabel(providerType)) } for _, label := range filterLabels { @@ -217,14 +220,14 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider }) podLogs, err := req.Stream(context.Background()) if err != nil { - warnError(fmt.Errorf("failed to get log stream for pod %s: %w", pod.Name, err)) + utils.WarnError(fmt.Errorf("failed to get log stream for pod %s: %w", pod.Name, err)) continue } defer podLogs.Close() //nolint:errcheck output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", pod.Name)) if err != nil { - warnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err)) + utils.WarnError(fmt.Errorf("failed to create log file for pod %s: %w", pod.Name, err)) continue } defer output.Close() //nolint:errcheck @@ -232,7 +235,7 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider r := bufio.NewReader(podLogs) _, err = r.WriteTo(output) if err != nil { - warnError(fmt.Errorf("failed to write log file for pod %s: %w", pod.Name, err)) + utils.WarnError(fmt.Errorf("failed to write log file for pod %s: %w", pod.Name, err)) } } } @@ -241,16 +244,12 @@ func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, provider "describe", "cluster", clusterName, "--namespace", namespace, "--show-conditions=all") output, err := utils.Run(cmd) if err != nil { - warnError(fmt.Errorf("failed to get clusterctl log: %w", err)) + utils.WarnError(fmt.Errorf("failed to get clusterctl log: %w", err)) return } err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) if err != nil { - 
warnError(fmt.Errorf("failed to write clusterctl log: %w", err)) + utils.WarnError(fmt.Errorf("failed to write clusterctl log: %w", err)) } } - -func warnError(err error) { - _, _ = fmt.Fprintf(GinkgoWriter, "Warning: %v\n", err) -} diff --git a/test/deployment/deployment.go b/test/managedcluster/managedcluster.go similarity index 69% rename from test/deployment/deployment.go rename to test/managedcluster/managedcluster.go index 102f65812..28783ea5d 100644 --- a/test/deployment/deployment.go +++ b/test/managedcluster/managedcluster.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package deployment +package managedcluster import ( _ "embed" @@ -45,18 +45,18 @@ const ( ) //go:embed resources/aws-standalone-cp.yaml.tpl -var awsStandaloneCPDeploymentTemplateBytes []byte +var awsStandaloneCPManagedClusterTemplateBytes []byte //go:embed resources/aws-hosted-cp.yaml.tpl -var awsHostedCPDeploymentTemplateBytes []byte +var awsHostedCPManagedClusterTemplateBytes []byte func GetProviderLabel(provider ProviderType) string { return fmt.Sprintf("%s=%s", providerLabel, provider) } -// GetUnstructuredDeployment returns an unstructured deployment object based on -// the provider and template. -func GetUnstructuredDeployment(provider ProviderType, templateName Template) *unstructured.Unstructured { +// GetUnstructured returns an unstructured ManagedCluster object based on the +// provider and template. +func GetUnstructured(provider ProviderType, templateName Template) *unstructured.Unstructured { GinkgoHelper() generatedName := uuid.New().String()[:8] + "-e2e-test" @@ -64,27 +64,27 @@ func GetUnstructuredDeployment(provider ProviderType, templateName Template) *un switch provider { case ProviderAWS: - Expect(os.Setenv("DEPLOYMENT_NAME", generatedName)).NotTo(HaveOccurred()) + Expect(os.Setenv("MANAGED_CLUSTER_NAME", generatedName)).NotTo(HaveOccurred()) - var deploymentTemplateBytes []byte + var managedClusterTemplateBytes []byte switch templateName { case TemplateAWSStandaloneCP: - deploymentTemplateBytes = awsStandaloneCPDeploymentTemplateBytes + managedClusterTemplateBytes = awsStandaloneCPManagedClusterTemplateBytes case TemplateAWSHostedCP: - deploymentTemplateBytes = awsHostedCPDeploymentTemplateBytes + managedClusterTemplateBytes = awsHostedCPManagedClusterTemplateBytes default: Fail(fmt.Sprintf("unsupported AWS template: %s", templateName)) } - deploymentConfigBytes, err := envsubst.Bytes(deploymentTemplateBytes) + managedClusterConfigBytes, err := envsubst.Bytes(managedClusterTemplateBytes) Expect(err).NotTo(HaveOccurred(), "failed to substitute environment variables") - var deploymentConfig map[string]interface{} + var managedClusterConfig map[string]interface{} - err = yaml.Unmarshal(deploymentConfigBytes, &deploymentConfig) + err = yaml.Unmarshal(managedClusterConfigBytes, &managedClusterConfig) Expect(err).NotTo(HaveOccurred(), "failed to unmarshal deployment config") - return &unstructured.Unstructured{Object: deploymentConfig} + return &unstructured.Unstructured{Object: managedClusterConfig} default: Fail(fmt.Sprintf("unsupported provider: %s", provider)) } diff --git a/test/deployment/resources/aws-hosted-cp.yaml.tpl b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl similarity index 87% rename from test/deployment/resources/aws-hosted-cp.yaml.tpl rename to test/managedcluster/resources/aws-hosted-cp.yaml.tpl index a3ce562e8..894bb6667 100644 --- a/test/deployment/resources/aws-hosted-cp.yaml.tpl +++ 
b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl @@ -1,7 +1,7 @@ apiVersion: hmc.mirantis.com/v1alpha1 -kind: Deployment +kind: ManagedCluster metadata: - name: ${DEPLOYMENT_NAME} + name: ${MANAGED_CLUSTER_NAME} spec: template: aws-hosted-cp config: diff --git a/test/deployment/resources/aws-standalone-cp.yaml.tpl b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl similarity index 87% rename from test/deployment/resources/aws-standalone-cp.yaml.tpl rename to test/managedcluster/resources/aws-standalone-cp.yaml.tpl index 5f0776e29..7825a2833 100644 --- a/test/deployment/resources/aws-standalone-cp.yaml.tpl +++ b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl @@ -1,7 +1,7 @@ apiVersion: hmc.mirantis.com/v1alpha1 -kind: Deployment +kind: ManagedCluster metadata: - name: ${DEPLOYMENT_NAME} + name: ${MANAGED_CLUSTER_NAME} spec: template: aws-standalone-cp config: diff --git a/test/deployment/validate_deleted.go b/test/managedcluster/validate_deleted.go similarity index 99% rename from test/deployment/validate_deleted.go rename to test/managedcluster/validate_deleted.go index cca0bdaeb..dc5712a9f 100644 --- a/test/deployment/validate_deleted.go +++ b/test/managedcluster/validate_deleted.go @@ -11,7 +11,7 @@ // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -package deployment +package managedcluster import ( "context" diff --git a/test/deployment/validate_deployed.go b/test/managedcluster/validate_deployed.go similarity index 99% rename from test/deployment/validate_deployed.go rename to test/managedcluster/validate_deployed.go index f7e6f14a7..f6423fb2b 100644 --- a/test/deployment/validate_deployed.go +++ b/test/managedcluster/validate_deployed.go @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package deployment
+package managedcluster

 import (
 	"context"
diff --git a/test/utils/utils.go b/test/utils/utils.go
index e6283fa74..4e0d767f4 100644
--- a/test/utils/utils.go
+++ b/test/utils/utils.go
@@ -178,3 +178,7 @@ func ValidateObjectNamePrefix(unstrObj *unstructured.Unstructured, clusterName s
 func ObjKindName(unstrObj *unstructured.Unstructured) (string, string) {
 	return unstrObj.GetKind(), unstrObj.GetName()
 }
+
+func WarnError(err error) {
+	_, _ = fmt.Fprintf(GinkgoWriter, "Warning: %v\n", err)
+}

From 75575777ac979cfbbf7196fd3f947b15e4346813 Mon Sep 17 00:00:00 2001
From: Kyle Squizzato
Date: Thu, 5 Sep 2024 13:54:28 -0700
Subject: [PATCH 35/35] Modify CreateDeployment to CreateManagedCluster

Signed-off-by: Kyle Squizzato
---
 test/e2e/e2e_test.go          |  2 +-
 test/kubeclient/kubeclient.go | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go
index 75f2e1ec2..0d3b748d3 100644
--- a/test/e2e/e2e_test.go
+++ b/test/e2e/e2e_test.go
@@ -146,7 +146,7 @@ var _ = Describe("controller", Ordered, func() {
 			d := managedcluster.GetUnstructured(managedcluster.ProviderAWS, template)
 			clusterName = d.GetName()

-			deleteFunc, err = kc.CreateDeployment(context.Background(), d)
+			deleteFunc, err = kc.CreateManagedCluster(context.Background(), d)
 			Expect(err).NotTo(HaveOccurred())

 			By("waiting for infrastructure providers to deploy successfully")
diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go
index bac6a2909..c1a4453c9 100644
--- a/test/kubeclient/kubeclient.go
+++ b/test/kubeclient/kubeclient.go
@@ -164,33 +164,33 @@ func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) (dynamic
 	return client.Resource(gvr).Namespace(kc.Namespace), nil
 }

-// CreateDeployment creates a deployment.hmc.mirantis.com in the given
+// CreateManagedCluster creates a managedcluster.hmc.mirantis.com in the given
 // namespace and returns a DeleteFunc to clean up the deployment.
 // The DeleteFunc is a no-op if the deployment has already been deleted.
-func (kc *KubeClient) CreateDeployment(
+func (kc *KubeClient) CreateManagedCluster(
 	ctx context.Context, managedcluster *unstructured.Unstructured) (func() error, error) {
 	kind := managedcluster.GetKind()

-	if kind != "Deployment" {
-		return nil, fmt.Errorf("expected kind Deployment, got: %s", kind)
+	if kind != "ManagedCluster" {
+		return nil, fmt.Errorf("expected kind ManagedCluster, got: %s", kind)
 	}

 	client, err := kc.GetDynamicClient(schema.GroupVersionResource{
 		Group:    "hmc.mirantis.com",
 		Version:  "v1alpha1",
-		Resource: "deployments",
+		Resource: "managedclusters",
 	})
 	if err != nil {
 		return nil, fmt.Errorf("failed to get dynamic client: %w", err)
 	}

 	_, err = client.Create(ctx, managedcluster, metav1.CreateOptions{})
 	if err != nil {
 		return nil, fmt.Errorf("failed to create Deployment: %w", err)
 	}

 	return func() error {
 		err := client.Delete(ctx, managedcluster.GetName(), metav1.DeleteOptions{})
 		if apierrors.IsNotFound(err) {
 			return nil
 		}