diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 6c6ab64f3..dcb9508fa 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -72,3 +72,13 @@ jobs: - name: Run E2E tests run: | make test-e2e + - name: Get test logs + run: | + kubectl logs -n hmc-system -l app=e2e-test > test/e2e/e2e-test.log + ./bin/clusterctl describe cluster --show-conditions=all > test/e2e/clusterctl.log + - name: Archive test results + uses: actions/upload-artifact@v4 + with: + name: test-logs + path: | + test/e2e/*.log diff --git a/.gitignore b/.gitignore index e089f3734..0656b090b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,10 +14,13 @@ dist go.work go.work.sum -# cloud nuke config +# cloud-nuke config *cloud_nuke.yaml -# editors +# Test artifacts +test/e2e/*.log + +# editors .idea *.swp *.swo diff --git a/config/dev/deployment.yaml b/config/dev/deployment.yaml index 58ba18e32..a24db4c3e 100644 --- a/config/dev/deployment.yaml +++ b/config/dev/deployment.yaml @@ -5,13 +5,13 @@ metadata: spec: template: aws-standalone-cp config: - region: us-east-2 + region: us-west-2 publicIP: true controlPlaneNumber: 1 workersNumber: 1 controlPlane: - amiID: ami-02f3416038bdb17fb + amiID: ami-0989c067ff3da4b27 instanceType: t3.small worker: - amiID: ami-02f3416038bdb17fb + amiID: ami-0989c067ff3da4b27 instanceType: t3.small diff --git a/go.mod b/go.mod index a79fbb27e..202538012 100644 --- a/go.mod +++ b/go.mod @@ -23,6 +23,7 @@ require ( k8s.io/apiextensions-apiserver v0.31.0 k8s.io/apimachinery v0.31.0 k8s.io/client-go v0.31.0 + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/controller-runtime v0.19.0 ) @@ -163,7 +164,6 @@ require ( k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240430033511-f0e62f92d13f // indirect k8s.io/kubectl v0.31.0 // indirect - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect oras.land/oras-go v1.2.5 // indirect sigs.k8s.io/gateway-api v1.1.0 // indirect sigs.k8s.io/json 
v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/test/deployment/deployment.go b/test/deployment/deployment.go index 3cbfcb361..337089621 100644 --- a/test/deployment/deployment.go +++ b/test/deployment/deployment.go @@ -43,8 +43,11 @@ const ( TemplateAWSHostedCP Template = "aws-hosted-cp" ) -//go:embed resources/deployment.yaml.tpl -var deploymentTemplateBytes []byte +//go:embed resources/aws-standalone-cp.yaml.tpl +var awsStandaloneCPDeploymentTemplateBytes []byte + +//go:embed resources/aws-hosted-cp.yaml.tpl +var awsHostedCPDeploymentTemplateBytes []byte // GetUnstructuredDeployment returns an unstructured deployment object based on // the provider and template. @@ -52,16 +55,24 @@ func GetUnstructuredDeployment(provider ProviderType, templateName Template) *un GinkgoHelper() generatedName := uuid.New().String()[:8] + "-e2e-test" - _, _ = fmt.Fprintf(GinkgoWriter, "Generated AWS cluster name: %q\n", generatedName) + _, _ = fmt.Fprintf(GinkgoWriter, "Generated cluster name: %q\n", generatedName) switch provider { case ProviderAWS: // XXX: Maybe we should just use automatic AMI selection here. 
amiID := getAWSAMI() - - Expect(os.Setenv("AMI_ID", amiID)).NotTo(HaveOccurred()) + Expect(os.Setenv("AWS_AMI_ID", amiID)).NotTo(HaveOccurred()) Expect(os.Setenv("DEPLOYMENT_NAME", generatedName)).NotTo(HaveOccurred()) - Expect(os.Setenv("TEMPLATE_NAME", string(templateName))).NotTo(HaveOccurred()) + + var deploymentTemplateBytes []byte + switch templateName { + case TemplateAWSStandaloneCP: + deploymentTemplateBytes = awsStandaloneCPDeploymentTemplateBytes + case TemplateAWSHostedCP: + deploymentTemplateBytes = awsHostedCPDeploymentTemplateBytes + default: + Fail(fmt.Sprintf("unsupported AWS template: %s", templateName)) + } deploymentConfigBytes, err := envsubst.Bytes(deploymentTemplateBytes) Expect(err).NotTo(HaveOccurred(), "failed to substitute environment variables") diff --git a/test/deployment/resources/aws-hosted-cp.yaml.tpl b/test/deployment/resources/aws-hosted-cp.yaml.tpl new file mode 100644 index 000000000..2606b64f9 --- /dev/null +++ b/test/deployment/resources/aws-hosted-cp.yaml.tpl @@ -0,0 +1,17 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: Deployment +metadata: + name: ${DEPLOYMENT_NAME} +spec: + template: aws-hosted-cp + config: + vpcID: ${AWS_VPC_ID} + region: ${AWS_REGION} + publicIP: ${PUBLIC_IP:=true} + subnets: + - id: ${AWS_SUBNET_ID} + availabilityZone: ${AWS_SUBNET_AVAILABILITY_ZONE} + amiID: ${AWS_AMI_ID} + instanceType: ${INSTANCE_TYPE:=t3.medium} + securityGroupIDs: + - ${AWS_SG_ID} diff --git a/test/deployment/resources/aws-standalone-cp.yaml.tpl b/test/deployment/resources/aws-standalone-cp.yaml.tpl new file mode 100644 index 000000000..220fa600b --- /dev/null +++ b/test/deployment/resources/aws-standalone-cp.yaml.tpl @@ -0,0 +1,19 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: Deployment +metadata: + name: ${DEPLOYMENT_NAME} +spec: + template: aws-standalone-cp + config: + region: ${AWS_REGION} + publicIP: ${PUBLIC_IP:=true} + controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} + workersNumber: ${WORKERS_NUMBER:=1} + controlPlane: + 
amiID: ${AWS_AMI_ID} + instanceType: ${INSTANCE_TYPE:=t3.small} + worker: + amiID: ${AWS_AMI_ID} + instanceType: ${INSTANCE_TYPE:=t3.small} + + diff --git a/test/deployment/resources/deployment.yaml.tpl b/test/deployment/resources/deployment.yaml.tpl deleted file mode 100644 index ac10f9f84..000000000 --- a/test/deployment/resources/deployment.yaml.tpl +++ /dev/null @@ -1,18 +0,0 @@ -apiVersion: hmc.mirantis.com/v1alpha1 -kind: Deployment -metadata: - name: ${DEPLOYMENT_NAME} -spec: - template: ${TEMPLATE_NAME} - config: - region: ${AWS_REGION} - publicIP: ${PUBLIC_IP:=true} - controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} - workersNumber: ${WORKERS_NUMBER:=1} - controlPlane: - amiID: ${AMI_ID} - instanceType: ${INSTANCE_TYPE:=t3.small} - worker: - amiID: ${AMI_ID} - instanceType: ${INSTANCE_TYPE:=t3.small} - diff --git a/test/deployment/validate_deployed.go b/test/deployment/validate_deployed.go index 66b3d7709..e7e1239ff 100644 --- a/test/deployment/validate_deployed.go +++ b/test/deployment/validate_deployed.go @@ -103,10 +103,6 @@ func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterNam return fmt.Errorf("failed to list machines: %w", err) } - if err != nil { - return fmt.Errorf("failed to list Machines: %w", err) - } - for _, machine := range machines { if err := utils.ValidateObjectNamePrefix(&machine, clusterName); err != nil { Fail(err.Error()) diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index ebec00019..ca0e73abb 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -15,16 +15,20 @@ package e2e import ( + "bufio" "context" "fmt" "os" "os/exec" + "path/filepath" "strings" "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" "github.com/Mirantis/hmc/test/deployment" "github.com/Mirantis/hmc/test/kubeclient" @@ -98,10 +102,10 @@ var _ = Describe("controller", Ordered, func() { Context("AWS Templates", func() { var ( - kc *kubeclient.KubeClient - deleteDeploymentFunc func() error - clusterName string - err error + kc *kubeclient.KubeClient + deleteFunc func() error + clusterName string + err error ) BeforeAll(func() { @@ -111,10 +115,17 @@ var _ = Describe("controller", Ordered, func() { ExpectWithOffset(2, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) }) - AfterAll(func() { - // Delete the deployment if it was created. - if deleteDeploymentFunc != nil { - err = deleteDeploymentFunc() + AfterEach(func() { + // If we failed collect logs from each of the affiliated controllers + // as well as the output of clusterctl to store as artifacts. + if CurrentSpecReport().Failed() { + By("collecting failure logs from controllers") + collectLogArtifacts(kc, clusterName, deployment.ProviderAWS) + } + + // Delete the deployments if they were created. + if deleteFunc != nil { + err = deleteFunc() Expect(err).NotTo(HaveOccurred()) } @@ -127,29 +138,79 @@ var _ = Describe("controller", Ordered, func() { ExpectWithOffset(2, err).NotTo(HaveOccurred()) }) - It("should work with an AWS provider", func() { - By("creating a Deployment with aws-standalone-cp template") - d := deployment.GetUnstructuredDeployment(deployment.ProviderAWS, deployment.TemplateAWSStandaloneCP) - clusterName = d.GetName() + for _, template := range []deployment.Template{deployment.TemplateAWSStandaloneCP, deployment.TemplateAWSHostedCP} { + It(fmt.Sprintf("should work with an AWS provider and %s template", template), func() { + if template == deployment.TemplateAWSHostedCP { + // TODO: Create AWS resources for hosted control plane. 
+ Skip("AWS hosted control plane not yet implemented") + } - deleteDeploymentFunc, err = kc.CreateDeployment(context.Background(), d) - Expect(err).NotTo(HaveOccurred()) + By("creating a Deployment") + d := deployment.GetUnstructuredDeployment(deployment.ProviderAWS, template) + clusterName = d.GetName() - By("waiting for infrastructure providers to deploy successfully") - Eventually(func() error { - return deployment.VerifyProviderDeployed(context.Background(), kc, clusterName) - }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + deleteFunc, err = kc.CreateDeployment(context.Background(), d) + Expect(err).NotTo(HaveOccurred()) - By("verifying the deployment deletes successfully") - err = deleteDeploymentFunc() - Expect(err).NotTo(HaveOccurred()) - Eventually(func() error { - return deployment.VerifyProviderDeleted(context.Background(), kc, clusterName) - }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + By("waiting for infrastructure providers to deploy successfully") + Eventually(func() error { + return deployment.VerifyProviderDeployed(context.Background(), kc, clusterName) + }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) - By("creating a Deployment with aws-hosted-cp template") - // TODO: Use the standalone control plane resources to craft a - // hosted control plane and test it. - }) + By("verify the deployment deletes successfully") + err = deleteFunc() + Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + return deployment.VerifyProviderDeleted(context.Background(), kc, clusterName) + }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + }) + } }) }) + +// collectLogArtifacts collects log output from each of the HMC controller, +// CAPI controller and the provider controller as well as output from clusterctl +// and stores them in the test/e2e directory as artifacts. 
+// We could do this at the end or we could use Kubernetes' CopyPodLogs from +// https://github.com/kubernetes/kubernetes/blob/v1.31.0/test/e2e/storage/podlogs/podlogs.go#L88 +// to stream the logs to GinkgoWriter during the test. +func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerType deployment.ProviderType) { + GinkgoHelper() + + filterLabels := []string{ + "app.kubernetes.io/name=hmc-controller-manager", + "app.kubernetes.io/name=cluster-api", + fmt.Sprintf("app.kubernetes.io/name=cluster-api-provider-%s", providerType), + } + + for _, label := range filterLabels { + pods, _ := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: label, + }) + + for _, pod := range pods.Items { + req := kc.Client.CoreV1().Pods(kc.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ + TailLines: ptr.To(int64(1000)), + }) + podLogs, err := req.Stream(context.Background()) + Expect(err).NotTo(HaveOccurred(), "failed to get log stream for pod %s", pod.Name) + DeferCleanup(Expect(podLogs.Close()).NotTo(HaveOccurred())) + + output, err := os.Create(fmt.Sprintf("test/e2e/%s.log", pod.Name)) + Expect(err).NotTo(HaveOccurred(), "failed to create log file for pod %s", pod.Name) + DeferCleanup(Expect(output.Close()).NotTo(HaveOccurred())) + + r := bufio.NewReader(podLogs) + _, err = r.WriteTo(output) + Expect(err).NotTo(HaveOccurred(), "failed to write log file for pod %s", pod.Name) + } + } + + cmd := exec.Command("./bin/clusterctl", + "describe", "cluster", clusterName, "--show-conditions=all") + output, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred(), "failed to get clusterctl log") + + err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) + Expect(err).NotTo(HaveOccurred(), "failed to write clusterctl log") +}