diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml new file mode 100644 index 000000000..43566691e --- /dev/null +++ b/.github/workflows/build.yml @@ -0,0 +1,52 @@ +name: Build and Unit Test + +concurrency: + group: build-${{ github.head_ref || github.run_id }} + cancel-in-progress: true + +on: + push: + branches: + - main + - release-* + tags: + - '*' + paths-ignore: + - '**.md' + pull_request: + branches: + - main + - release-* + paths-ignore: + - 'config/**' + - '**.md' + +env: + GO_VERSION: '1.22' + +jobs: + build: + name: Build and Unit Test + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + - name: Lint + uses: golangci/golangci-lint-action@v6 + with: + args: --timeout 10m0s + - name: Verify all generated pieces are up-to-date + run: make generate-all && git add -N . && git diff --exit-code + - name: Unit tests + run: | + make test + - name: Build + run: | + make build + - name: Image build + run: | + make docker-build diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 56bb5543f..91214ced8 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,54 +1,45 @@ -name: Build and Test +name: E2E Tests concurrency: group: test-${{ github.head_ref || github.run_id }} cancel-in-progress: true on: - push: + pull_request_target: + types: [labeled] branches: - main - release-* - tags: - - "*" - paths-ignore: - - '**.md' - pull_request: - branches: - - main - - release-* - tags: - - "*" paths-ignore: - 'config/**' - '**.md' - env: GO_VERSION: '1.22' jobs: - build: - name: Build + e2etest: + name: E2E Tests runs-on: ubuntu-latest + if: contains(github.event.pull_request.labels.*.name, 'test-e2e') + env: + AWS_REGION: us-west-2 + AWS_ACCESS_KEY_ID: ${{ secrets.CI_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.CI_AWS_SECRET_ACCESS_KEY }} steps: - name: Checkout 
repository uses: actions/checkout@v4 - - name: Set up Go + - name: Setup Go uses: actions/setup-go@v5 with: - go-version: ${{ env.GO_VERSION }} - - name: Lint - uses: golangci/golangci-lint-action@v6 - with: - args: --timeout 10m0s - - name: Verify all generated pieces are up-to-date - run: make generate-all && git add -N . && git diff --exit-code - - name: Unit tests + go-version: ${{ env.GO_VERSION }} + - name: Setup kubectl + uses: azure/setup-kubectl@v4 + - name: Run E2E tests run: | - make test - - name: Build - run: | - make build - - name: Image build - run: | - make docker-build + make test-e2e + - name: Archive test results + uses: actions/upload-artifact@v4 + with: + name: test-logs + path: | + test/e2e/*.log diff --git a/.gitignore b/.gitignore index 884169b67..0656b090b 100644 --- a/.gitignore +++ b/.gitignore @@ -14,7 +14,13 @@ dist go.work go.work.sum -# editors +# cloud-nuke config +*cloud_nuke.yaml + +# Test artifacts +test/e2e/*.log + +# editors .idea *.swp *.swo diff --git a/.golangci.yml b/.golangci.yml index ca69a11f6..a6ffbedab 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -9,10 +9,10 @@ issues: # restore some of the defaults # (fill in the rest as needed) exclude-rules: - - path: "api/*" + - path: 'api/*' linters: - lll - - path: "internal/*" + - path: 'internal/*' linters: - dupl - lll @@ -21,7 +21,7 @@ linters: enable: - dupl - errcheck - - exportloopref + - copyloopvar - goconst - gocyclo - gofmt diff --git a/Makefile b/Makefile index 2b3f89afc..022216225 100644 --- a/Makefile +++ b/Makefile @@ -104,9 +104,9 @@ test: generate-all fmt vet envtest tidy external-crd ## Run tests. KUBEBUILDER_ASSETS="$(shell $(ENVTEST) use $(ENVTEST_K8S_VERSION) --bin-dir $(LOCALBIN) -p path)" go test $$(go list ./... | grep -v /e2e) -coverprofile cover.out # Utilize Kind or modify the e2e tests to load the image locally, enabling compatibility with other vendors. -.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. 
-test-e2e: - go test ./test/e2e/ -v -ginkgo.v +.PHONY: test-e2e # Run the e2e tests against a Kind k8s instance that is spun up. +test-e2e: cli-install + KIND_CLUSTER_NAME="hmc-test" KIND_VERSION=$(KIND_VERSION) go test ./test/e2e/ -v -ginkgo.v -timeout=2h .PHONY: lint lint: golangci-lint ## Run golangci-lint linter & yamllint @@ -192,6 +192,8 @@ REGISTRY_NAME ?= hmc-local-registry REGISTRY_PORT ?= 5001 REGISTRY_REPO ?= oci://127.0.0.1:$(REGISTRY_PORT)/charts DEV_PROVIDER ?= aws +REGISTRY_IS_OCI = $(shell echo $(REGISTRY_REPO) | grep -q oci && echo true || echo false) +CLUSTER_NAME ?= $(shell $(YQ) '.metadata.name' ./config/dev/deployment.yaml) AWS_CREDENTIALS=${AWS_B64ENCODED_CREDENTIALS} @@ -243,17 +245,31 @@ dev-undeploy: ## Undeploy controller from the K8s cluster specified in ~/.kube/c .PHONY: helm-push helm-push: helm-package - @for chart in $(CHARTS_PACKAGE_DIR)/*.tgz; do \ + @if [ ! $(REGISTRY_IS_OCI) ]; then \ + repo_flag="--repo"; \ + fi; \ + for chart in $(CHARTS_PACKAGE_DIR)/*.tgz; do \ base=$$(basename $$chart .tgz); \ chart_version=$$(echo $$base | grep -o "v\{0,1\}[0-9]\+\.[0-9]\+\.[0-9].*"); \ chart_name="$${base%-"$$chart_version"}"; \ echo "Verifying if chart $$chart_name, version $$chart_version already exists in $(REGISTRY_REPO)"; \ - chart_exists=$$($(HELM) pull $(REGISTRY_REPO)/$$chart_name --version $$chart_version --destination /tmp 2>&1 | grep "not found" || true); \ + chart_exists=$$($(HELM) pull $$repo_flag $(REGISTRY_REPO) $$chart_name --version $$chart_version --destination /tmp 2>&1 | grep "not found" || true); \ if [ -z "$$chart_exists" ]; then \ echo "Chart $$chart_name version $$chart_version already exists in the repository."; \ else \ - echo "Pushing $$chart to $(REGISTRY_REPO)"; \ - $(HELM) push "$$chart" $(REGISTRY_REPO); \ + if $(REGISTRY_IS_OCI); then \ + echo "Pushing $$chart to $(REGISTRY_REPO)"; \ + $(HELM) push "$$chart" $(REGISTRY_REPO); \ + else \ + if [ ! $$REGISTRY_USERNAME ] && [ ! 
$$REGISTRY_PASSWORD ]; then \ + echo "REGISTRY_USERNAME and REGISTRY_PASSWORD must be populated to push the chart to an HTTPS repository"; \ + exit 1; \ + else \ + $(HELM) repo add hmc $(REGISTRY_REPO); \ + echo "Pushing $$chart to $(REGISTRY_REPO)"; \ + $(HELM) cm-push "$$chart" $(REGISTRY_REPO) --username $$REGISTRY_USERNAME --password $$REGISTRY_PASSWORD; \ + fi; \ + fi; \ fi; \ done @@ -277,21 +293,37 @@ dev-azure-creds: envsubst dev-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates .PHONY: dev-destroy -dev-destroy: kind-undeploy registry-undeploy - -.PHONY: dev-creds-apply -dev-creds-apply: dev-$(DEV_PROVIDER)-creds +dev-destroy: kind-undeploy registry-undeploy ## Destroy the development environment by deleting the kind cluster and local registry. .PHONY: dev-provider-apply dev-provider-apply: envsubst - @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-managedcluster.yaml | $(KUBECTL) apply -f - + @if [ $(DEV_PROVIDER) = "aws" ]; then \ + $(MAKE) dev-aws-creds; \ + fi + @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-managedcluster.yaml | $(KUBECTL) apply -f - .PHONY: dev-provider-delete dev-provider-delete: envsubst @NAMESPACE=$(NAMESPACE) $(ENVSUBST) -no-unset -i config/dev/$(DEV_PROVIDER)-managedcluster.yaml | $(KUBECTL) delete -f - +.PHONY: dev-creds-apply +dev-creds-apply: dev-$(DEV_PROVIDER)-creds + +.PHONY: dev-aws-nuke +dev-aws-nuke: envsubst awscli yq cloud-nuke ## Warning: Destructive! Nuke all AWS resources deployed by 'DEV_PROVIDER=aws dev-provider-apply', prefix with CLUSTER_NAME to nuke a specific cluster. 
+ @CLUSTER_NAME=$(CLUSTER_NAME) $(ENVSUBST) < config/dev/cloud_nuke.yaml.tpl > config/dev/cloud_nuke.yaml + DISABLE_TELEMETRY=true $(CLOUDNUKE) aws --region $$AWS_REGION --force --config config/dev/cloud_nuke.yaml --resource-type vpc,eip,nat-gateway,ec2-subnet,elb,elbv2,internet-gateway,network-interface,security-group + @rm config/dev/cloud_nuke.yaml + @CLUSTER_NAME=$(CLUSTER_NAME) YQ=$(YQ) AWSCLI=$(AWSCLI) bash -c ./scripts/aws-nuke-ccm.sh + +.PHONY: test-apply +test-apply: kind-deploy registry-deploy dev-push dev-deploy dev-templates + +.PHONY: test-destroy +test-destroy: kind-undeploy registry-undeploy + .PHONY: cli-install -cli-install: clusterawsadm clusterctl +cli-install: clusterawsadm clusterctl cloud-nuke yq awscli ## Install the necessary CLI tools for deployment, development and testing. ##@ Dependencies @@ -320,8 +352,10 @@ KIND ?= $(LOCALBIN)/kind-$(KIND_VERSION) YQ ?= $(LOCALBIN)/yq-$(YQ_VERSION) CLUSTERAWSADM ?= $(LOCALBIN)/clusterawsadm CLUSTERCTL ?= $(LOCALBIN)/clusterctl +CLOUDNUKE ?= $(LOCALBIN)/cloud-nuke ADDLICENSE ?= $(LOCALBIN)/addlicense-$(ADDLICENSE_VERSION) ENVSUBST ?= $(LOCALBIN)/envsubst-$(ENVSUBST_VERSION) +AWSCLI ?= $(LOCALBIN)/aws ## Tool Versions CONTROLLER_TOOLS_VERSION ?= v0.14.0 @@ -330,10 +364,12 @@ GOLANGCI_LINT_VERSION ?= v1.60.1 HELM_VERSION ?= v3.15.1 KIND_VERSION ?= v0.23.0 YQ_VERSION ?= v4.44.2 +CLOUDNUKE_VERSION = v0.37.1 CLUSTERAWSADM_VERSION ?= v2.5.2 CLUSTERCTL_VERSION ?= v1.7.3 ADDLICENSE_VERSION ?= v1.1.1 ENVSUBST_VERSION ?= v1.4.2 +AWSCLI_VERSION ?= 2.17.42 .PHONY: controller-gen controller-gen: $(CONTROLLER_GEN) ## Download controller-gen locally if necessary. @@ -382,6 +418,12 @@ yq: $(YQ) ## Download yq locally if necessary. $(YQ): | $(LOCALBIN) $(call go-install-tool,$(YQ),github.com/mikefarah/yq/v4,${YQ_VERSION}) +.PHONY: cloud-nuke +cloud-nuke: $(CLOUDNUKE) ## Download cloud-nuke locally if necessary. 
+$(CLOUDNUKE): | $(LOCALBIN) + curl -sL https://github.com/gruntwork-io/cloud-nuke/releases/download/$(CLOUDNUKE_VERSION)/cloud-nuke_$(OS)_$(ARCH) -o $(CLOUDNUKE) + chmod +x $(CLOUDNUKE) + .PHONY: clusterawsadm clusterawsadm: $(CLUSTERAWSADM) ## Download clusterawsadm locally if necessary. $(CLUSTERAWSADM): | $(LOCALBIN) @@ -403,6 +445,13 @@ envsubst: $(ENVSUBST) $(ENVSUBST): | $(LOCALBIN) $(call go-install-tool,$(ENVSUBST),github.com/a8m/envsubst/cmd/envsubst,${ENVSUBST_VERSION}) +.PHONY: awscli +awscli: $(AWSCLI) +$(AWSCLI): | $(LOCALBIN) + curl "https://awscli.amazonaws.com/awscli-exe-$(OS)-$(shell uname -m)-$(AWSCLI_VERSION).zip" -o "/tmp/awscliv2.zip" + unzip /tmp/awscliv2.zip -d /tmp + /tmp/aws/install -i $(LOCALBIN)/aws-cli -b $(LOCALBIN) --update + # go-install-tool will 'go install' any package with custom target and name of binary, if it doesn't exist # $1 - target path with name of binary (ideally with version) # $2 - package url which can be installed @@ -413,6 +462,6 @@ set -e; \ package=$(2)@$(3) ;\ echo "Downloading $${package}" ;\ GOBIN=$(LOCALBIN) go install $${package} ;\ -mv "$$(echo "$(1)" | sed "s/-$(3)$$//")" $(1) ;\ +if [ ! 
-f $(1) ]; then mv -f "$$(echo "$(1)" | sed "s/-$(3)$$//")" $(1); fi ;\ } endef diff --git a/config/dev/aws-managedcluster.yaml b/config/dev/aws-managedcluster.yaml index 7c664e1af..aa1b5efb7 100644 --- a/config/dev/aws-managedcluster.yaml +++ b/config/dev/aws-managedcluster.yaml @@ -4,13 +4,13 @@ metadata: name: aws-dev namespace: ${NAMESPACE} spec: - template: aws-standalone-cp config: - region: us-east-2 - publicIP: true - controlPlaneNumber: 1 - workersNumber: 1 controlPlane: instanceType: t3.small + controlPlaneNumber: 1 + publicIP: true + region: us-west-2 worker: instanceType: t3.small + workersNumber: 1 + template: aws-standalone-cp diff --git a/config/dev/cloud_nuke.yaml.tpl b/config/dev/cloud_nuke.yaml.tpl new file mode 100644 index 000000000..1888fe965 --- /dev/null +++ b/config/dev/cloud_nuke.yaml.tpl @@ -0,0 +1,340 @@ +# This config file is used by cloud-nuke to clean up named resources associated +# with a specific managed cluster across an AWS account. CLUSTER_NAME is +# typically the metadata.name of the Deployment. +# The resources listed here are ALL of the potential resources that can be +# filtered by cloud-nuke, except for IAM resources since we'll never touch those. 
+# See: https://github.com/gruntwork-io/cloud-nuke?tab=readme-ov-file#whats-supported +# +# Usage: +# - 'make dev-aws-nuke' will nuke resources affiliated with config/dev/deployment.yaml +# - 'CLUSTER_NAME=foo make dev-aws-nuke' will nuke resources affiliated with an AWS cluster named 'foo' +# Check cluster names with 'kubectl get clusters -n hmc-system' + +ACM: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +APIGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +APIGatewayV2: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +AccessAnalyzer: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +AutoScalingGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +AppRunnerService: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +BackupVault: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudWatchAlarm: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudWatchDashboard: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudWatchLogGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CloudtrailTrail: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +CodeDeployApplications: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ConfigServiceRecorder: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ConfigServiceRule: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DataSyncTask: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DynamoDB: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EBSVolume: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticBeanstalk: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2DedicatedHosts: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2KeyPairs: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAM: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAMPool: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAMResourceDiscovery: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2IPAMScope: + 
include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2PlacementGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2Subnet: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EC2Endpoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ECRRepository: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ECSCluster: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ECSService: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EKSCluster: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ELBv1: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ELBv2: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticFileSystem: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticIP: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Elasticache: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticacheParameterGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +ElasticacheSubnetGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +InternetGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +EgressOnlyInternetGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +LambdaFunction: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +LaunchConfiguration: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +LaunchTemplate: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +MSKCluster: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NatGateway: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkACL: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkInterface: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +OIDCProvider: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +OpenSearchDomain: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Redshift: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DBClusters: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +DBInstances: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +RdsParameterGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' 
+DBSubnetGroups: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +RDSProxy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +s3: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +s3AccessPoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +S3ObjectLambdaAccessPoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +S3MultiRegionAccessPoint: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SecurityGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesConfigurationset: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesEmailTemplates: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesIdentity: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesReceiptRuleSet: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SesReceiptFilter: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SNS: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SQS: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SageMakerNotebook: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +SecretsManager: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +VPC: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Route53HostedZone: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Route53CIDRCollection: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +Route53TrafficPolicy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewall: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallPolicy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallRuleGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallTLSConfig: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +NetworkFirewallResourcePolicy: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +VPCLatticeService: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +VPCLatticeServiceNetwork: + include: + names_regex: + - '^${CLUSTER_NAME}.*' +VPCLatticeTargetGroup: + include: + names_regex: + - '^${CLUSTER_NAME}.*' diff --git a/docs/aws/nuke.md 
b/docs/aws/nuke.md new file mode 100644 index 000000000..55a46c33d --- /dev/null +++ b/docs/aws/nuke.md @@ -0,0 +1,7 @@ +# Nuking AWS resources +If you'd like to forcefully cleanup all AWS resources created by HMC you can use +the following command: + +``` +CLUSTER_NAME= make dev-aws-nuke +``` diff --git a/docs/dev.md b/docs/dev.md index e66b38c61..848959399 100644 --- a/docs/dev.md +++ b/docs/dev.md @@ -82,3 +82,4 @@ export KUBECONFIG=~/.kube/config ``` kubectl --kubeconfig ~/.kube/config get secret -n hmc-system -kubeconfig -o=jsonpath={.data.value} | base64 -d > kubeconfig ``` + diff --git a/go.mod b/go.mod index 25a7ce696..2d6b1ec6c 100644 --- a/go.mod +++ b/go.mod @@ -3,22 +3,26 @@ module github.com/Mirantis/hmc go 1.22.0 require ( + github.com/a8m/envsubst v1.4.2 github.com/cert-manager/cert-manager v1.15.3 github.com/fluxcd/helm-controller/api v1.0.1 github.com/fluxcd/pkg/apis/meta v1.6.0 github.com/fluxcd/pkg/runtime v0.49.0 github.com/fluxcd/source-controller/api v1.3.0 github.com/go-logr/logr v1.4.2 + github.com/google/uuid v1.6.0 github.com/hashicorp/go-retryablehttp v0.7.7 github.com/onsi/ginkgo/v2 v2.20.2 github.com/onsi/gomega v1.34.2 github.com/opencontainers/go-digest v1.0.1-0.20231025023718-d50d2fec9c98 github.com/segmentio/analytics-go v3.1.0+incompatible + gopkg.in/yaml.v3 v3.0.1 helm.sh/helm/v3 v3.15.4 k8s.io/api v0.31.0 k8s.io/apiextensions-apiserver v0.31.0 k8s.io/apimachinery v0.31.0 k8s.io/client-go v0.31.0 + k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 sigs.k8s.io/cluster-api v1.8.1 sigs.k8s.io/controller-runtime v0.19.0 ) @@ -78,7 +82,6 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20240827171923-fa2c70bbbfe5 // indirect github.com/google/shlex v0.0.0-20191202100458-e7afc7fbc510 // indirect - github.com/google/uuid v1.6.0 // indirect github.com/gorilla/mux v1.8.0 // indirect github.com/gorilla/websocket v1.5.1 // indirect github.com/gosuri/uitable v0.0.4 // indirect @@ -158,14 +161,12 @@ require 
( gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/yaml.v2 v2.4.0 // indirect - gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiserver v0.31.0 // indirect k8s.io/cli-runtime v0.31.0 // indirect k8s.io/component-base v0.31.0 // indirect k8s.io/klog/v2 v2.130.1 // indirect k8s.io/kube-openapi v0.0.0-20240430033511-f0e62f92d13f // indirect k8s.io/kubectl v0.31.0 // indirect - k8s.io/utils v0.0.0-20240711033017-18e509b52bc8 // indirect oras.land/oras-go v1.2.5 // indirect sigs.k8s.io/gateway-api v1.1.0 // indirect sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect diff --git a/go.sum b/go.sum index 64c3ef962..1a361f17d 100644 --- a/go.sum +++ b/go.sum @@ -23,6 +23,8 @@ github.com/Microsoft/hcsshim v0.11.4 h1:68vKo2VN8DE9AdN4tnkWnmdhqdbpUFM8OF3Airm7 github.com/Microsoft/hcsshim v0.11.4/go.mod h1:smjE4dvqPX9Zldna+t5FG3rnoHhaB7QYxPRqGcpAD9w= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d h1:UrqY+r/OJnIp5u0s1SbQ8dVfLCZJsnvazdBP5hS4iRs= github.com/Shopify/logrus-bugsnag v0.0.0-20171204204709-577dee27f20d/go.mod h1:HI8ITrYtUY+O+ZhtlqUnD8+KwNPOyugEhfP9fdUIaEQ= +github.com/a8m/envsubst v1.4.2 h1:4yWIHXOLEJHQEFd4UjrWDrYeYlV7ncFWJOCBRLOZHQg= +github.com/a8m/envsubst v1.4.2/go.mod h1:MVUTQNGQ3tsjOOtKCNd+fl8RzhsXcDvvAEzkhGtlsbY= github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc= github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0= github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= diff --git a/scripts/aws-nuke-ccm.sh b/scripts/aws-nuke-ccm.sh new file mode 100755 index 000000000..26e8a067c --- /dev/null +++ b/scripts/aws-nuke-ccm.sh @@ -0,0 +1,60 @@ +#!/bin/bash +# Copyright 2024 +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the 
License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# This script will remove all resources affiliated with the AWS CCM, such as +# ELB or CSI driver resources that can not be filtered by cloud-nuke. +# It should be run after running cloud-nuke to remove any remaining resources. +if [ -z "$CLUSTER_NAME" ]; then + echo "CLUSTER_NAME must be set" + exit 1 +fi + +if [ -z "$YQ" ]; then + echo "YQ must be set to the path of the yq binary" + echo "Use 'make dev-aws-nuke' instead of running this script directly" + exit 1 +fi + +if [ -z "$AWSCLI" ]; then + echo "AWSCLI must be set to the path of the AWS CLI" + echo "Use 'make dev-aws-nuke' instead of running this script directly" + exit 1 +fi + +echo "Checking for ELB with 'kubernetes.io/cluster/$CLUSTER_NAME' tag" +for LOADBALANCER in $($AWSCLI elb describe-load-balancers --output yaml | $YQ '.LoadBalancerDescriptions[].LoadBalancerName'); +do + echo "Checking ELB: $LOADBALANCER for 'kubernetes.io/cluster/$CLUSTER_NAME' tag" + DESCRIBE_TAGS=$($AWSCLI elb describe-tags \ + --load-balancer-names $LOADBALANCER \ + --output yaml | $YQ '.TagDescriptions[].Tags.[]' | grep -F "kubernetes.io/cluster/$CLUSTER_NAME") + if [ ! 
-z "${DESCRIBE_TAGS}" ]; then + echo "Deleting ELB: $LOADBALANCER" + $AWSCLI elb delete-load-balancer --load-balancer-name $LOADBALANCER + fi +done + +echo "Checking for EBS Volumes with $CLUSTER_NAME within the 'kubernetes.io/created-for/pvc/name' tag" +for VOLUME in $($AWSCLI ec2 describe-volumes --output yaml | $YQ '.Volumes[].VolumeId'); +do + echo "Checking EBS Volume: $VOLUME for $CLUSTER_NAME claim" + DESCRIBE_VOLUMES=$($AWSCLI ec2 describe-volumes \ + --volume-id $VOLUME \ + --output yaml | $YQ '.Volumes | to_entries[] | .value.Tags[] | select(.Key == "kubernetes.io/created-for/pvc/name")' | grep $CLUSTER_NAME) + if [ ! -z "${DESCRIBE_VOLUMES}" ]; then + echo "Deleting EBS Volume: $VOLUME" + $AWSCLI ec2 delete-volume --volume-id $VOLUME + fi +done diff --git a/test/e2e/e2e_test.go b/test/e2e/e2e_test.go index 33bf9f27b..0d3b748d3 100644 --- a/test/e2e/e2e_test.go +++ b/test/e2e/e2e_test.go @@ -15,106 +15,241 @@ package e2e import ( + "bufio" + "context" "fmt" + "os" "os/exec" + "path/filepath" + "strings" "time" . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/managedcluster" "github.com/Mirantis/hmc/test/utils" ) -const namespace = "hmc-system" +const ( + namespace = "hmc-system" + hmcControllerLabel = "app.kubernetes.io/name=hmc" +) var _ = Describe("controller", Ordered, func() { BeforeAll(func() { - By("installing prometheus operator") - Expect(utils.InstallPrometheusOperator()).To(Succeed()) - - By("installing the cert-manager") - Expect(utils.InstallCertManager()).To(Succeed()) - - By("creating manager namespace") - cmd := exec.Command("kubectl", "create", "ns", namespace) - _, _ = utils.Run(cmd) + By("building and deploying the controller-manager") + cmd := exec.Command("make", "test-apply") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) }) AfterAll(func() { - By("uninstalling the Prometheus manager bundle") - utils.UninstallPrometheusOperator() - - By("uninstalling the cert-manager bundle") - utils.UninstallCertManager() - - By("removing manager namespace") - cmd := exec.Command("kubectl", "delete", "ns", namespace) - _, _ = utils.Run(cmd) + By("removing the controller-manager") + cmd := exec.Command("make", "test-destroy") + _, err := utils.Run(cmd) + Expect(err).NotTo(HaveOccurred()) }) Context("Operator", func() { It("should run successfully", func() { - var controllerPodName string - var err error - - // projectimage stores the name of the image used in the example - var projectimage = "example.com/hmc:v0.0.1" - - By("building the manager(Operator) image") - cmd := exec.Command("make", "docker-build", fmt.Sprintf("IMG=%s", projectimage)) - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - - By("loading the the manager(Operator) image on Kind") - err = utils.LoadImageToKindClusterWithName(projectimage) + kc, err := kubeclient.NewFromLocal(namespace) ExpectWithOffset(1, 
err).NotTo(HaveOccurred()) + ExpectWithOffset(1, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) - By("installing CRDs") - cmd = exec.Command("make", "install") - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) - - By("deploying the controller-manager") - cmd = exec.Command("make", "deploy", fmt.Sprintf("IMG=%s", projectimage)) - _, err = utils.Run(cmd) - ExpectWithOffset(1, err).NotTo(HaveOccurred()) + By("validating that the hmc-controller and capi provider controllers are running") + verifyControllersUp := func() error { + if err := verifyControllerUp(kc, hmcControllerLabel, "hmc-controller-manager"); err != nil { + return err + } - By("validating that the controller-manager pod is running as expected") - verifyControllerUp := func() error { - // Get pod name - - cmd = exec.Command("kubectl", "get", - "pods", "-l", "control-plane=controller-manager", - "-o", "go-template={{ range .items }}"+ - "{{ if not .metadata.deletionTimestamp }}"+ - "{{ .metadata.name }}"+ - "{{ \"\\n\" }}{{ end }}{{ end }}", - "-n", namespace, - ) - - podOutput, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - podNames := utils.GetNonEmptyLines(string(podOutput)) - if len(podNames) != 1 { - return fmt.Errorf("expect 1 controller pods running, but got %d", len(podNames)) + for _, provider := range []managedcluster.ProviderType{ + managedcluster.ProviderCAPI, + managedcluster.ProviderAWS, + managedcluster.ProviderAzure, + } { + // Ensure only one controller pod is running. 
+ if err := verifyControllerUp(kc, managedcluster.GetProviderLabel(provider), string(provider)); err != nil { + return err + } } - controllerPodName = podNames[0] - ExpectWithOffset(2, controllerPodName).Should(ContainSubstring("controller-manager")) - - // Validate pod status - cmd = exec.Command("kubectl", "get", - "pods", controllerPodName, "-o", "jsonpath={.status.phase}", - "-n", namespace, - ) - status, err := utils.Run(cmd) - ExpectWithOffset(2, err).NotTo(HaveOccurred()) - if string(status) != "Running" { - return fmt.Errorf("controller pod in %s status", status) + + return nil + } + Eventually(func() error { + err := verifyControllersUp() + if err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "Controller pod validation failed: %v\n", err) + return err } + return nil + }).WithTimeout(15 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + }) + }) + + Context("AWS Templates", func() { + var ( + kc *kubeclient.KubeClient + deleteFunc func() error + clusterName string + err error + ) + + BeforeAll(func() { + By("ensuring AWS credentials are set") + kc, err = kubeclient.NewFromLocal(namespace) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) + ExpectWithOffset(2, kc.CreateAWSCredentialsKubeSecret(context.Background())).To(Succeed()) + }) + + AfterEach(func() { + // If we failed collect logs from each of the affiliated controllers + // as well as the output of clusterctl to store as artifacts. + if CurrentSpecReport().Failed() { + By("collecting failure logs from controllers") + collectLogArtifacts(kc, clusterName, managedcluster.ProviderAWS, managedcluster.ProviderCAPI) } - EventuallyWithOffset(1, verifyControllerUp, time.Minute, time.Second).Should(Succeed()) + // Delete the deployments if they were created. + if deleteFunc != nil { + By("deleting the deployment") + err = deleteFunc() + Expect(err).NotTo(HaveOccurred()) + } + + // Purge the AWS resources, the AfterAll for the controller will + // clean up the management cluster. 
+ By("nuking remaining AWS resources") + err = os.Setenv("CLUSTER_NAME", clusterName) + Expect(err).NotTo(HaveOccurred()) + cmd := exec.Command("make", "dev-aws-nuke") + _, err := utils.Run(cmd) + ExpectWithOffset(2, err).NotTo(HaveOccurred()) }) + + for _, template := range []managedcluster.Template{ + managedcluster.TemplateAWSStandaloneCP, + managedcluster.TemplateAWSHostedCP, + } { + It(fmt.Sprintf("should work with an AWS provider and %s template", template), func() { + if template == managedcluster.TemplateAWSHostedCP { + // TODO: Create AWS resources for hosted control plane. + Skip("AWS hosted control plane not yet implemented") + } + + By("creating a Deployment") + d := managedcluster.GetUnstructured(managedcluster.ProviderAWS, template) + clusterName = d.GetName() + + deleteFunc, err = kc.CreateManagedCluster(context.Background(), d) + Expect(err).NotTo(HaveOccurred()) + + By("waiting for infrastructure providers to deploy successfully") + Eventually(func() error { + return managedcluster.VerifyProviderDeployed(context.Background(), kc, clusterName) + }).WithTimeout(30 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + + By("verify the deployment deletes successfully") + err = deleteFunc() + Expect(err).NotTo(HaveOccurred()) + Eventually(func() error { + return managedcluster.VerifyProviderDeleted(context.Background(), kc, clusterName) + }).WithTimeout(10 * time.Minute).WithPolling(10 * time.Second).Should(Succeed()) + }) + } }) }) + +func verifyControllerUp(kc *kubeclient.KubeClient, labelSelector string, name string) error { + deployList, err := kc.Client.AppsV1().Deployments(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: labelSelector, + }) + if err != nil { + return fmt.Errorf("failed to list %s controller deployments: %w", name, err) + } + + if len(deployList.Items) < 1 { + return fmt.Errorf("expected at least 1 %s controller deployment, got %d", + name, len(deployList.Items)) + } + + deployment := 
deployList.Items[0] + + // Ensure the deployment is not being deleted. + if deployment.DeletionTimestamp != nil { + return fmt.Errorf("controller pod: %s deletion timestamp should be nil, got: %v", + deployment.Name, deployment.DeletionTimestamp) + } + // Ensure the deployment is running and has the expected name. + if !strings.Contains(deployment.Name, "controller-manager") { + return fmt.Errorf("controller deployment name %s does not contain 'controller-manager'", deployment.Name) + } + if deployment.Status.ReadyReplicas < 1 { + return fmt.Errorf("controller deployment: %s does not yet have any ReadyReplicas", deployment.Name) + } + + return nil +} + +// collectLogArtifacts collects log output from each of the HMC controller, +// CAPI controller and the provider controller(s) as well as output from clusterctl +// and stores them in the test/e2e directory as artifacts. If it fails it +// produces a warning message to the GinkgoWriter, but does not fail the test. +func collectLogArtifacts(kc *kubeclient.KubeClient, clusterName string, providerTypes ...managedcluster.ProviderType) { + GinkgoHelper() + + filterLabels := []string{hmcControllerLabel} + + for _, providerType := range providerTypes { + filterLabels = append(filterLabels, managedcluster.GetProviderLabel(providerType)) + } + + for _, label := range filterLabels { + pods, _ := kc.Client.CoreV1().Pods(kc.Namespace).List(context.Background(), metav1.ListOptions{ + LabelSelector: label, + }) + + for _, pod := range pods.Items { + req := kc.Client.CoreV1().Pods(kc.Namespace).GetLogs(pod.Name, &corev1.PodLogOptions{ + TailLines: ptr.To(int64(1000)), + }) + podLogs, err := req.Stream(context.Background()) + if err != nil { + utils.WarnError(fmt.Errorf("failed to get log stream for pod %s: %w", pod.Name, err)) + continue + } + defer podLogs.Close() //nolint:errcheck + + output, err := os.Create(fmt.Sprintf("./test/e2e/%s.log", pod.Name)) + if err != nil { + utils.WarnError(fmt.Errorf("failed to create log file for 
pod %s: %w", pod.Name, err)) + continue + } + defer output.Close() //nolint:errcheck + + r := bufio.NewReader(podLogs) + _, err = r.WriteTo(output) + if err != nil { + utils.WarnError(fmt.Errorf("failed to write log file for pod %s: %w", pod.Name, err)) + } + } + } + + cmd := exec.Command("./bin/clusterctl", + "describe", "cluster", clusterName, "--namespace", namespace, "--show-conditions=all") + output, err := utils.Run(cmd) + if err != nil { + utils.WarnError(fmt.Errorf("failed to get clusterctl log: %w", err)) + return + } + + err = os.WriteFile(filepath.Join("test/e2e", "clusterctl.log"), output, 0644) + if err != nil { + utils.WarnError(fmt.Errorf("failed to write clusterctl log: %w", err)) + } +} diff --git a/test/kubeclient/kubeclient.go b/test/kubeclient/kubeclient.go new file mode 100644 index 000000000..c1a4453c9 --- /dev/null +++ b/test/kubeclient/kubeclient.go @@ -0,0 +1,268 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package kubeclient + +import ( + "context" + "fmt" + "os" + "os/exec" + "path/filepath" + + "github.com/Mirantis/hmc/test/utils" + . 
"github.com/onsi/ginkgo/v2" + corev1 "k8s.io/api/core/v1" + apiextensionsclientset "k8s.io/apiextensions-apiserver/pkg/client/clientset/clientset" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime/schema" + "k8s.io/client-go/dynamic" + "k8s.io/client-go/kubernetes" + "k8s.io/client-go/rest" + "k8s.io/client-go/tools/clientcmd" +) + +const ( + awsCredentialsSecretName = "aws-variables" +) + +type KubeClient struct { + Namespace string + + Client kubernetes.Interface + ExtendedClient apiextensionsclientset.Interface + Config *rest.Config +} + +// NewFromLocal creates a new instance of KubeClient from a given namespace +// using the locally found kubeconfig. +func NewFromLocal(namespace string) (*KubeClient, error) { + configBytes, err := getLocalKubeConfig() + if err != nil { + return nil, fmt.Errorf("failed to get local kubeconfig: %w", err) + } + + return new(configBytes, namespace) +} + +// NewFromCluster creates a new KubeClient using the kubeconfig stored in the +// secret affiliated with the given clusterName. Since it relies on fetching +// the kubeconfig from secret it needs an existing kubeclient. +func (kc *KubeClient) NewFromCluster(ctx context.Context, namespace, clusterName string) (*KubeClient, error) { + secret, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, clusterName+"-kubeconfig", metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get cluster: %q kubeconfig secret: %w", clusterName, err) + } + + secretData, ok := secret.Data["value"] + if !ok { + return nil, fmt.Errorf("kubeconfig secret %q has no 'value' key", clusterName) + } + + return new(secretData, namespace) +} + +// getLocalKubeConfig returns the kubeconfig file content. +func getLocalKubeConfig() ([]byte, error) { + // Use the KUBECONFIG environment variable if it is set, otherwise use the + // default path. 
+ kubeConfig, ok := os.LookupEnv("KUBECONFIG") + if !ok { + homeDir, err := os.UserHomeDir() + if err != nil { + return nil, fmt.Errorf("failed to get user home directory: %w", err) + } + + kubeConfig = filepath.Join(homeDir, ".kube", "config") + } + + configBytes, err := os.ReadFile(kubeConfig) + if err != nil { + return nil, fmt.Errorf("failed to read %q: %w", kubeConfig, err) + } + + return configBytes, nil +} + +// new creates a new instance of KubeClient from a given namespace using +// the local kubeconfig. +func new(configBytes []byte, namespace string) (*KubeClient, error) { + config, err := clientcmd.RESTConfigFromKubeConfig(configBytes) + if err != nil { + return nil, fmt.Errorf("failed to parse kubeconfig: %w", err) + } + + clientSet, err := kubernetes.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("could not initialize kubernetes client: %w", err) + } + + extendedClientSet, err := apiextensionsclientset.NewForConfig(config) + if err != nil { + return nil, fmt.Errorf("failed to initialize apiextensions clientset: %w", err) + } + + return &KubeClient{ + Namespace: namespace, + Client: clientSet, + ExtendedClient: extendedClientSet, + Config: config, + }, nil +} + +// CreateAWSCredentialsKubeSecret uses clusterawsadm to encode existing AWS +// credentials and create a secret in the given namespace if one does not +// already exist. 
+func (kc *KubeClient) CreateAWSCredentialsKubeSecret(ctx context.Context) error { + _, err := kc.Client.CoreV1().Secrets(kc.Namespace).Get(ctx, awsCredentialsSecretName, metav1.GetOptions{}) + if !apierrors.IsNotFound(err) { + return nil + } + + cmd := exec.Command("./bin/clusterawsadm", "bootstrap", "credentials", "encode-as-profile") + output, err := utils.Run(cmd) + if err != nil { + return fmt.Errorf("failed to encode AWS credentials with clusterawsadm: %w", err) + } + + _, err = kc.Client.CoreV1().Secrets(kc.Namespace).Create(ctx, &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{ + Name: awsCredentialsSecretName, + }, + Data: map[string][]byte{ + "AWS_B64ENCODED_CREDENTIALS": output, + }, + Type: corev1.SecretTypeOpaque, + }, metav1.CreateOptions{}) + if err != nil { + return fmt.Errorf("failed to create AWS credentials secret: %w", err) + } + + return nil +} + +// GetDynamicClient returns a dynamic client for the given GroupVersionResource. +func (kc *KubeClient) GetDynamicClient(gvr schema.GroupVersionResource) (dynamic.ResourceInterface, error) { + client, err := dynamic.NewForConfig(kc.Config) + if err != nil { + return nil, fmt.Errorf("failed to create dynamic client: %w", err) + } + + return client.Resource(gvr).Namespace(kc.Namespace), nil +} + +// CreateDeployment creates a managedcluster.hmc.mirantis.com in the given +// namespace and returns a DeleteFunc to clean up the deployment. +// The DeleteFunc is a no-op if the deployment has already been deleted. 
+func (kc *KubeClient) CreateManagedCluster( + ctx context.Context, managedcluster *unstructured.Unstructured) (func() error, error) { + kind := managedcluster.GetKind() + + if kind != "ManagedCluster" { + return nil, fmt.Errorf("expected kind ManagedCluster, got: %s", kind) + } + + client, err := kc.GetDynamicClient(schema.GroupVersionResource{ + Group: "hmc.mirantis.com", + Version: "v1alpha1", + Resource: "managedclusters", + }) + if err != nil { + return nil, fmt.Errorf("failed to get dynamic client: %w", err) + } + + _, err = client.Create(ctx, managedcluster, metav1.CreateOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to create Deployment: %w", err) + } + + return func() error { + err := client.Delete(ctx, managedcluster.GetName(), metav1.DeleteOptions{}) + if apierrors.IsNotFound(err) { + return nil + } + return err + }, nil +} + +// GetCluster returns a Cluster resource by name. +func (kc *KubeClient) GetCluster(ctx context.Context, clusterName string) (*unstructured.Unstructured, error) { + gvr := schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "clusters", + } + + client, err := kc.GetDynamicClient(gvr) + if err != nil { + Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) + } + + cluster, err := client.Get(ctx, clusterName, metav1.GetOptions{}) + if err != nil { + return nil, fmt.Errorf("failed to get %s %s: %w", gvr.Resource, clusterName, err) + } + + return cluster, nil +} + +// listResource returns a list of resources for the given GroupVersionResource +// affiliated with the given clusterName. 
+func (kc *KubeClient) listResource( + ctx context.Context, gvr schema.GroupVersionResource, clusterName string) ([]unstructured.Unstructured, error) { + client, err := kc.GetDynamicClient(gvr) + if err != nil { + Fail(fmt.Sprintf("failed to get %s client: %v", gvr.Resource, err)) + } + + resources, err := client.List(ctx, metav1.ListOptions{ + LabelSelector: "cluster.x-k8s.io/cluster-name=" + clusterName, + }) + if err != nil { + return nil, fmt.Errorf("failed to list %s: %w", gvr.Resource, err) + } + + return resources.Items, nil +} + +// ListMachines returns a list of Machine resources for the given cluster. +func (kc *KubeClient) ListMachines(ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + return kc.listResource(ctx, schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "machines", + }, clusterName) +} + +// ListMachineDeployments returns a list of MachineDeployment resources for the +// given cluster. +func (kc *KubeClient) ListMachineDeployments( + ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + return kc.listResource(ctx, schema.GroupVersionResource{ + Group: "cluster.x-k8s.io", + Version: "v1beta1", + Resource: "machinedeployments", + }, clusterName) +} + +func (kc *KubeClient) ListK0sControlPlanes( + ctx context.Context, clusterName string) ([]unstructured.Unstructured, error) { + return kc.listResource(ctx, schema.GroupVersionResource{ + Group: "controlplane.cluster.x-k8s.io", + Version: "v1beta1", + Resource: "k0scontrolplanes", + }, clusterName) +} diff --git a/test/managedcluster/managedcluster.go b/test/managedcluster/managedcluster.go new file mode 100644 index 000000000..28783ea5d --- /dev/null +++ b/test/managedcluster/managedcluster.go @@ -0,0 +1,93 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package managedcluster + +import ( + _ "embed" + "fmt" + "os" + + "github.com/a8m/envsubst" + "github.com/google/uuid" + . "github.com/onsi/ginkgo/v2" + . "github.com/onsi/gomega" + "gopkg.in/yaml.v3" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +type ProviderType string + +const ( + ProviderCAPI ProviderType = "cluster-api" + ProviderAWS ProviderType = "infrastructure-aws" + ProviderAzure ProviderType = "infrastructure-azure" + + providerLabel = "cluster.x-k8s.io/provider" +) + +type Template string + +const ( + TemplateAWSStandaloneCP Template = "aws-standalone-cp" + TemplateAWSHostedCP Template = "aws-hosted-cp" +) + +//go:embed resources/aws-standalone-cp.yaml.tpl +var awsStandaloneCPManagedClusterTemplateBytes []byte + +//go:embed resources/aws-hosted-cp.yaml.tpl +var awsHostedCPManagedClusterTemplateBytes []byte + +func GetProviderLabel(provider ProviderType) string { + return fmt.Sprintf("%s=%s", providerLabel, provider) +} + +// GetUnstructured returns an unstructured ManagedCluster object based on the +// provider and template. 
+func GetUnstructured(provider ProviderType, templateName Template) *unstructured.Unstructured { + GinkgoHelper() + + generatedName := uuid.New().String()[:8] + "-e2e-test" + _, _ = fmt.Fprintf(GinkgoWriter, "Generated cluster name: %q\n", generatedName) + + switch provider { + case ProviderAWS: + Expect(os.Setenv("MANAGED_CLUSTER_NAME", generatedName)).NotTo(HaveOccurred()) + + var managedClusterTemplateBytes []byte + switch templateName { + case TemplateAWSStandaloneCP: + managedClusterTemplateBytes = awsStandaloneCPManagedClusterTemplateBytes + case TemplateAWSHostedCP: + managedClusterTemplateBytes = awsHostedCPManagedClusterTemplateBytes + default: + Fail(fmt.Sprintf("unsupported AWS template: %s", templateName)) + } + + managedClusterConfigBytes, err := envsubst.Bytes(managedClusterTemplateBytes) + Expect(err).NotTo(HaveOccurred(), "failed to substitute environment variables") + + var managedClusterConfig map[string]interface{} + + err = yaml.Unmarshal(managedClusterConfigBytes, &managedClusterConfig) + Expect(err).NotTo(HaveOccurred(), "failed to unmarshal deployment config") + + return &unstructured.Unstructured{Object: managedClusterConfig} + default: + Fail(fmt.Sprintf("unsupported provider: %s", provider)) + } + + return nil +} diff --git a/test/managedcluster/resources/aws-hosted-cp.yaml.tpl b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl new file mode 100644 index 000000000..894bb6667 --- /dev/null +++ b/test/managedcluster/resources/aws-hosted-cp.yaml.tpl @@ -0,0 +1,16 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: ManagedCluster +metadata: + name: ${MANAGED_CLUSTER_NAME} +spec: + template: aws-hosted-cp + config: + vpcID: ${AWS_VPC_ID} + region: ${AWS_REGION} + publicIP: ${PUBLIC_IP:=true} + subnets: + - id: ${AWS_SUBNET_ID} + availabilityZone: ${AWS_SUBNET_AVAILABILITY_ZONE} + instanceType: ${INSTANCE_TYPE:=t3.medium} + securityGroupIDs: + - ${AWS_SG_ID} diff --git a/test/managedcluster/resources/aws-standalone-cp.yaml.tpl 
b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl new file mode 100644 index 000000000..7825a2833 --- /dev/null +++ b/test/managedcluster/resources/aws-standalone-cp.yaml.tpl @@ -0,0 +1,17 @@ +apiVersion: hmc.mirantis.com/v1alpha1 +kind: ManagedCluster +metadata: + name: ${MANAGED_CLUSTER_NAME} +spec: + template: aws-standalone-cp + config: + region: ${AWS_REGION} + publicIP: ${PUBLIC_IP:=true} + controlPlaneNumber: ${CONTROL_PLANE_NUMBER:=1} + workersNumber: ${WORKERS_NUMBER:=1} + controlPlane: + instanceType: ${INSTANCE_TYPE:=t3.small} + worker: + instanceType: ${INSTANCE_TYPE:=t3.small} + + diff --git a/test/managedcluster/validate_deleted.go b/test/managedcluster/validate_deleted.go new file mode 100644 index 000000000..dc5712a9f --- /dev/null +++ b/test/managedcluster/validate_deleted.go @@ -0,0 +1,113 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package managedcluster + +import ( + "context" + "errors" + "fmt" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/utils" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" +) + +var deletionValidators = map[string]resourceValidationFunc{ + "clusters": validateClusterDeleted, + "machinedeployments": validateMachineDeploymentsDeleted, + "control-planes": validateK0sControlPlanesDeleted, +} + +// VerifyProviderDeleted is a provider-agnostic verification that checks +// to ensure generic resources managed by the provider have been deleted. 
+// It is intended to be used in conjunction with an Eventually block. +func VerifyProviderDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + return verifyProviderAction(ctx, kc, clusterName, deletionValidators, + []string{"clusters", "machinedeployments", "control-planes"}) +} + +// validateClusterDeleted validates that the Cluster resource has been deleted. +func validateClusterDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + // Validate that the Cluster resource has been deleted + cluster, err := kc.GetCluster(ctx, clusterName) + if err != nil { + return err + } + + if cluster != nil { + phase, _, _ := unstructured.NestedString(cluster.Object, "status", "phase") + if phase != "Deleting" { + // TODO: We should have a threshold error system for situations + // like this, we probably don't want to wait the full Eventually + // for something like this, but we can't immediately fail the test + // either. + return fmt.Errorf("cluster %q exists, but is not in 'Deleting' phase", clusterName) + } + + conditions, err := utils.GetConditionsFromUnstructured(cluster) + if err != nil { + return fmt.Errorf("failed to get conditions from unstructured object: %w", err) + } + + var errs error + + for _, c := range conditions { + errs = errors.Join(errors.New(utils.ConvertConditionsToString(c)), errs) + } + + return fmt.Errorf("cluster %q still in 'Deleting' phase with conditions:\n%w", clusterName, errs) + } + + return nil +} + +// validateMachineDeploymentsDeleted validates that all MachineDeployments have +// been deleted. 
+func validateMachineDeploymentsDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + machineDeployments, err := kc.ListMachineDeployments(ctx, clusterName) + if err != nil { + return err + } + + var mdNames []string + if len(machineDeployments) > 0 { + for _, md := range machineDeployments { + mdNames = append(mdNames, md.GetName()) + + return fmt.Errorf("machine deployments still exist: %s", mdNames) + } + } + + return nil +} + +// validateK0sControlPlanesDeleted validates that all k0scontrolplanes have +// been deleted. +func validateK0sControlPlanesDeleted(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + controlPlanes, err := kc.ListK0sControlPlanes(ctx, clusterName) + if err != nil { + return err + } + + var cpNames []string + if len(controlPlanes) > 0 { + for _, cp := range controlPlanes { + cpNames = append(cpNames, cp.GetName()) + + return fmt.Errorf("k0s control planes still exist: %s", cpNames) + } + } + + return nil +} diff --git a/test/managedcluster/validate_deployed.go b/test/managedcluster/validate_deployed.go new file mode 100644 index 000000000..f6423fb2b --- /dev/null +++ b/test/managedcluster/validate_deployed.go @@ -0,0 +1,310 @@ +// Copyright 2024 +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package managedcluster + +import ( + "context" + "fmt" + "strings" + + "github.com/Mirantis/hmc/test/kubeclient" + "github.com/Mirantis/hmc/test/utils" + . 
"github.com/onsi/ginkgo/v2" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + "k8s.io/apimachinery/pkg/api/resource" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/util/intstr" +) + +// resourceValidationFunc is intended to validate a specific kubernetes +// resource. +type resourceValidationFunc func(context.Context, *kubeclient.KubeClient, string) error + +var resourceValidators = map[string]resourceValidationFunc{ + "clusters": validateCluster, + "machines": validateMachines, + "control-planes": validateK0sControlPlanes, + "csi-driver": validateCSIDriver, + "ccm": validateCCM, +} + +// VerifyProviderDeployed is a provider-agnostic verification that checks +// to ensure generic resources managed by the provider have been deleted. +// It is intended to be used in conjunction with an Eventually block. +func VerifyProviderDeployed(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + return verifyProviderAction(ctx, kc, clusterName, resourceValidators, + []string{"clusters", "machines", "control-planes", "csi-driver", "ccm"}) +} + +// verifyProviderAction is a provider-agnostic verification that checks for +// a specific set of resources and either validates their readiness or +// their deletion depending on the passed map of resourceValidationFuncs and +// desired order. +// It is meant to be used in conjunction with an Eventually block. +// In some cases it may be necessary to end the Eventually block early if the +// resource will never reach a ready state, in these instances Ginkgo's Fail +// should be used to end the spec early. 
+func verifyProviderAction( + ctx context.Context, kc *kubeclient.KubeClient, clusterName string, + resourcesToValidate map[string]resourceValidationFunc, order []string) error { + // Sequentially validate each resource type, only returning the first error + // as to not move on to the next resource type until the first is resolved. + // We use []string here since order is important. + for _, name := range order { + validator, ok := resourcesToValidate[name] + if !ok { + continue + } + + if err := validator(ctx, kc, clusterName); err != nil { + _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation error: %v\n", name, err) + return err + } + + _, _ = fmt.Fprintf(GinkgoWriter, "[%s] validation succeeded\n", name) + delete(resourcesToValidate, name) + } + + return nil +} + +func validateCluster(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + cluster, err := kc.GetCluster(ctx, clusterName) + if err != nil { + return err + } + + phase, _, err := unstructured.NestedString(cluster.Object, "status", "phase") + if err != nil { + return fmt.Errorf("failed to get status.phase for %s: %v", cluster.GetName(), err) + } + + if phase == "Deleting" { + Fail(fmt.Sprintf("%s is in 'Deleting' phase", cluster.GetName())) + } + + if err := utils.ValidateObjectNamePrefix(cluster, clusterName); err != nil { + Fail(err.Error()) + } + + if err := utils.ValidateConditionsTrue(cluster); err != nil { + return err + } + + return nil +} + +func validateMachines(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + machines, err := kc.ListMachines(ctx, clusterName) + if err != nil { + return fmt.Errorf("failed to list machines: %w", err) + } + + for _, machine := range machines { + if err := utils.ValidateObjectNamePrefix(&machine, clusterName); err != nil { + Fail(err.Error()) + } + + if err := utils.ValidateConditionsTrue(&machine); err != nil { + return err + } + } + + return nil +} + +func validateK0sControlPlanes(ctx context.Context, kc 
*kubeclient.KubeClient, clusterName string) error { + controlPlanes, err := kc.ListK0sControlPlanes(ctx, clusterName) + if err != nil { + return fmt.Errorf("failed to list K0sControlPlanes: %w", err) + } + + for _, controlPlane := range controlPlanes { + if err := utils.ValidateObjectNamePrefix(&controlPlane, clusterName); err != nil { + Fail(err.Error()) + } + + objKind, objName := utils.ObjKindName(&controlPlane) + + // k0s does not use the metav1.Condition type for status.conditions, + // instead it uses a custom type so we can't use + // ValidateConditionsTrue here, instead we'll check for "ready: true". + status, found, err := unstructured.NestedFieldCopy(controlPlane.Object, "status") + if !found { + return fmt.Errorf("no status found for %s: %s", objKind, objName) + } + if err != nil { + return fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) + } + + st, ok := status.(map[string]interface{}) + if !ok { + return fmt.Errorf("expected K0sControlPlane condition to be type map[string]interface{}, got: %T", status) + } + + if _, ok := st["ready"]; !ok { + return fmt.Errorf("%s %s has no 'ready' status", objKind, objName) + } + + if !st["ready"].(bool) { + return fmt.Errorf("%s %s is not ready, status: %+v", objKind, objName, st) + } + } + + return nil +} + +// validateCSIDriver validates that the provider CSI driver is functioning +// by creating a PVC and verifying it enters "Bound" status. +func validateCSIDriver(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) + if err != nil { + Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) + } + + pvcName := clusterName + "-csi-test-pvc" + + _, err = clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). 
+ Create(ctx, &corev1.PersistentVolumeClaim{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName, + }, + Spec: corev1.PersistentVolumeClaimSpec{ + AccessModes: []corev1.PersistentVolumeAccessMode{ + corev1.ReadWriteOnce, + }, + Resources: corev1.VolumeResourceRequirements{ + Requests: corev1.ResourceList{ + corev1.ResourceStorage: resource.MustParse("1Gi"), + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + // Since these resourceValidationFuncs are intended to be used in + // Eventually we should ensure a follow-up PVCreate is a no-op. + if !apierrors.IsAlreadyExists(err) { + Fail(fmt.Sprintf("failed to create test PVC: %v", err)) + } + } + + // Create a pod that uses the PVC so that the PVC enters "Bound" status. + _, err = clusterKC.Client.CoreV1().Pods(clusterKC.Namespace).Create(ctx, &corev1.Pod{ + ObjectMeta: metav1.ObjectMeta{ + Name: pvcName + "-pod", + }, + Spec: corev1.PodSpec{ + Volumes: []corev1.Volume{ + { + Name: "test-pvc-vol", + VolumeSource: corev1.VolumeSource{ + PersistentVolumeClaim: &corev1.PersistentVolumeClaimVolumeSource{ + ClaimName: pvcName, + }, + }, + }, + }, + Containers: []corev1.Container{ + { + Name: "test-pvc-container", + Image: "nginx", + VolumeMounts: []corev1.VolumeMount{ + { + MountPath: "/storage", + Name: "test-pvc-vol", + }, + }, + }, + }, + }, + }, metav1.CreateOptions{}) + if err != nil { + if !apierrors.IsAlreadyExists(err) { + Fail(fmt.Sprintf("failed to create test Pod: %v", err)) + } + } + + // Verify the PVC enters "Bound" status and inherits the CSI driver + // storageClass without us having to specify it. + pvc, err := clusterKC.Client.CoreV1().PersistentVolumeClaims(clusterKC.Namespace). 
+ Get(ctx, pvcName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get test PVC: %w", err) + } + + if !strings.Contains(*pvc.Spec.StorageClassName, "csi") { + Fail(fmt.Sprintf("%s PersistentVolumeClaim does not have a CSI driver storageClass", pvcName)) + } + + if pvc.Status.Phase != corev1.ClaimBound { + return fmt.Errorf("%s PersistentVolume not yet 'Bound', current phase: %q", pvcName, pvc.Status.Phase) + } + + return nil +} + +// validateCCM validates that the provider's cloud controller manager is +// functional by creating a LoadBalancer service and verifying it is assigned +// an external IP. +func validateCCM(ctx context.Context, kc *kubeclient.KubeClient, clusterName string) error { + clusterKC, err := kc.NewFromCluster(ctx, "default", clusterName) + if err != nil { + Fail(fmt.Sprintf("failed to create KubeClient for managed cluster %s: %v", clusterName, err)) + } + + createdServiceName := "loadbalancer-" + clusterName + + _, err = clusterKC.Client.CoreV1().Services(clusterKC.Namespace).Create(ctx, &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: createdServiceName, + }, + Spec: corev1.ServiceSpec{ + Selector: map[string]string{ + "some": "selector", + }, + Ports: []corev1.ServicePort{ + { + Port: 8765, + TargetPort: intstr.FromInt(9376), + }, + }, + Type: corev1.ServiceTypeLoadBalancer, + }, + }, metav1.CreateOptions{}) + if err != nil { + // Since these resourceValidationFuncs are intended to be used in + // Eventually we should ensure a follow-up ServiceCreate is a no-op. + if !apierrors.IsAlreadyExists(err) { + return fmt.Errorf("failed to create test Service: %w", err) + } + } + + // Verify the Service is assigned an external IP. + service, err := clusterKC.Client.CoreV1().Services(clusterKC.Namespace). 
+ Get(ctx, createdServiceName, metav1.GetOptions{}) + if err != nil { + return fmt.Errorf("failed to get test Service: %w", err) + } + + for _, i := range service.Status.LoadBalancer.Ingress { + if i.Hostname != "" { + return nil + } + } + + return fmt.Errorf("%s Service does not yet have an external hostname", service.Name) +} diff --git a/test/utils/utils.go b/test/utils/utils.go index 4e59dca0a..4e0d767f4 100644 --- a/test/utils/utils.go +++ b/test/utils/utils.go @@ -15,35 +15,18 @@ package utils import ( + "errors" "fmt" "os" "os/exec" "strings" . "github.com/onsi/ginkgo/v2" //nolint:golint,revive + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "k8s.io/apimachinery/pkg/runtime" ) -const ( - prometheusOperatorVersion = "v0.72.0" - prometheusOperatorURL = "https://github.com/prometheus-operator/prometheus-operator/" + - "releases/download/%s/bundle.yaml" - - certmanagerVersion = "v1.14.4" - certmanagerURLTmpl = "https://github.com/jetstack/cert-manager/releases/download/%s/cert-manager.yaml" -) - -func warnError(err error) { - _, _ = fmt.Fprintf(GinkgoWriter, "warning: %v\n", err) -} - -// InstallPrometheusOperator installs the prometheus Operator to be used to export the enabled metrics. 
-func InstallPrometheusOperator() error { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "create", "-f", url) - _, err := Run(cmd) - return err -} - // Run executes the provided command within this context func Run(cmd *exec.Cmd) ([]byte, error) { dir, _ := GetProjectDir() @@ -56,59 +39,34 @@ func Run(cmd *exec.Cmd) ([]byte, error) { cmd.Env = append(os.Environ(), "GO111MODULE=on") command := strings.Join(cmd.Args, " ") _, _ = fmt.Fprintf(GinkgoWriter, "running: %s\n", command) - output, err := cmd.CombinedOutput() - if err != nil { - return output, fmt.Errorf("%s failed with error: (%v) %s", command, err, string(output)) - } - - return output, nil -} - -// UninstallPrometheusOperator uninstalls the prometheus -func UninstallPrometheusOperator() { - url := fmt.Sprintf(prometheusOperatorURL, prometheusOperatorVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) - } -} -// UninstallCertManager uninstalls the cert manager -func UninstallCertManager() { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "delete", "-f", url) - if _, err := Run(cmd); err != nil { - warnError(err) - } -} + output, err := cmd.Output() + if err != nil { + var exitError *exec.ExitError -// InstallCertManager installs the cert manager bundle. -func InstallCertManager() error { - url := fmt.Sprintf(certmanagerURLTmpl, certmanagerVersion) - cmd := exec.Command("kubectl", "apply", "-f", url) - if _, err := Run(cmd); err != nil { - return err + if errors.As(err, &exitError) { + return output, fmt.Errorf("%s failed with error: (%v): %s", command, err, string(exitError.Stderr)) + } } - // Wait for cert-manager-webhook to be ready, which can take time if cert-manager - // was re-installed after uninstalling on a cluster. 
- cmd = exec.Command("kubectl", "wait", "managedcluster.apps/cert-manager-webhook", - "--for", "condition=Available", - "--namespace", "cert-manager", - "--timeout", "5m", - ) - _, err := Run(cmd) - return err + return output, nil } // LoadImageToKindCluster loads a local docker image to the kind cluster func LoadImageToKindClusterWithName(name string) error { cluster := "kind" - if v, ok := os.LookupEnv("KIND_CLUSTER"); ok { + if v, ok := os.LookupEnv("KIND_CLUSTER_NAME"); ok { cluster = v } kindOptions := []string{"load", "docker-image", name, "--name", cluster} - cmd := exec.Command("kind", kindOptions...) + + kindBinary := "kind" + + if kindVersion, ok := os.LookupEnv("KIND_VERSION"); ok { + kindBinary = fmt.Sprintf("./bin/kind-%s", kindVersion) + } + + cmd := exec.Command(kindBinary, kindOptions...) _, err := Run(cmd) return err } @@ -136,3 +94,91 @@ func GetProjectDir() (string, error) { wd = strings.Replace(wd, "/test/e2e", "", -1) return wd, nil } + +// ValidateConditionsTrue iterates over the conditions of the given +// unstructured object and returns an error if any of the conditions are not +// true. Conditions are expected to be of type metav1.Condition. 
+func ValidateConditionsTrue(unstrObj *unstructured.Unstructured) error { + objKind, objName := ObjKindName(unstrObj) + + conditions, err := GetConditionsFromUnstructured(unstrObj) + if err != nil { + return fmt.Errorf("failed to get conditions from unstructured object: %w", err) + } + + var errs error + + for _, c := range conditions { + if c.Status == metav1.ConditionTrue { + continue + } + + errs = errors.Join(errors.New(ConvertConditionsToString(c)), errs) + } + + if errs != nil { + return fmt.Errorf("%s %s is not ready with conditions:\n%w", objKind, objName, errs) + } + + return nil +} + +func ConvertConditionsToString(condition metav1.Condition) string { + return fmt.Sprintf("Type: %s, Status: %s, Reason: %s, Message: %s", + condition.Type, condition.Status, condition.Reason, condition.Message) +} + +func GetConditionsFromUnstructured(unstrObj *unstructured.Unstructured) ([]metav1.Condition, error) { + objKind, objName := ObjKindName(unstrObj) + + // Iterate the status conditions and ensure each condition reports a "Ready" + // status. 
+ unstrConditions, found, err := unstructured.NestedSlice(unstrObj.Object, "status", "conditions") + if !found { + return nil, fmt.Errorf("no status conditions found for %s: %s", objKind, objName) + } + if err != nil { + return nil, fmt.Errorf("failed to get status conditions for %s: %s: %w", objKind, objName, err) + } + + conditions := make([]metav1.Condition, 0, len(unstrConditions)) + + for _, condition := range unstrConditions { + conditionMap, ok := condition.(map[string]interface{}) + if !ok { + return nil, fmt.Errorf("expected %s: %s condition to be type map[string]interface{}, got: %T", + objKind, objName, conditionMap) + } + + var c *metav1.Condition + + if err := runtime.DefaultUnstructuredConverter.FromUnstructured(conditionMap, &c); err != nil { + return nil, fmt.Errorf("failed to convert condition map to metav1.Condition: %w", err) + } + + conditions = append(conditions, *c) + } + + return conditions, nil +} + +// ValidateObjectNamePrefix checks if the given object name has the given prefix. +func ValidateObjectNamePrefix(unstrObj *unstructured.Unstructured, clusterName string) error { + objKind, objName := ObjKindName(unstrObj) + + // Verify the machines are prefixed with the cluster name and fail + // the test if they are not. + if !strings.HasPrefix(objName, clusterName) { + return fmt.Errorf("object %s %s does not have cluster name prefix: %s", objKind, objName, clusterName) + } + + return nil +} + +func ObjKindName(unstrObj *unstructured.Unstructured) (string, string) { + return unstrObj.GetKind(), unstrObj.GetName() +} + +func WarnError(err error) { + _, _ = fmt.Fprintf(GinkgoWriter, "Warning: %v\n", err) +}