diff --git a/build/templates/values.yaml b/build/templates/values.yaml index 420092a9..8f02560c 100644 --- a/build/templates/values.yaml +++ b/build/templates/values.yaml @@ -175,12 +175,19 @@ conf: http-port: "" # CockroachDB's data mount path. + # For multi-store configuration, the path for each store is evaluated as: + # Store 1: cockroach-data + # Store 2: cockroach-data-2 + # Store N: cockroach-data-N path: cockroach-data # CockroachDB's storage configuration https://www.cockroachlabs.com/docs/v21.1/cockroach-start.html#storage # Uses --store flag store: enabled: false + # Number of data stores per node. + # For multi-store configuration, set this to a value greater than 1. + count: 1 # Should be empty or 'mem' type: # Required for type=mem. If type and size is empty - storage.persistentVolume.size is used @@ -188,6 +195,40 @@ conf: # Arbitrary strings, separated by colons, specifying disk type or capability attrs: + # CockroachDB's WAL failover configuration: + # https://www.cockroachlabs.com/docs/stable/cockroach-start#write-ahead-log-wal-failover + # Uses `--wal-failover` flag + wal-failover: + # The value to be passed to the `--wal-failover` flag. + # Possible configurations: + # 1. ``: If empty, `--wal-failover` is not passed to cockroach start. + # 2. `disabled`: Disables WAL failover. + # 3. `among-stores`: Enables WAL failover among multiple stores. This requires + # `conf.store.count` to be greater than 1. + # 4. `path=`: Enables WAL failover to a side disk. This requires + # a persistent volume should be mounted at this path (e.g. `path=/cockroach/cockroach-failover`). + value: + + persistentVolume: + # If enabled, then a PersistentVolumeClaim will be created and + # used for WAL failover as a side disk. + # https://www.cockroachlabs.com/docs/v24.3/wal-failover#provision-a-single-store-cluster-and-side-disk-for-wal-failover + enabled: false + # Mount path for the side disk. This gets prepended with `/cockroach/` in the stateful set. 
+ path: cockroach-failover + size: 25Gi + # If defined, then `storageClassName: `. + # If set to "-", then `storageClassName: ""`, which disables dynamic + # provisioning. + # If undefined or empty (default), then no `storageClassName` spec is + # set, so the default provisioner will be chosen (gp2 on AWS, standard + # on GKE, AWS & OpenStack). + storageClass: "" + # Additional labels to apply to the created PersistentVolumeClaims. + labels: {} + # Additional annotations to apply to the created PersistentVolumeClaims. + annotations: {} + statefulset: replicas: 3 updateStrategy: diff --git a/cockroachdb/templates/_helpers.tpl b/cockroachdb/templates/_helpers.tpl index 57030434..3670fccc 100644 --- a/cockroachdb/templates/_helpers.tpl +++ b/cockroachdb/templates/_helpers.tpl @@ -85,16 +85,20 @@ Return the appropriate apiVersion for StatefulSets Return CockroachDB store expression */}} {{- define "cockroachdb.conf.store" -}} -{{- $isInMemory := eq (.Values.conf.store.type | toString) "mem" -}} -{{- $persistentSize := empty .Values.conf.store.size | ternary .Values.storage.persistentVolume.size .Values.conf.store.size -}} + {{- $isInMemory := eq (.Values.conf.store.type | toString) "mem" -}} + {{- $persistentSize := empty .Values.conf.store.size | ternary .Values.storage.persistentVolume.size .Values.conf.store.size -}} -{{- $store := dict -}} -{{- $_ := set $store "type" ($isInMemory | ternary "type=mem" "") -}} -{{- $_ := set $store "path" ($isInMemory | ternary "" (print "path=" .Values.conf.path)) -}} -{{- $_ := set $store "size" (print "size=" ($isInMemory | ternary .Values.conf.store.size $persistentSize)) -}} -{{- $_ := set $store "attrs" (empty .Values.conf.store.attrs | ternary "" (print "attrs=" .Values.conf.store.attrs)) -}} + {{- $store := dict -}} + {{- $_ := set $store "type" ($isInMemory | ternary "type=mem" "") -}} + {{- if eq .Args.idx 0 -}} + {{- $_ := set $store "path" ($isInMemory | ternary "" (print "path=" .Values.conf.path)) -}} + {{- else -}} + 
{{- $_ := set $store "path" ($isInMemory | ternary "" (print "path=" .Values.conf.path "-" (add1 .Args.idx))) -}} + {{- end -}} + {{- $_ := set $store "size" (print "size=" ($isInMemory | ternary .Values.conf.store.size $persistentSize)) -}} + {{- $_ := set $store "attrs" (empty .Values.conf.store.attrs | ternary "" (print "attrs=" .Values.conf.store.attrs)) -}} -{{ compact (values $store) | join "," }} + {{- compact (values $store) | sortAlpha | join "," -}} {{- end -}} {{/* @@ -303,3 +307,46 @@ Validate the log configuration. {{- end -}} {{- end -}} {{- end -}} + +{{- define "cockroachdb.storage.hostPath.computation" -}} +{{- if hasSuffix "/" .Values.storage.hostPath -}} + {{- printf "%s-%d/" (dir .Values.storage.hostPath) (add1 .Args.idx) | quote -}} +{{- else -}} + {{- printf "%s-%d" .Values.storage.hostPath (add1 .Args.idx) | quote -}} +{{- end -}} +{{- end -}} + +{{/* +Validate the store count configuration. +*/}} +{{- define "cockroachdb.conf.store.validation" -}} + {{- if and (not .Values.conf.store.enabled) (ne (int .Values.conf.store.count) 1) -}} + {{ fail "Store count should be 1 when disabled" }} + {{- end -}} +{{- end -}} + +{{/* +Validate the WAL failover configuration. +*/}} +{{- define "cockroachdb.conf.wal-failover.validation" -}} + {{- with index .Values.conf `wal-failover` -}} + {{- if not (mustHas .value (list "" "disabled" "among-stores")) -}} + {{- if not (hasPrefix "path=" (.value | toString)) -}} + {{ fail "Invalid WAL failover configuration value. 
Expected either of '', 'disabled', 'among-stores' or 'path='" }} + {{- end -}} + {{- end -}} + {{- if eq .value "among-stores" -}} + {{- if or (not $.Values.conf.store.enabled) (eq (int $.Values.conf.store.count) 1) -}} + {{ fail "WAL failover among stores requires store enabled with count greater than 1" }} + {{- end -}} + {{- end -}} + {{- if hasPrefix "path=" (.value | toString) -}} + {{- if not .persistentVolume.enabled -}} + {{ fail "WAL failover to a side disk requires a persistent volume" }} + {{- end -}} + {{- if and (not (hasPrefix (printf "/cockroach/%s" .persistentVolume.path) (trimPrefix "path=" .value))) (not (hasPrefix .persistentVolume.path (trimPrefix "path=" .value))) -}} + {{ fail "WAL failover to a side disk requires a path to the mounted persistent volume" }} + {{- end -}} + {{- end -}} + {{- end -}} +{{- end -}} diff --git a/cockroachdb/templates/statefulset.yaml b/cockroachdb/templates/statefulset.yaml index 2b7ee04e..5be88394 100644 --- a/cockroachdb/templates/statefulset.yaml +++ b/cockroachdb/templates/statefulset.yaml @@ -1,4 +1,5 @@ {{ template "cockroachdb.conf.log.validation" . }} +{{ template "cockroachdb.conf.store.validation" . }} kind: StatefulSet apiVersion: {{ template "cockroachdb.statefulset.apiVersion" . }} metadata: @@ -235,7 +236,14 @@ spec: --sql-audit-dir={{ . }} {{- end }} {{- if .Values.conf.store.enabled }} - --store={{ template "cockroachdb.conf.store" . }} + {{- range $idx := until (int .Values.conf.store.count) }} + {{- $_ := set $ "Args" (dict "idx" $idx) }} + --store={{ include "cockroachdb.conf.store" $ }} + {{- end }} + {{- end }} + {{- with index .Values.conf `wal-failover` `value` }} + {{- template "cockroachdb.conf.wal-failover.validation" $ }} + --wal-failover={{ . 
}} {{- end }} {{- if .Values.conf.log.enabled }} --log-config-file=/cockroach/log-config/log-config.yaml @@ -271,8 +279,21 @@ spec: {{- end }} protocol: TCP volumeMounts: + {{- range $i := until (int .Values.conf.store.count) }} + {{- if eq $i 0 }} - name: datadir - mountPath: /cockroach/{{ .Values.conf.path }}/ + mountPath: /cockroach/{{ $.Values.conf.path }}/ + {{- else }} + - name: datadir-{{ add1 $i }} + mountPath: /cockroach/{{ $.Values.conf.path }}-{{ add1 $i }}/ + {{- end }} + {{- end }} + {{- with index .Values.conf `wal-failover` `persistentVolume` }} + {{- if .enabled }} + - name: failoverdir + mountPath: /cockroach/{{ .path }}/ + {{- end }} + {{- end }} {{- if .Values.tls.enabled }} - name: certs mountPath: /cockroach/cockroach-certs/ @@ -344,16 +365,43 @@ spec: resources: {{- toYaml . | nindent 12 }} {{- end }} volumes: + {{- range $i := until (int .Values.conf.store.count) }} + {{- if eq $i 0 }} - name: datadir - {{- if .Values.storage.persistentVolume.enabled }} + {{- if $.Values.storage.persistentVolume.enabled }} persistentVolumeClaim: claimName: datadir - {{- else if .Values.storage.hostPath }} + {{- else if $.Values.storage.hostPath }} + hostPath: + path: {{ $.Values.storage.hostPath | quote }} + {{- else }} + emptyDir: {} + {{- end }} + {{- else }} + - name: datadir-{{ add1 $i }} + {{- if $.Values.storage.persistentVolume.enabled }} + persistentVolumeClaim: + claimName: datadir-{{ add1 $i }} + {{- else if $.Values.storage.hostPath }} + {{- $_ := set $ "Args" (dict "idx" $i) }} hostPath: - path: {{ .Values.storage.hostPath | quote }} + path: {{ include "cockroachdb.storage.hostPath.computation" $ }} {{- else }} emptyDir: {} {{- end }} + {{- end }} + {{- end }} + {{- with index .Values.conf `wal-failover` }} + {{- if .value }} + - name: failoverdir + {{- if .persistentVolume.enabled }} + persistentVolumeClaim: + claimName: failoverdir + {{- else }} + emptyDir: {} + {{- end }} + {{- end }} + {{- end }} {{- with .Values.statefulset.volumes }} {{ 
toYaml . | nindent 8 }} {{- end }} @@ -418,35 +466,71 @@ spec: runAsNonRoot: true {{- end }} {{- end }} -{{- if or .Values.storage.persistentVolume.enabled .Values.conf.log.persistentVolume.enabled }} +{{- if or .Values.storage.persistentVolume.enabled (index .Values.conf `wal-failover` `persistentVolume` `enabled`) .Values.conf.log.persistentVolume.enabled }} volumeClaimTemplates: {{- if .Values.storage.persistentVolume.enabled }} + {{- range $i := until (int .Values.conf.store.count) }} - metadata: + {{- if eq $i 0 }} name: datadir + {{- else }} + name: datadir-{{ add1 $i }} + {{- end }} labels: - app.kubernetes.io/name: {{ template "cockroachdb.name" . }} - app.kubernetes.io/instance: {{ .Release.Name | quote }} - {{- with .Values.storage.persistentVolume.labels }} + app.kubernetes.io/name: {{ template "cockroachdb.name" $ }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + {{- with $.Values.storage.persistentVolume.labels }} {{- toYaml . | nindent 10 }} {{- end }} - {{- with .Values.labels }} + {{- with $.Values.labels }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.storage.persistentVolume.annotations }} + annotations: {{- toYaml . | nindent 10 }} + {{- end }} + spec: + accessModes: ["ReadWriteOnce"] + {{- if $.Values.storage.persistentVolume.storageClass }} + {{- if (eq "-" $.Values.storage.persistentVolume.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: {{ $.Values.storage.persistentVolume.storageClass | quote}} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ $.Values.storage.persistentVolume.size | quote }} + {{- end }} + {{- end }} + {{- with index .Values.conf `wal-failover` }} + {{- if .persistentVolume.enabled }} + - metadata: + name: failoverdir + labels: + app.kubernetes.io/name: {{ template "cockroachdb.name" $ }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + {{- with .persistentVolume.labels }} {{- toYaml . 
| nindent 10 }} {{- end }} - {{- with .Values.storage.persistentVolume.annotations }} + {{- with $.Values.labels }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .persistentVolume.annotations }} annotations: {{- toYaml . | nindent 10 }} {{- end }} spec: accessModes: ["ReadWriteOnce"] - {{- if .Values.storage.persistentVolume.storageClass }} - {{- if (eq "-" .Values.storage.persistentVolume.storageClass) }} + {{- with .persistentVolume.storageClass }} + {{- if eq "-" . }} storageClassName: "" {{- else }} - storageClassName: {{ .Values.storage.persistentVolume.storageClass | quote}} + storageClassName: {{ . | quote}} {{- end }} {{- end }} resources: requests: - storage: {{ .Values.storage.persistentVolume.size | quote }} + storage: {{ .persistentVolume.size | quote }} + {{- end }} {{- end }} {{- if .Values.conf.log.persistentVolume.enabled }} - metadata: diff --git a/cockroachdb/values.yaml b/cockroachdb/values.yaml index b2315e0d..18996704 100644 --- a/cockroachdb/values.yaml +++ b/cockroachdb/values.yaml @@ -176,12 +176,19 @@ conf: http-port: "" # CockroachDB's data mount path. + # For multi-store configuration, the path for each store is evaluated as: + # Store 1: cockroach-data + # Store 2: cockroach-data-2 + # Store N: cockroach-data-N path: cockroach-data # CockroachDB's storage configuration https://www.cockroachlabs.com/docs/v21.1/cockroach-start.html#storage # Uses --store flag store: enabled: false + # Number of data stores per node. + # For multi-store configuration, set this to a value greater than 1. + count: 1 # Should be empty or 'mem' type: # Required for type=mem. 
If type and size is empty - storage.persistentVolume.size is used @@ -189,6 +196,40 @@ conf: # Arbitrary strings, separated by colons, specifying disk type or capability attrs: + # CockroachDB's WAL failover configuration: + # https://www.cockroachlabs.com/docs/stable/cockroach-start#write-ahead-log-wal-failover + # Uses `--wal-failover` flag + wal-failover: + # The value to be passed to the `--wal-failover` flag. + # Possible configurations: + # 1. ``: If empty, `--wal-failover` is not passed to cockroach start. + # 2. `disabled`: Disables WAL failover. + # 3. `among-stores`: Enables WAL failover among multiple stores. This requires + # `conf.store.count` to be greater than 1. + # 4. `path=`: Enables WAL failover to a side disk. This requires + # a persistent volume should be mounted at this path (e.g. `path=/cockroach/cockroach-failover`). + value: + + persistentVolume: + # If enabled, then a PersistentVolumeClaim will be created and + # used for WAL failover as a side disk. + # https://www.cockroachlabs.com/docs/v24.3/wal-failover#provision-a-single-store-cluster-and-side-disk-for-wal-failover + enabled: false + # Mount path for the side disk. This gets prepended with `/cockroach/` in the stateful set. + path: cockroach-failover + size: 25Gi + # If defined, then `storageClassName: `. + # If set to "-", then `storageClassName: ""`, which disables dynamic + # provisioning. + # If undefined or empty (default), then no `storageClassName` spec is + # set, so the default provisioner will be chosen (gp2 on AWS, standard + # on GKE, AWS & OpenStack). + storageClass: "" + # Additional labels to apply to the created PersistentVolumeClaims. + labels: {} + # Additional annotations to apply to the created PersistentVolumeClaims. 
+ annotations: {} + statefulset: replicas: 3 updateStrategy: diff --git a/tests/e2e/install/cockroachdb_helm_e2e_test.go b/tests/e2e/install/cockroachdb_helm_e2e_test.go index f598a8be..434bde75 100644 --- a/tests/e2e/install/cockroachdb_helm_e2e_test.go +++ b/tests/e2e/install/cockroachdb_helm_e2e_test.go @@ -3,13 +3,16 @@ package integration import ( "fmt" "io/fs" + "log" "os" "path/filepath" + "strconv" "strings" "sync" "testing" "time" + "github.com/cockroachdb/cockroach-operator/pkg/kube" "github.com/gruntwork-io/terratest/modules/helm" "github.com/gruntwork-io/terratest/modules/k8s" "github.com/gruntwork-io/terratest/modules/random" @@ -19,7 +22,6 @@ import ( ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" - "github.com/cockroachdb/cockroach-operator/pkg/kube" "github.com/cockroachdb/helm-charts/pkg/security" util "github.com/cockroachdb/helm-charts/pkg/utils" "github.com/cockroachdb/helm-charts/tests/testutil" @@ -497,3 +499,143 @@ spec: time.Sleep(20 * time.Second) testutil.RequireCRDBToFunction(t, crdbCluster, false) } + +func TestWALFailoverSideDiskExistingCluster(t *testing.T) { + testWALFailoverExistingCluster( + t, + map[string]string{ + "conf.wal-failover.value": "path=cockroach-failover", + "conf.wal-failover.persistentVolume.enabled": "true", + "conf.wal-failover.persistentVolume.size": "1Gi", + }, + ) +} + +func TestWALFailoverAmongStoresExistingCluster(t *testing.T) { + testWALFailoverExistingCluster( + t, + map[string]string{ + "conf.wal-failover.value": "among-stores", + "conf.store.count": "2", + }, + ) +} + +func testWALFailoverExistingCluster(t *testing.T, additionalValues map[string]string) { + namespaceName := "cockroach" + strings.ToLower(random.UniqueId()) + numReplicas := 3 + kubectlOptions := k8s.NewKubectlOptions("", "", namespaceName) + var err error + + crdbCluster := testutil.CockroachCluster{ + Cfg: cfg, + K8sClient: k8sClient, + StatefulSetName: fmt.Sprintf("%s-cockroachdb", releaseName), + 
Namespace: namespaceName, + ClientSecret: fmt.Sprintf("%s-cockroachdb-client-secret", releaseName), + NodeSecret: fmt.Sprintf("%s-cockroachdb-node-secret", releaseName), + CaSecret: fmt.Sprintf("%s-cockroachdb-ca-secret", releaseName), + IsCaUserProvided: false, + } + + k8s.CreateNamespace(t, kubectlOptions, namespaceName) + defer k8s.DeleteNamespace(t, kubectlOptions, namespaceName) + + // Print the debug logs in case of test failure. + defer func() { + if t.Failed() { + testutil.PrintDebugLogs(t, kubectlOptions) + } + }() + + // Configure options for the initial deployment. + initialValues := map[string]string{ + "conf.cluster-name": "test", + "conf.store.enabled": "true", + "storage.persistentVolume.size": "1Gi", + "statefulset.replicas": strconv.Itoa(numReplicas), + } + options := &helm.Options{ + KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName), + SetValues: initialValues, + } + + // Deploy the helm chart and confirm the installation is successful. + helm.Install(t, options, helmChartPath, releaseName) + + defer func() { + err = helm.DeleteE(t, options, releaseName, true) + // Ignore the error if the operation timed out. + if err == nil || !strings.Contains(err.Error(), "timed out") { + require.NoError(t, err) + } + + danglingSecrets := []string{ + crdbCluster.CaSecret, + crdbCluster.ClientSecret, + crdbCluster.NodeSecret, + } + for i := range danglingSecrets { + _, err = k8s.GetSecretE(t, kubectlOptions, danglingSecrets[i]) + require.Equal(t, true, kube.IsNotFound(err)) + t.Logf("Secret %s deleted by helm uninstall", danglingSecrets[i]) + } + }() + + // Wait for the service endpoint to be available. + serviceName := fmt.Sprintf("%s-cockroachdb-public", releaseName) + k8s.WaitUntilServiceAvailable(t, kubectlOptions, serviceName, 30, 2*time.Second) + testutil.RequireClusterToBeReadyEventuallyTimeout(t, crdbCluster, 600*time.Second) + + // Enable WAL Failover and upgrade the cluster. 
+ // In order to prevent any downtime, we need to follow the below steps for each pod: + // - delete the statefulset with --cascade=orphan + // - delete the pod + // - upgrade the Helm chart + + // Configure options for the updated deployment. + updatedValues := map[string]string{} + for k, v := range initialValues { + updatedValues[k] = v + } + for k, v := range additionalValues { + updatedValues[k] = v + } + options = &helm.Options{ + KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName), + SetValues: updatedValues, + } + + updateSinglePod := func(idx int) { + podName := fmt.Sprintf("%s-%d", crdbCluster.StatefulSetName, idx) + log.Printf("Request received to update pod %s\n", podName) + + // Delete the statefulset with --cascade=orphan + log.Println("Deleting the statefulset with --cascade=orphan") + k8s.RunKubectl( + t, + kubectlOptions, + "delete", + "statefulset", + crdbCluster.StatefulSetName, + "--cascade=orphan", + ) + + // Delete the pod + log.Printf("Deleting the pod %s\n", podName) + k8s.RunKubectl(t, kubectlOptions, "delete", "pod", podName) + testutil.WaitUntilPodDeleted(t, kubectlOptions, podName, 30, 2*time.Second) + + // Upgrade the Helm release + log.Println("Upgrading the Helm release") + helm.Upgrade(t, options, helmChartPath, releaseName) + } + + // Iterate over all pods in the statefulset. 
+ for idx := 0; idx < numReplicas; idx++ { + updateSinglePod(idx) + + k8s.WaitUntilServiceAvailable(t, kubectlOptions, serviceName, 30, 2*time.Second) + testutil.RequireClusterToBeReadyEventuallyTimeout(t, crdbCluster, 600*time.Second) + } +} diff --git a/tests/template/cockroachdb_helm_template_test.go b/tests/template/cockroachdb_helm_template_test.go index 9d3e370a..a7f43ce1 100644 --- a/tests/template/cockroachdb_helm_template_test.go +++ b/tests/template/cockroachdb_helm_template_test.go @@ -1598,3 +1598,212 @@ func TestHelmCockroachStartCmd(t *testing.T) { }) } } + +// TestHelmWALFailoverConfiguration contains the tests around WAL failover configuration. +func TestHelmWALFailoverConfiguration(t *testing.T) { + t.Parallel() + + type expect struct { + statefulsetArgument string + renderErr string + persistentVolumeNames []string + } + + testCases := []struct { + name string + values map[string]string + expect expect + }{ + { + "WAL failover invalid configuration", + map[string]string{ + "conf.wal-failover.value": "invalid", + }, + expect{ + "", + "Invalid WAL failover configuration value. 
Expected either of '', 'disabled', 'among-stores' or 'path='", + []string{"datadir"}, + }, + }, + { + "WAL failover not configured", + map[string]string{ + "conf.wal-failover.value": "", + "conf.store.enabled": "true", + "conf.store.count": "1", + }, + expect{ + "--store=path=cockroach-data,size=100Gi", + "", + []string{"datadir"}, + }, + }, + { + "WAL failover among multiple stores", + map[string]string{ + "conf.wal-failover.value": "among-stores", + "conf.store.enabled": "true", + "conf.store.count": "2", + }, + expect{ + "--store=path=cockroach-data,size=100Gi " + + "--store=path=cockroach-data-2,size=100Gi " + + "--wal-failover=among-stores", + "", + []string{"datadir", "datadir-2"}, + }, + }, + { + "WAL failover disabled with multiple stores", + map[string]string{ + "conf.wal-failover.value": "disabled", + "conf.store.enabled": "true", + "conf.store.count": "2", + }, + expect{ + "--store=path=cockroach-data,size=100Gi " + + "--store=path=cockroach-data-2,size=100Gi " + + "--wal-failover=disabled", + "", + []string{"datadir", "datadir-2"}, + }, + }, + { + "WAL failover among stores but store disabled", + map[string]string{ + "conf.wal-failover.value": "among-stores", + "conf.store.enabled": "false", + }, + expect{ + "", + "WAL failover among stores requires store enabled with count greater than 1", + []string{"datadir"}, + }, + }, + { + "WAL failover among stores but single store", + map[string]string{ + "conf.wal-failover.value": "among-stores", + "conf.store.enabled": "true", + "conf.store.count": "1", + }, + expect{ + "", + "WAL failover among stores requires store enabled with count greater than 1", + []string{"datadir"}, + }, + }, + { + "WAL failover through side disk (absolute path)", + map[string]string{ + "conf.wal-failover.value": "path=/cockroach/cockroach-failover/abc", + "conf.wal-failover.persistentVolume.enabled": "true", + }, + expect{ + "--wal-failover=path=/cockroach/cockroach-failover/abc", + "", + []string{"datadir", "failoverdir"}, + }, + }, 
+ { + "WAL failover through side disk (relative path)", + map[string]string{ + "conf.wal-failover.value": "path=cockroach-failover/abc", + "conf.wal-failover.persistentVolume.enabled": "true", + }, + expect{ + "--wal-failover=path=cockroach-failover/abc", + "", + []string{"datadir", "failoverdir"}, + }, + }, + { + "WAL failover disabled through side disk", + map[string]string{ + "conf.wal-failover.value": "disabled", + "conf.wal-failover.persistentVolume.enabled": "true", + }, + expect{ + "--wal-failover=disabled", + "", + []string{"datadir", "failoverdir"}, + }, + }, + { + "WAL failover through side disk but no pvc", + map[string]string{ + "conf.wal-failover.value": "path=/cockroach/cockroach-failover", + "conf.wal-failover.persistentVolume.enabled": "false", + }, + expect{ + "", + "WAL failover to a side disk requires a persistent volume", + []string{"datadir"}, + }, + }, + { + "WAL failover through side disk but invalid path", + map[string]string{ + "conf.wal-failover.value": "path=/invalid", + "conf.wal-failover.persistentVolume.enabled": "true", + }, + expect{ + "", + "WAL failover to a side disk requires a path to the mounted persistent volume", + []string{"datadir", "failoverdir"}, + }, + }, + } + + for _, testCase := range testCases { + var statefulset appsv1.StatefulSet + + // Here, we capture the range variable and force it into the scope of this block. + // If we don't do this, when the subtest switches contexts (because of t.Parallel), + // the testCase value will have been updated by the for loop and will be the next testCase. 
+ testCase := testCase + + t.Run(testCase.name, func(subT *testing.T) { + subT.Parallel() + + options := &helm.Options{ + KubectlOptions: k8s.NewKubectlOptions("", "", namespaceName), + SetValues: testCase.values, + } + + output, err := helm.RenderTemplateE( + t, options, helmChartPath, releaseName, []string{"templates/statefulset.yaml"}, + ) + if err != nil { + require.ErrorContains(subT, err, testCase.expect.renderErr) + return + } else { + require.Empty(subT, testCase.expect.renderErr) + } + + helm.UnmarshalK8SYaml(t, output, &statefulset) + + require.Equal(subT, namespaceName, statefulset.Namespace) + require.Contains( + t, + statefulset.Spec.Template.Spec.Containers[0].Args[2], + testCase.expect.statefulsetArgument, + ) + + require.Equal( + subT, + len(testCase.expect.persistentVolumeNames), + len(statefulset.Spec.VolumeClaimTemplates), + ) + var actualPersistentVolumeNames []string + for _, pvc := range statefulset.Spec.VolumeClaimTemplates { + actualPersistentVolumeNames = append(actualPersistentVolumeNames, pvc.Name) + } + require.EqualValues( + subT, + testCase.expect.persistentVolumeNames, + actualPersistentVolumeNames, + ) + }) + } +} diff --git a/tests/testutil/require.go b/tests/testutil/require.go index d7ebfff2..dc847941 100644 --- a/tests/testutil/require.go +++ b/tests/testutil/require.go @@ -7,15 +7,18 @@ import ( "encoding/pem" "errors" "fmt" - "github.com/gruntwork-io/terratest/modules/k8s" - batchv1 "k8s.io/api/batch/v1" - corev1 "k8s.io/api/core/v1" + "log" "testing" "time" "github.com/cockroachdb/cockroach-operator/pkg/database" + "github.com/cockroachdb/cockroach-operator/pkg/kube" + "github.com/gruntwork-io/terratest/modules/k8s" + "github.com/gruntwork-io/terratest/modules/retry" "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" + batchv1 "k8s.io/api/batch/v1" + corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" @@ -292,17 
+295,21 @@ func verifyCertificate(t *testing.T, caCert []byte, cert *x509.Certificate) { // PrintDebugLogs adds the verbose logging of the cluster at the runtime. func PrintDebugLogs(t *testing.T, options *k8s.KubectlOptions) { - out, err := k8s.RunKubectlAndGetOutputE(t, options, []string{"get", "nodes"}...) - require.NoError(t, err) - t.Log(out) - - out, err = k8s.RunKubectlAndGetOutputE(t, options, []string{"get", "pods"}...) - require.NoError(t, err) - t.Log(out) - - out, err = k8s.RunKubectlAndGetOutputE(t, options, []string{"describe", "pods"}...) - require.NoError(t, err) - t.Log(out) + for _, args := range [][]string{ + {"get", "nodes"}, + {"get", "pvc"}, + {"describe", "pvc"}, + {"get", "pv"}, + {"describe", "pv"}, + {"get", "sts"}, + {"describe", "sts"}, + {"get", "pods"}, + {"describe", "pods"}, + } { + out, err := k8s.RunKubectlAndGetOutputE(t, options, args...) + require.NoError(t, err) + t.Log(out) + } } // RequireToRunRotateJob triggers the client/node or CA certificate rotation job based on next cron schedule. @@ -427,3 +434,32 @@ return &job, nil } + +// WaitUntilPodDeleted waits until the pod is deleted, retrying the check for the specified +// number of times, sleeping for the provided duration between each try. +func WaitUntilPodDeleted( + t *testing.T, + options *k8s.KubectlOptions, + podName string, + retries int, + sleepBetweenRetries time.Duration, +) { + statusMsg := fmt.Sprintf("Wait for pod %s to be deleted.", podName) + message, err := retry.DoWithRetryE( + t, + statusMsg, + retries, + sleepBetweenRetries, + func() (string, error) { + _, err := k8s.GetPodE(t, options, podName) + if err != nil && kube.IsNotFound(err) { + return "Pod is now deleted", nil + } + return "", errors.New(fmt.Sprintf("pod is not deleted: %s", err)) + }, + ) + if err != nil { + log.Printf("Timed out waiting for Pod to be deleted: %s\n", err) + } + log.Println(message) +}