From 3d5c3192e228724aa4d4976533ba2ac4851c67a5 Mon Sep 17 00:00:00 2001 From: Pritesh Lahoti Date: Tue, 10 Dec 2024 14:07:51 +0530 Subject: [PATCH] feature: support WAL failover Adds support for WAL failover via: - among multiple stores - single store side disk (as a PVC) --- build/templates/values.yaml | 41 +++++++++ cockroachdb/templates/_helpers.tpl | 55 ++++++++++-- cockroachdb/templates/statefulset.yaml | 113 +++++++++++++++++++++---- cockroachdb/values.yaml | 41 +++++++++ 4 files changed, 227 insertions(+), 23 deletions(-) diff --git a/build/templates/values.yaml b/build/templates/values.yaml index 420092a9..71ab7576 100644 --- a/build/templates/values.yaml +++ b/build/templates/values.yaml @@ -175,12 +175,19 @@ conf: http-port: "" # CockroachDB's data mount path. + # For multi-store configuration, the path for each store is derived from this value (shown here for the default `cockroach-data`): + # Store 1: cockroach-data + # Store 2: cockroach-data-2 + # Store N: cockroach-data-N path: cockroach-data # CockroachDB's storage configuration https://www.cockroachlabs.com/docs/v21.1/cockroach-start.html#storage # Uses --store flag store: enabled: false + # Number of data stores per node. Must be 1 while `enabled` is false (the chart fails to render otherwise). + # For multi-store configuration, set `enabled` to true and this to a value greater than 1. + count: 1 # Should be empty or 'mem' type: # Required for type=mem. If type and size is empty - storage.persistentVolume.size is used @@ -188,6 +195,40 @@ conf: # Arbitrary strings, separated by colons, specifying disk type or capability attrs: + # CockroachDB's WAL failover configuration: + # https://www.cockroachlabs.com/docs/stable/cockroach-start#write-ahead-log-wal-failover + # Uses `--wal-failover` flag + wal-failover: + # The value to be passed to the `--wal-failover` flag. + # Possible configurations: + # 1. Default: If empty, `--wal-failover` is not passed to cockroach start + # 2. Disabled: Set to `disabled` to disable WAL failover + # 3. Multiple stores: Set to `among-stores` to enable WAL failover among multiple stores.
+ # Requires `conf.store.enabled: true` and `conf.store.count` greater than 1. + # 4. Single store Side disk: Set to `path=<path>` to enable WAL failover to a side disk. + # `persistentVolume.enabled` must be true and <path> must start with `/cockroach/<persistentVolume.path>` (e.g. `path=/cockroach/cockroach-failover`). + value: + + persistentVolume: + # If enabled, then a PersistentVolumeClaim will be created and + # used for WAL failover as a side disk. + # https://www.cockroachlabs.com/docs/v24.3/wal-failover#provision-a-single-store-cluster-and-side-disk-for-wal-failover + enabled: false + # Mount path for the side disk. `/cockroach/` is prepended to this value in the stateful set. + path: cockroach-failover + size: 25Gi + # If defined, then `storageClassName: <storageClass>`. + # If set to "-", then `storageClassName: ""`, which disables dynamic + # provisioning. + # If undefined or empty (default), then no `storageClassName` spec is + # set, so the default provisioner will be chosen (gp2 on AWS, standard + # on GKE, AWS & OpenStack). + storageClass: "" + # Additional labels to apply to the created PersistentVolumeClaims. + labels: {} + # Additional annotations to apply to the created PersistentVolumeClaims.
+ annotations: {} + statefulset: replicas: 3 updateStrategy: diff --git a/cockroachdb/templates/_helpers.tpl b/cockroachdb/templates/_helpers.tpl index 57030434..0b036279 100644 --- a/cockroachdb/templates/_helpers.tpl +++ b/cockroachdb/templates/_helpers.tpl @@ -85,16 +85,20 @@ Return the appropriate apiVersion for StatefulSets Return CockroachDB store expression */}} {{- define "cockroachdb.conf.store" -}} -{{- $isInMemory := eq (.Values.conf.store.type | toString) "mem" -}} -{{- $persistentSize := empty .Values.conf.store.size | ternary .Values.storage.persistentVolume.size .Values.conf.store.size -}} + {{- $isInMemory := eq (.Values.conf.store.type | toString) "mem" -}} + {{- $persistentSize := empty .Values.conf.store.size | ternary .Values.storage.persistentVolume.size .Values.conf.store.size -}} -{{- $store := dict -}} -{{- $_ := set $store "type" ($isInMemory | ternary "type=mem" "") -}} -{{- $_ := set $store "path" ($isInMemory | ternary "" (print "path=" .Values.conf.path)) -}} -{{- $_ := set $store "size" (print "size=" ($isInMemory | ternary .Values.conf.store.size $persistentSize)) -}} -{{- $_ := set $store "attrs" (empty .Values.conf.store.attrs | ternary "" (print "attrs=" .Values.conf.store.attrs)) -}} + {{- $store := dict -}} + {{- $_ := set $store "type" ($isInMemory | ternary "type=mem" "") -}} + {{- /* .Args.idx is the 0-based store index that the caller must set on the context before including this helper. Index 0 uses conf.path as-is; index i > 0 appends "-<i+1>" (e.g. cockroach-data-2 for the second store). In-memory stores get no path. */ -}} {{- if eq .Args.idx 0 -}} + {{- $_ := set $store "path" ($isInMemory | ternary "" (print "path=" .Values.conf.path)) -}} + {{- else -}} + {{- $_ := set $store "path" ($isInMemory | ternary "" (print "path=" .Values.conf.path "-" (add1 .Args.idx))) -}} + {{- end -}} + {{- $_ := set $store "size" (print "size=" ($isInMemory | ternary .Values.conf.store.size $persistentSize)) -}} + {{- $_ := set $store "attrs" (empty .Values.conf.store.attrs | ternary "" (print "attrs=" .Values.conf.store.attrs)) -}} -{{ compact (values $store) | join "," }} + {{- /* Empty fields are dropped and the remaining ones are joined with commas. */ -}} {{- compact (values $store) | join "," -}} {{- end -}} {{/* @@ -303,3 +307,38 @@ Validate the log configuration.
{{- end -}} {{- end -}} {{- end -}} + +{{/* +Validate the store count configuration: `conf.store.count` must be 1 while `conf.store.enabled` is false. +*/}} +{{- define "cockroachdb.conf.store.validation" -}} + {{- if and (not .Values.conf.store.enabled) (ne (int .Values.conf.store.count) 1) -}} + {{ fail "Store count should be 1 when disabled" }} + {{- end -}} +{{- end -}} + +{{/* +Validate the WAL failover configuration (`conf.wal-failover`): `value` must be '', 'disabled', 'among-stores' or start with 'path='; 'among-stores' requires the store to be enabled with a count of at least 2 (a count of 0 or below is rejected as well); a 'path=' value requires `persistentVolume.enabled` and a path that starts with `/cockroach/<persistentVolume.path>`. +*/}} +{{- define "cockroachdb.conf.wal-failover.validation" -}} + {{- with index .Values.conf `wal-failover` -}} + {{- if not (mustHas .value (list "" "disabled" "among-stores")) -}} + {{- if not (hasPrefix "path=" (.value | toString)) -}} + {{ fail "Invalid WAL failover configuration value. Expected either of '', 'disabled', 'among-stores' or 'path='" }} + {{- end -}} + {{- end -}} + {{- if eq .value "among-stores" -}} + {{- if or (not $.Values.conf.store.enabled) (lt (int $.Values.conf.store.count) 2) -}} + {{ fail "WAL failover among stores requires store enabled with count greater than 1" }} + {{- end -}} + {{- end -}} + {{- if hasPrefix "path=" (.value | toString) -}} + {{- if not .persistentVolume.enabled -}} + {{ fail "WAL failover to a side disk requires a persistent volume" }} + {{- end -}} + {{- if not (hasPrefix (printf "/cockroach/%s" .persistentVolume.path) (trimPrefix "path=" .value)) -}} + {{ fail "WAL failover to a side disk requires a path to the mounted persistent volume" }} + {{- end -}} + {{- end -}} + {{- end -}} +{{- end -}} diff --git a/cockroachdb/templates/statefulset.yaml b/cockroachdb/templates/statefulset.yaml index 2b7ee04e..1ec9b4e0 100644 --- a/cockroachdb/templates/statefulset.yaml +++ b/cockroachdb/templates/statefulset.yaml @@ -1,4 +1,5 @@ {{ template "cockroachdb.conf.log.validation" . }} +{{ template "cockroachdb.conf.store.validation" . }} kind: StatefulSet apiVersion: {{ template "cockroachdb.statefulset.apiVersion" . }} metadata: @@ -235,7 +236,14 @@ spec: --sql-audit-dir={{ .
}} {{- end }} {{- if .Values.conf.store.enabled }} - --store={{ template "cockroachdb.conf.store" . }} + {{- range $idx := until (int .Values.conf.store.count) }} + {{- /* Hand the 0-based store index to the "cockroachdb.conf.store" helper through the root context; one --store flag is emitted per store. */ -}} {{- $_ := set $ "Args" (dict "idx" $idx) }} + --store={{ include "cockroachdb.conf.store" $ }} + {{- end }} + {{- end }} + {{- /* --wal-failover is emitted only for a non-empty value, which is validated first. */ -}} {{- with index .Values.conf `wal-failover` `value` }} + {{- template "cockroachdb.conf.wal-failover.validation" $ }} + --wal-failover={{ . }} {{- end }} {{- if .Values.conf.log.enabled }} --log-config-file=/cockroach/log-config/log-config.yaml @@ -271,8 +279,21 @@ spec: {{- end }} protocol: TCP volumeMounts: + {{- /* Store 1 mounts "datadir" at /cockroach/<conf.path>/; store N (N > 1) mounts "datadir-N" at /cockroach/<conf.path>-N/. */ -}} {{- range $i := until (int .Values.conf.store.count) }} + {{- if eq $i 0 }} - name: datadir - mountPath: /cockroach/{{ .Values.conf.path }}/ + mountPath: /cockroach/{{ $.Values.conf.path }}/ + {{- else }} + - name: datadir-{{ add1 $i }} + mountPath: /cockroach/{{ $.Values.conf.path }}-{{ add1 $i }}/ + {{- end }} + {{- end }} + {{- /* The WAL failover side disk is mounted only when its persistent volume is enabled. */ -}} {{- with index .Values.conf `wal-failover` `persistentVolume` }} + {{- if .enabled }} + - name: failoverdir + mountPath: /cockroach/{{ .path }}/ + {{- end }} + {{- end }} {{- if .Values.tls.enabled }} - name: certs mountPath: /cockroach/cockroach-certs/ @@ -344,16 +365,42 @@ spec: resources: {{- toYaml .
| nindent 12 }} {{- end }} volumes: + {{- /* One data volume per store: "datadir" for store 1, "datadir-N" for store N (N > 1). For hostPath storage, store 1 uses storage.hostPath as-is and store N uses "<storage.hostPath>-N", so that no two stores share a host directory. */ -}} {{- range $i := until (int .Values.conf.store.count) }} + {{- if eq $i 0 }} - name: datadir - {{- if .Values.storage.persistentVolume.enabled }} + {{- if $.Values.storage.persistentVolume.enabled }} persistentVolumeClaim: claimName: datadir - {{- else if .Values.storage.hostPath }} + {{- else if $.Values.storage.hostPath }} + hostPath: + path: {{ $.Values.storage.hostPath | quote }} + {{- else }} + emptyDir: {} + {{- end }} + {{- else }} + - name: datadir-{{ add1 $i }} + {{- if $.Values.storage.persistentVolume.enabled }} + persistentVolumeClaim: + claimName: datadir-{{ add1 $i }} + {{- else if $.Values.storage.hostPath }} hostPath: - path: {{ .Values.storage.hostPath | quote }} + path: {{ printf "%s-%d" $.Values.storage.hostPath (add1 $i) | quote }} {{- else }} emptyDir: {} {{- end }} + {{- end }} + {{- end }} + {{- /* A "failoverdir" volume is declared whenever a WAL failover value is set: backed by the failoverdir claim when the side-disk persistent volume is enabled, otherwise an emptyDir. */ -}} {{- with index .Values.conf `wal-failover` }} + {{- if .value }} + - name: failoverdir + {{- if .persistentVolume.enabled }} + persistentVolumeClaim: + claimName: failoverdir + {{- else }} + emptyDir: {} + {{- end }} + {{- end }} + {{- end }} {{- with .Values.statefulset.volumes }} {{ toYaml . | nindent 8 }} {{- end }} @@ -418,35 +465,71 @@ spec: runAsNonRoot: true {{- end }} {{- end }} -{{- if or .Values.storage.persistentVolume.enabled .Values.conf.log.persistentVolume.enabled }} +{{- if or .Values.storage.persistentVolume.enabled (index .Values.conf `wal-failover` `persistentVolume` `enabled`) .Values.conf.log.persistentVolume.enabled }} volumeClaimTemplates: {{- if .Values.storage.persistentVolume.enabled }} + {{- /* One claim template per store, named like the data volumes: datadir, datadir-2, ..., datadir-N. */ -}} {{- range $i := until (int .Values.conf.store.count) }} - metadata: + {{- if eq $i 0 }} name: datadir + {{- else }} + name: datadir-{{ add1 $i }} + {{- end }} labels: - app.kubernetes.io/name: {{ template "cockroachdb.name" .
}} - app.kubernetes.io/instance: {{ .Release.Name | quote }} - {{- with .Values.storage.persistentVolume.labels }} + app.kubernetes.io/name: {{ template "cockroachdb.name" $ }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + {{- with $.Values.storage.persistentVolume.labels }} {{- toYaml . | nindent 10 }} {{- end }} - {{- with .Values.labels }} + {{- with $.Values.labels }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with $.Values.storage.persistentVolume.annotations }} + annotations: {{- toYaml . | nindent 10 }} + {{- end }} + spec: + accessModes: ["ReadWriteOnce"] + {{- if $.Values.storage.persistentVolume.storageClass }} + {{- if (eq "-" $.Values.storage.persistentVolume.storageClass) }} + storageClassName: "" + {{- else }} + storageClassName: {{ $.Values.storage.persistentVolume.storageClass | quote}} + {{- end }} + {{- end }} + resources: + requests: + storage: {{ $.Values.storage.persistentVolume.size | quote }} + {{- end }} + {{- end }} + {{- /* Claim template for the WAL failover side disk; rendered only when conf.wal-failover.persistentVolume.enabled is true. Labels, annotations, storage class and size come from conf.wal-failover.persistentVolume. */ -}} {{- with index .Values.conf `wal-failover` }} + {{- if .persistentVolume.enabled }} + - metadata: + name: failoverdir + labels: + app.kubernetes.io/name: {{ template "cockroachdb.name" $ }} + app.kubernetes.io/instance: {{ $.Release.Name | quote }} + {{- with .persistentVolume.labels }} {{- toYaml . | nindent 10 }} {{- end }} - {{- with .Values.storage.persistentVolume.annotations }} + {{- with $.Values.labels }} + {{- toYaml . | nindent 10 }} + {{- end }} + {{- with .persistentVolume.annotations }} annotations: {{- toYaml . | nindent 10 }} {{- end }} spec: accessModes: ["ReadWriteOnce"] - {{- if .Values.storage.persistentVolume.storageClass }} - {{- if (eq "-" .Values.storage.persistentVolume.storageClass) }} + {{- with .persistentVolume.storageClass }} + {{- if eq "-" . }} storageClassName: "" {{- else }} - storageClassName: {{ .Values.storage.persistentVolume.storageClass | quote}} + storageClassName: {{ .
| quote}} {{- end }} {{- end }} resources: requests: - storage: {{ .Values.storage.persistentVolume.size | quote }} + storage: {{ .persistentVolume.size | quote }} + {{- end }} {{- end }} {{- if .Values.conf.log.persistentVolume.enabled }} - metadata: diff --git a/cockroachdb/values.yaml b/cockroachdb/values.yaml index 70290660..7e61d0f3 100644 --- a/cockroachdb/values.yaml +++ b/cockroachdb/values.yaml @@ -176,12 +176,19 @@ conf: http-port: "" # CockroachDB's data mount path. + # For multi-store configuration, the path for each store is derived from this value (shown here for the default `cockroach-data`): + # Store 1: cockroach-data + # Store 2: cockroach-data-2 + # Store N: cockroach-data-N path: cockroach-data # CockroachDB's storage configuration https://www.cockroachlabs.com/docs/v21.1/cockroach-start.html#storage # Uses --store flag store: enabled: false + # Number of data stores per node. Must be 1 while `enabled` is false (the chart fails to render otherwise). + # For multi-store configuration, set `enabled` to true and this to a value greater than 1. + count: 1 # Should be empty or 'mem' type: # Required for type=mem. If type and size is empty - storage.persistentVolume.size is used @@ -189,6 +196,40 @@ conf: # Arbitrary strings, separated by colons, specifying disk type or capability attrs: + # CockroachDB's WAL failover configuration: + # https://www.cockroachlabs.com/docs/stable/cockroach-start#write-ahead-log-wal-failover + # Uses `--wal-failover` flag + wal-failover: + # The value to be passed to the `--wal-failover` flag. + # Possible configurations: + # 1. Default: If empty, `--wal-failover` is not passed to cockroach start + # 2. Disabled: Set to `disabled` to disable WAL failover + # 3. Multiple stores: Set to `among-stores` to enable WAL failover among multiple stores. + # Requires `conf.store.enabled: true` and `conf.store.count` greater than 1. + # 4. Single store Side disk: Set to `path=<path>` to enable WAL failover to a side disk. + # `persistentVolume.enabled` must be true and <path> must start with `/cockroach/<persistentVolume.path>` (e.g. `path=/cockroach/cockroach-failover`).
+ value: + + persistentVolume: + # If enabled, then a PersistentVolumeClaim will be created and + # used for WAL failover as a side disk. + # https://www.cockroachlabs.com/docs/v24.3/wal-failover#provision-a-single-store-cluster-and-side-disk-for-wal-failover + enabled: false + # Mount path for the side disk. `/cockroach/` is prepended to this value in the stateful set. + path: cockroach-failover + size: 25Gi + # If defined, then `storageClassName: <storageClass>`. + # If set to "-", then `storageClassName: ""`, which disables dynamic + # provisioning. + # If undefined or empty (default), then no `storageClassName` spec is + # set, so the default provisioner will be chosen (gp2 on AWS, standard + # on GKE, AWS & OpenStack). + storageClass: "" + # Additional labels to apply to the created PersistentVolumeClaims. + labels: {} + # Additional annotations to apply to the created PersistentVolumeClaims. + annotations: {} + statefulset: replicas: 3 updateStrategy: