From 92a3192016a832916e6b94669bd428e70950408b Mon Sep 17 00:00:00 2001 From: Shreyas Rao <42259948+shreyas-s-rao@users.noreply.github.com> Date: Sun, 8 Sep 2024 16:02:27 +0530 Subject: [PATCH] Fix peer TLS enablement during scale-up of etcd cluster (#874) * Fix peer TLS enablement during scale-up of etcd cluster Co-authored-by: madhav bhargava --- internal/component/rolebinding/rolebinding.go | 4 ++-- internal/component/statefulset/statefulset.go | 16 ++++++------- internal/utils/lease.go | 7 ++++-- internal/utils/lease_test.go | 4 ++-- test/it/controller/etcd/helper.go | 24 ++++++++++++------- test/it/controller/etcd/reconciler_test.go | 12 +++++++++- 6 files changed, 44 insertions(+), 23 deletions(-) diff --git a/internal/component/rolebinding/rolebinding.go b/internal/component/rolebinding/rolebinding.go index 2cbb520dd..f59ebc003 100644 --- a/internal/component/rolebinding/rolebinding.go +++ b/internal/component/rolebinding/rolebinding.go @@ -80,14 +80,14 @@ func (r _resource) Sync(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd) fmt.Sprintf("Error during create or update of role-binding %v for etcd: %v", objectKey, druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)), ) } - ctx.Logger.Info("synced", "component", "role", "objectKey", objectKey, "result", result) + ctx.Logger.Info("synced", "component", "role-binding", "objectKey", objectKey, "result", result) return nil } // TriggerDelete triggers the deletion of the role binding for the given Etcd. func (r _resource) TriggerDelete(ctx component.OperatorContext, etcdObjMeta metav1.ObjectMeta) error { objectKey := getObjectKey(etcdObjMeta) - ctx.Logger.Info("Triggering deletion of role", "objectKey", objectKey) + ctx.Logger.Info("Triggering deletion of role-binding", "objectKey", objectKey) if err := r.client.Delete(ctx, emptyRoleBinding(objectKey)); err != nil { if errors.IsNotFound(err) { ctx.Logger.Info("No RoleBinding found, Deletion is a No-Op", "objectKey", objectKey) diff --git a/internal/component/statefulset/statefulset.go b/internal/component/statefulset/statefulset.go index 581254191..96eb35f5f 100644 --- a/internal/component/statefulset/statefulset.go +++ b/internal/component/statefulset/statefulset.go @@ -112,7 +112,7 @@ func (r _resource) PreSync(ctx component.OperatorContext, etcd *druidv1alpha1.Et fmt.Sprintf("StatefulSet pods are not yet updated with new labels, for StatefulSet: %v for etcd: %v", getObjectKey(sts.ObjectMeta), druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)), ) } else { - ctx.Logger.Info("StatefulSet pods are updated with new labels", "objectKey", getObjectKey(etcd.ObjectMeta)) + ctx.Logger.Info("StatefulSet pods have all the desired labels", "objectKey", getObjectKey(etcd.ObjectMeta)) } // if sts label selector needs to be changed, then delete the statefulset, but keeping the pods intact. @@ -215,6 +215,10 @@ func (r _resource) createOrPatch(ctx component.OperatorContext, etcd *druidv1alp } func (r _resource) handlePeerTLSChanges(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) error { + if etcd.Spec.Etcd.PeerUrlTLS == nil { + return nil + } + peerTLSEnabledForMembers, err := utils.IsPeerURLTLSEnabledForMembers(ctx, r.client, ctx.Logger, etcd.Namespace, etcd.Name, *existingSts.Spec.Replicas) if err != nil { return druiderr.WrapError(err, @@ -223,7 +227,7 @@ func (r _resource) handlePeerTLSChanges(ctx component.OperatorContext, etcd *dru fmt.Sprintf("Error checking if peer TLS is enabled for statefulset: %v, etcd: %v", client.ObjectKeyFromObject(existingSts), client.ObjectKeyFromObject(etcd))) } - if isPeerTLSEnablementPending(peerTLSEnabledForMembers, etcd) { + if !peerTLSEnabledForMembers { if !isStatefulSetPatchedWithPeerTLSVolMount(existingSts) { // This step ensures that only STS is updated with secret volume mounts which gets added to the etcd component due to // enabling of TLS for peer communication. It preserves the current STS replicas. @@ -236,11 +240,12 @@ func (r _resource) handlePeerTLSChanges(ctx component.OperatorContext, etcd *dru } else { ctx.Logger.Info("Secret volume mounts to enable Peer URL TLS have already been mounted. Skipping patching StatefulSet with secret volume mounts.") } - return druiderr.WrapError(err, + return druiderr.New( druiderr.ErrRequeueAfter, "Sync", fmt.Sprintf("Peer URL TLS not enabled for #%d members for etcd: %v, requeuing reconcile request", existingSts.Spec.Replicas, client.ObjectKeyFromObject(etcd))) } + ctx.Logger.Info("Peer URL TLS has been enabled for all currently running members") return nil } @@ -259,11 +264,6 @@ func isStatefulSetPatchedWithPeerTLSVolMount(sts *appsv1.StatefulSet) bool { return peerURLCAEtcdVolPresent && peerURLEtcdServerTLSVolPresent } -// isPeerTLSEnablementPending checks if the peer URL TLS has been enabled for the etcd, but it has not yet reflected in all etcd members. -func isPeerTLSEnablementPending(peerTLSEnabledStatusFromMembers bool, etcd *druidv1alpha1.Etcd) bool { - return !peerTLSEnabledStatusFromMembers && etcd.Spec.Etcd.PeerUrlTLS != nil -} - func (r _resource) checkAndPatchStsPodLabelsOnMismatch(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) error { desiredPodTemplateLabels := getDesiredPodTemplateLabels(etcd) if !utils.ContainsAllDesiredLabels(sts.Spec.Template.Labels, desiredPodTemplateLabels) { diff --git a/internal/utils/lease.go b/internal/utils/lease.go index bca97415d..a4a08f323 100644 --- a/internal/utils/lease.go +++ b/internal/utils/lease.go @@ -18,7 +18,10 @@ import ( "sigs.k8s.io/controller-runtime/pkg/client" ) -const peerURLTLSEnabledKey = "member.etcd.gardener.cloud/tls-enabled" +// LeaseAnnotationKeyPeerURLTLSEnabled is the annotation key present on the member lease. +// If its value is `true` then it indicates that the member is TLS enabled. +// If the annotation is not present or its value is `false` then it indicates that the member is not TLS enabled. +const LeaseAnnotationKeyPeerURLTLSEnabled = "member.etcd.gardener.cloud/tls-enabled" // IsPeerURLTLSEnabledForMembers checks if TLS has been enabled for all existing members of an etcd cluster identified by etcdName and in the provided namespace. func IsPeerURLTLSEnabledForMembers(ctx context.Context, cl client.Client, logger logr.Logger, namespace, etcdName string, numReplicas int32) (bool, error) { @@ -47,7 +50,7 @@ func IsPeerURLTLSEnabledForMembers(ctx context.Context, cl client.Client, logger func parseAndGetTLSEnabledValue(lease coordinationv1.Lease, logger logr.Logger) (bool, error) { if lease.Annotations != nil { - if tlsEnabledStr, ok := lease.Annotations[peerURLTLSEnabledKey]; ok { + if tlsEnabledStr, ok := lease.Annotations[LeaseAnnotationKeyPeerURLTLSEnabled]; ok { tlsEnabled, err := strconv.ParseBool(tlsEnabledStr) if err != nil { logger.Error(err, "tls-enabled value is not a valid boolean", "namespace", lease.Namespace, "leaseName", lease.Name) diff --git a/internal/utils/lease_test.go b/internal/utils/lease_test.go index 95d3ebc11..583648a07 100644 --- a/internal/utils/lease_test.go +++ b/internal/utils/lease_test.go @@ -96,7 +96,7 @@ func createLeases(namespace, etcdName string, numLease, withTLSEnabled int) []*c var annotations map[string]string if tlsEnabledCount < withTLSEnabled { annotations = map[string]string{ - peerURLTLSEnabledKey: "true", + LeaseAnnotationKeyPeerURLTLSEnabled: "true", } tlsEnabledCount++ } else { @@ -120,7 +120,7 @@ func randomizeAnnotations() map[string]string { rBool := r.Intn(2) == 1 if rBool { return map[string]string{ - peerURLTLSEnabledKey: "false", + LeaseAnnotationKeyPeerURLTLSEnabled: "false", } } return nil diff --git a/test/it/controller/etcd/helper.go b/test/it/controller/etcd/helper.go index 0207c9b7b..f519b669e 100644 --- a/test/it/controller/etcd/helper.go +++ b/test/it/controller/etcd/helper.go @@ -175,22 +175,30 @@ func createAndAssertEtcdReconciliation(ctx context.Context, t *testing.T, reconc } type etcdMemberLeaseConfig struct { - name string - memberID string - role druidv1alpha1.EtcdRole - renewTime *metav1.MicroTime + name string + memberID string + role druidv1alpha1.EtcdRole + renewTime *metav1.MicroTime + annotations map[string]string } -func updateMemberLeaseSpec(ctx context.Context, t *testing.T, cl client.Client, namespace string, memberLeaseConfigs []etcdMemberLeaseConfig) { +func updateMemberLeases(ctx context.Context, t *testing.T, cl client.Client, namespace string, memberLeaseConfigs []etcdMemberLeaseConfig) { g := NewWithT(t) for _, config := range memberLeaseConfigs { lease := &coordinationv1.Lease{} g.Expect(cl.Get(ctx, client.ObjectKey{Name: config.name, Namespace: namespace}, lease)).To(Succeed()) updatedLease := lease.DeepCopy() - updatedLease.Spec.HolderIdentity = ptr.To(fmt.Sprintf("%s:%s", config.memberID, config.role)) - updatedLease.Spec.RenewTime = config.renewTime + for key, value := range config.annotations { + metav1.SetMetaDataAnnotation(&updatedLease.ObjectMeta, key, value) + } + if config.memberID != "" && config.role != "" { + updatedLease.Spec.HolderIdentity = ptr.To(fmt.Sprintf("%s:%s", config.memberID, config.role)) + } + if config.renewTime != nil { + updatedLease.Spec.RenewTime = config.renewTime + } g.Expect(cl.Update(ctx, updatedLease)).To(Succeed()) - t.Logf("successfully updated member lease %s with holderIdentity: %s", config.name, *updatedLease.Spec.HolderIdentity) + t.Logf("successfully updated member lease %s with config %+v", config.name, config) } } diff --git a/test/it/controller/etcd/reconciler_test.go b/test/it/controller/etcd/reconciler_test.go index b6416ff2d..735578113 100644 --- a/test/it/controller/etcd/reconciler_test.go +++ b/test/it/controller/etcd/reconciler_test.go @@ -14,6 +14,7 @@ import ( druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1" "github.com/gardener/etcd-druid/internal/common" "github.com/gardener/etcd-druid/internal/component" + "github.com/gardener/etcd-druid/internal/utils" "github.com/gardener/etcd-druid/test/it/controller/assets" "github.com/gardener/etcd-druid/test/it/setup" testutils "github.com/gardener/etcd-druid/test/utils" @@ -235,6 +236,15 @@ func testEtcdSpecUpdateWhenReconcileOperationAnnotationIsSet(t *testing.T, testN // create etcdInstance resource and assert successful reconciliation, and ensure that sts generation is 1 createAndAssertEtcdReconciliation(ctx, t, reconcilerTestEnv, etcdInstance) assertStatefulSetGeneration(ctx, t, reconcilerTestEnv.itTestEnv.GetClient(), client.ObjectKeyFromObject(etcdInstance), 1, 30*time.Second, 2*time.Second) + // update member leases with peer-tls-enabled annotation set to true + memberLeaseNames := druidv1alpha1.GetMemberLeaseNames(etcdInstance.ObjectMeta, etcdInstance.Spec.Replicas) + t.Log("updating member leases with peer-tls-enabled annotation set to true") + mlcs := []etcdMemberLeaseConfig{ + {name: memberLeaseNames[0], annotations: map[string]string{utils.LeaseAnnotationKeyPeerURLTLSEnabled: "true"}}, + {name: memberLeaseNames[1], annotations: map[string]string{utils.LeaseAnnotationKeyPeerURLTLSEnabled: "true"}}, + {name: memberLeaseNames[2], annotations: map[string]string{utils.LeaseAnnotationKeyPeerURLTLSEnabled: "true"}}, + } + updateMemberLeases(context.Background(), t, reconcilerTestEnv.itTestEnv.GetClient(), testNs, mlcs) // get latest version of etcdInstance g.Expect(cl.Get(ctx, druidv1alpha1.GetNamespaceName(etcdInstance.ObjectMeta), etcdInstance)).To(Succeed()) // update etcdInstance spec with reconcile operation annotation also set @@ -428,7 +438,7 @@ func testConditionsAndMembersWhenAllMemberLeasesAreActive(t *testing.T, etcd *dr {name: memberLeaseNames[1], memberID: testutils.GenerateRandomAlphanumericString(g, 8), role: druidv1alpha1.EtcdRoleLeader, renewTime: &metav1.MicroTime{Time: clock.Now()}}, {name: memberLeaseNames[2], memberID: testutils.GenerateRandomAlphanumericString(g, 8), role: druidv1alpha1.EtcdRoleMember, renewTime: &metav1.MicroTime{Time: clock.Now().Add(-time.Second * 30)}}, } - updateMemberLeaseSpec(context.Background(), t, reconcilerTestEnv.itTestEnv.GetClient(), testNs, mlcs) + updateMemberLeases(context.Background(), t, reconcilerTestEnv.itTestEnv.GetClient(), testNs, mlcs) // ******************************* test etcd status update flow ******************************* expectedConditions := []druidv1alpha1.Condition{ {Type: druidv1alpha1.ConditionTypeReady, Status: druidv1alpha1.ConditionTrue},