Skip to content

Commit

Permalink
Fix peer TLS enablement during scale-up of etcd cluster (#874)
Browse files Browse the repository at this point in the history
* Fix peer TLS enablement during scale-up of etcd cluster
Co-authored-by: madhav bhargava <[email protected]>
  • Loading branch information
shreyas-s-rao authored Sep 8, 2024
1 parent 55efca1 commit 92a3192
Show file tree
Hide file tree
Showing 6 changed files with 44 additions and 23 deletions.
4 changes: 2 additions & 2 deletions internal/component/rolebinding/rolebinding.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,14 +80,14 @@ func (r _resource) Sync(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd)
fmt.Sprintf("Error during create or update of role-binding %v for etcd: %v", objectKey, druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)),
)
}
ctx.Logger.Info("synced", "component", "role", "objectKey", objectKey, "result", result)
ctx.Logger.Info("synced", "component", "role-binding", "objectKey", objectKey, "result", result)
return nil
}

// TriggerDelete triggers the deletion of the role binding for the given Etcd.
func (r _resource) TriggerDelete(ctx component.OperatorContext, etcdObjMeta metav1.ObjectMeta) error {
objectKey := getObjectKey(etcdObjMeta)
ctx.Logger.Info("Triggering deletion of role", "objectKey", objectKey)
ctx.Logger.Info("Triggering deletion of role-binding", "objectKey", objectKey)
if err := r.client.Delete(ctx, emptyRoleBinding(objectKey)); err != nil {
if errors.IsNotFound(err) {
ctx.Logger.Info("No RoleBinding found, Deletion is a No-Op", "objectKey", objectKey)
Expand Down
16 changes: 8 additions & 8 deletions internal/component/statefulset/statefulset.go
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,7 @@ func (r _resource) PreSync(ctx component.OperatorContext, etcd *druidv1alpha1.Et
fmt.Sprintf("StatefulSet pods are not yet updated with new labels, for StatefulSet: %v for etcd: %v", getObjectKey(sts.ObjectMeta), druidv1alpha1.GetNamespaceName(etcd.ObjectMeta)),
)
} else {
ctx.Logger.Info("StatefulSet pods are updated with new labels", "objectKey", getObjectKey(etcd.ObjectMeta))
ctx.Logger.Info("StatefulSet pods have all the desired labels", "objectKey", getObjectKey(etcd.ObjectMeta))
}

// if sts label selector needs to be changed, then delete the statefulset, but keeping the pods intact.
Expand Down Expand Up @@ -215,6 +215,10 @@ func (r _resource) createOrPatch(ctx component.OperatorContext, etcd *druidv1alp
}

func (r _resource) handlePeerTLSChanges(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, existingSts *appsv1.StatefulSet) error {
if etcd.Spec.Etcd.PeerUrlTLS == nil {
return nil
}

peerTLSEnabledForMembers, err := utils.IsPeerURLTLSEnabledForMembers(ctx, r.client, ctx.Logger, etcd.Namespace, etcd.Name, *existingSts.Spec.Replicas)
if err != nil {
return druiderr.WrapError(err,
Expand All @@ -223,7 +227,7 @@ func (r _resource) handlePeerTLSChanges(ctx component.OperatorContext, etcd *dru
fmt.Sprintf("Error checking if peer TLS is enabled for statefulset: %v, etcd: %v", client.ObjectKeyFromObject(existingSts), client.ObjectKeyFromObject(etcd)))
}

if isPeerTLSEnablementPending(peerTLSEnabledForMembers, etcd) {
if !peerTLSEnabledForMembers {
if !isStatefulSetPatchedWithPeerTLSVolMount(existingSts) {
// This step ensures that only STS is updated with secret volume mounts which gets added to the etcd component due to
// enabling of TLS for peer communication. It preserves the current STS replicas.
Expand All @@ -236,11 +240,12 @@ func (r _resource) handlePeerTLSChanges(ctx component.OperatorContext, etcd *dru
} else {
ctx.Logger.Info("Secret volume mounts to enable Peer URL TLS have already been mounted. Skipping patching StatefulSet with secret volume mounts.")
}
return druiderr.WrapError(err,
return druiderr.New(
druiderr.ErrRequeueAfter,
"Sync",
fmt.Sprintf("Peer URL TLS not enabled for #%d members for etcd: %v, requeuing reconcile request", existingSts.Spec.Replicas, client.ObjectKeyFromObject(etcd)))
}

ctx.Logger.Info("Peer URL TLS has been enabled for all currently running members")
return nil
}
Expand All @@ -259,11 +264,6 @@ func isStatefulSetPatchedWithPeerTLSVolMount(sts *appsv1.StatefulSet) bool {
return peerURLCAEtcdVolPresent && peerURLEtcdServerTLSVolPresent
}

// isPeerTLSEnablementPending checks if the peer URL TLS has been enabled for the etcd, but it has not yet reflected in all etcd members.
func isPeerTLSEnablementPending(peerTLSEnabledStatusFromMembers bool, etcd *druidv1alpha1.Etcd) bool {
return !peerTLSEnabledStatusFromMembers && etcd.Spec.Etcd.PeerUrlTLS != nil
}

func (r _resource) checkAndPatchStsPodLabelsOnMismatch(ctx component.OperatorContext, etcd *druidv1alpha1.Etcd, sts *appsv1.StatefulSet) error {
desiredPodTemplateLabels := getDesiredPodTemplateLabels(etcd)
if !utils.ContainsAllDesiredLabels(sts.Spec.Template.Labels, desiredPodTemplateLabels) {
Expand Down
7 changes: 5 additions & 2 deletions internal/utils/lease.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,10 @@ import (
"sigs.k8s.io/controller-runtime/pkg/client"
)

const peerURLTLSEnabledKey = "member.etcd.gardener.cloud/tls-enabled"
// LeaseAnnotationKeyPeerURLTLSEnabled is the annotation key present on the member lease.
// If its value is `true` then it indicates that the member is TLS enabled.
// If the annotation is not present or its value is `false` then it indicates that the member is not TLS enabled.
const LeaseAnnotationKeyPeerURLTLSEnabled = "member.etcd.gardener.cloud/tls-enabled"

// IsPeerURLTLSEnabledForMembers checks if TLS has been enabled for all existing members of an etcd cluster identified by etcdName and in the provided namespace.
func IsPeerURLTLSEnabledForMembers(ctx context.Context, cl client.Client, logger logr.Logger, namespace, etcdName string, numReplicas int32) (bool, error) {
Expand Down Expand Up @@ -47,7 +50,7 @@ func IsPeerURLTLSEnabledForMembers(ctx context.Context, cl client.Client, logger

func parseAndGetTLSEnabledValue(lease coordinationv1.Lease, logger logr.Logger) (bool, error) {
if lease.Annotations != nil {
if tlsEnabledStr, ok := lease.Annotations[peerURLTLSEnabledKey]; ok {
if tlsEnabledStr, ok := lease.Annotations[LeaseAnnotationKeyPeerURLTLSEnabled]; ok {
tlsEnabled, err := strconv.ParseBool(tlsEnabledStr)
if err != nil {
logger.Error(err, "tls-enabled value is not a valid boolean", "namespace", lease.Namespace, "leaseName", lease.Name)
Expand Down
4 changes: 2 additions & 2 deletions internal/utils/lease_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ func createLeases(namespace, etcdName string, numLease, withTLSEnabled int) []*c
var annotations map[string]string
if tlsEnabledCount < withTLSEnabled {
annotations = map[string]string{
peerURLTLSEnabledKey: "true",
LeaseAnnotationKeyPeerURLTLSEnabled: "true",
}
tlsEnabledCount++
} else {
Expand All @@ -120,7 +120,7 @@ func randomizeAnnotations() map[string]string {
rBool := r.Intn(2) == 1
if rBool {
return map[string]string{
peerURLTLSEnabledKey: "false",
LeaseAnnotationKeyPeerURLTLSEnabled: "false",
}
}
return nil
Expand Down
24 changes: 16 additions & 8 deletions test/it/controller/etcd/helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,22 +175,30 @@ func createAndAssertEtcdReconciliation(ctx context.Context, t *testing.T, reconc
}

type etcdMemberLeaseConfig struct {
name string
memberID string
role druidv1alpha1.EtcdRole
renewTime *metav1.MicroTime
name string
memberID string
role druidv1alpha1.EtcdRole
renewTime *metav1.MicroTime
annotations map[string]string
}

func updateMemberLeaseSpec(ctx context.Context, t *testing.T, cl client.Client, namespace string, memberLeaseConfigs []etcdMemberLeaseConfig) {
func updateMemberLeases(ctx context.Context, t *testing.T, cl client.Client, namespace string, memberLeaseConfigs []etcdMemberLeaseConfig) {
g := NewWithT(t)
for _, config := range memberLeaseConfigs {
lease := &coordinationv1.Lease{}
g.Expect(cl.Get(ctx, client.ObjectKey{Name: config.name, Namespace: namespace}, lease)).To(Succeed())
updatedLease := lease.DeepCopy()
updatedLease.Spec.HolderIdentity = ptr.To(fmt.Sprintf("%s:%s", config.memberID, config.role))
updatedLease.Spec.RenewTime = config.renewTime
for key, value := range config.annotations {
metav1.SetMetaDataAnnotation(&updatedLease.ObjectMeta, key, value)
}
if config.memberID != "" && config.role != "" {
updatedLease.Spec.HolderIdentity = ptr.To(fmt.Sprintf("%s:%s", config.memberID, config.role))
}
if config.renewTime != nil {
updatedLease.Spec.RenewTime = config.renewTime
}
g.Expect(cl.Update(ctx, updatedLease)).To(Succeed())
t.Logf("successfully updated member lease %s with holderIdentity: %s", config.name, *updatedLease.Spec.HolderIdentity)
t.Logf("successfully updated member lease %s with config %+v", config.name, config)
}
}

Expand Down
12 changes: 11 additions & 1 deletion test/it/controller/etcd/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ import (
druidv1alpha1 "github.com/gardener/etcd-druid/api/v1alpha1"
"github.com/gardener/etcd-druid/internal/common"
"github.com/gardener/etcd-druid/internal/component"
"github.com/gardener/etcd-druid/internal/utils"
"github.com/gardener/etcd-druid/test/it/controller/assets"
"github.com/gardener/etcd-druid/test/it/setup"
testutils "github.com/gardener/etcd-druid/test/utils"
Expand Down Expand Up @@ -235,6 +236,15 @@ func testEtcdSpecUpdateWhenReconcileOperationAnnotationIsSet(t *testing.T, testN
// create etcdInstance resource and assert successful reconciliation, and ensure that sts generation is 1
createAndAssertEtcdReconciliation(ctx, t, reconcilerTestEnv, etcdInstance)
assertStatefulSetGeneration(ctx, t, reconcilerTestEnv.itTestEnv.GetClient(), client.ObjectKeyFromObject(etcdInstance), 1, 30*time.Second, 2*time.Second)
// update member leases with peer-tls-enabled annotation set to true
memberLeaseNames := druidv1alpha1.GetMemberLeaseNames(etcdInstance.ObjectMeta, etcdInstance.Spec.Replicas)
t.Log("updating member leases with peer-tls-enabled annotation set to true")
mlcs := []etcdMemberLeaseConfig{
{name: memberLeaseNames[0], annotations: map[string]string{utils.LeaseAnnotationKeyPeerURLTLSEnabled: "true"}},
{name: memberLeaseNames[1], annotations: map[string]string{utils.LeaseAnnotationKeyPeerURLTLSEnabled: "true"}},
{name: memberLeaseNames[2], annotations: map[string]string{utils.LeaseAnnotationKeyPeerURLTLSEnabled: "true"}},
}
updateMemberLeases(context.Background(), t, reconcilerTestEnv.itTestEnv.GetClient(), testNs, mlcs)
// get latest version of etcdInstance
g.Expect(cl.Get(ctx, druidv1alpha1.GetNamespaceName(etcdInstance.ObjectMeta), etcdInstance)).To(Succeed())
// update etcdInstance spec with reconcile operation annotation also set
Expand Down Expand Up @@ -428,7 +438,7 @@ func testConditionsAndMembersWhenAllMemberLeasesAreActive(t *testing.T, etcd *dr
{name: memberLeaseNames[1], memberID: testutils.GenerateRandomAlphanumericString(g, 8), role: druidv1alpha1.EtcdRoleLeader, renewTime: &metav1.MicroTime{Time: clock.Now()}},
{name: memberLeaseNames[2], memberID: testutils.GenerateRandomAlphanumericString(g, 8), role: druidv1alpha1.EtcdRoleMember, renewTime: &metav1.MicroTime{Time: clock.Now().Add(-time.Second * 30)}},
}
updateMemberLeaseSpec(context.Background(), t, reconcilerTestEnv.itTestEnv.GetClient(), testNs, mlcs)
updateMemberLeases(context.Background(), t, reconcilerTestEnv.itTestEnv.GetClient(), testNs, mlcs)
// ******************************* test etcd status update flow *******************************
expectedConditions := []druidv1alpha1.Condition{
{Type: druidv1alpha1.ConditionTypeReady, Status: druidv1alpha1.ConditionTrue},
Expand Down

0 comments on commit 92a3192

Please sign in to comment.