diff --git a/README.md b/README.md index 71c82476a..2ce96558f 100644 --- a/README.md +++ b/README.md @@ -33,7 +33,14 @@ For each **rollout group**, the operator **guarantees**: The operator can also optionally coordinate scaling up and down of `StatefulSets` that are part of the same `rollout-group` based on the `grafana.com/rollout-downscale-leader` annotation. When using this feature, the `grafana.com/min-time-between-zones-downscale` label must also be set on each `StatefulSet`. -This can be useful for automating the tedious scaling of stateful services like Mimir ingesters. Making use of this feature requires adding a few annotations and labels to configure how it works. Examples for a multi-AZ ingester group are given below. +This can be useful for automating the tedious scaling of stateful services like Mimir ingesters. Making use of this feature requires adding a few annotations and labels to configure how it works. + +Additionally you can configure the operator to consider only the ready replicas of the leader `StatefulSet` when scaling up the follower `StatefulSets`. This is controlled by the `grafana.com/rollout-leader-ready` annotation. + +- If the annotation `grafana.com/rollout-leader-ready` is set to `true` on a follower StatefulSet, the operator will scale the follower based on the number of ready replicas in the leader StatefulSet, rather than the desired replicas. +- This feature ensures that a follower does not scale up beyond the number of ready replicas in the leader, preventing premature scaling in situations where the leader is not fully stable. + +Example usage for a multi-AZ ingester group: - For `ingester-zone-a`, add the following: - Labels: @@ -47,7 +54,8 @@ This can be useful for automating the tedious scaling of stateful services like - `grafana.com/min-time-between-zones-downscale=12h` (change the value here to an appropriate duration) - `grafana.com/prepare-downscale=true` (to allow the service to be notified when it will be scaled down) - Annotations: - - `grafana.com/rollout-downscale-leader=ingester-zone-a` (zone `b` will follow zone `a`, after a delay) + - `grafana.com/rollout-downscale-leader=ingester-zone-a` (zone `b` will follow zone `a`, after a delay) + - `grafana.com/rollout-leader-ready=true` (to consider only ready replicas in zone `a` before scaling up) - `grafana.com/prepare-downscale-http-path=ingester/prepare-shutdown` (to call a specific endpoint on the service) - `grafana.com/prepare-downscale-http-port=80` (to call a specific endpoint on the service) - For `ingester-zone-c`, add the following: @@ -56,6 +64,7 @@ This can be useful for automating the tedious scaling of stateful services like - `grafana.com/prepare-downscale=true` (to allow the service to be notified when it will be scaled down) - Annotations: - `grafana.com/rollout-downscale-leader=ingester-zone-b` (zone `c` will follow zone `b`, after a delay) + - `grafana.com/rollout-leader-ready=true` (to consider only ready replicas in zone `b` before scaling up) - `grafana.com/prepare-downscale-http-path=ingester/prepare-shutdown` (to call a specific endpoint on the service) - `grafana.com/prepare-downscale-http-port=80` (to call a specific endpoint on the service) diff --git a/pkg/config/config.go b/pkg/config/config.go index fcabb1e5d..409165e2f 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -28,6 +28,9 @@ const ( // RolloutDownscaleLeaderAnnotationKey is the name of the leader statefulset that should be used to determine // the number of replicas in a follower statefulset. RolloutDownscaleLeaderAnnotationKey = "grafana.com/rollout-downscale-leader" + // RolloutLeaderReadyKey is whether to only consider `ready` replicas in the leader zone before scaling up + RolloutLeaderReadyAnnotationKey = "grafana.com/rollout-leader-ready" + RolloutLeaderReadyAnnotationValue = "true" rolloutMirrorReplicasFromResourceAnnotationKeyPrefix = "grafana.com/rollout-mirror-replicas-from-resource" // RolloutMirrorReplicasFromResourceNameAnnotationKey -- when set (together with "kind" and optionally "api-version" annotations), rollout-operator sets number of diff --git a/pkg/controller/replicas.go b/pkg/controller/replicas.go index 0340d4da0..f9ac3fda7 100644 --- a/pkg/controller/replicas.go +++ b/pkg/controller/replicas.go @@ -19,9 +19,21 @@ func desiredStsReplicas(group string, sts *v1.StatefulSet, all []*v1.StatefulSet return followerReplicas, err } + // By default, consider the leader's desired replicas regardless of its current state. leaderReplicas := *leader.Spec.Replicas if leaderReplicas > followerReplicas { - // Scale up is always allowed immediately + // Handle scale-up scenarios + annotations := sts.GetAnnotations() + considerReady, ok := annotations[config.RolloutLeaderReadyAnnotationKey] + if ok && considerReady == config.RolloutLeaderReadyAnnotationValue { + // If the annotation is present, and equal to 'true', consider only the leader's ready replicas for scaling up. + leaderReplicas = leader.Status.ReadyReplicas + if leaderReplicas < followerReplicas { + // If fewer replicas are ready than currently exist in this zone, + // but the desired replicas are greater, do not scale. + return followerReplicas, nil + } + } return leaderReplicas, nil } diff --git a/pkg/controller/replicas_test.go b/pkg/controller/replicas_test.go index a29e93b0b..87afb48c2 100644 --- a/pkg/controller/replicas_test.go +++ b/pkg/controller/replicas_test.go @@ -211,6 +211,61 @@ func TestReconcileStsReplicas(t *testing.T) { require.Equal(t, int32(4), replicas) }) + t.Run("scale up considering ready replicas", func(t *testing.T) { + sts1 := mockStatefulSet("test-zone-a", withReplicas(4, 2)) + sts2 := mockStatefulSet("test-zone-b", withReplicas(2, 2), withAnnotations(map[string]string{ + config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a", + config.RolloutLeaderReadyAnnotationKey: config.RolloutLeaderReadyAnnotationValue, + })) + sts3 := mockStatefulSet("test-zone-c", withReplicas(1, 1), withAnnotations(map[string]string{ + config.RolloutDownscaleLeaderAnnotationKey: "test-zone-b", + config.RolloutLeaderReadyAnnotationKey: config.RolloutLeaderReadyAnnotationValue, + })) + + replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger()) + require.NoError(t, err) + require.Equal(t, int32(2), replicas) + + replicas, err = desiredStsReplicas("test", sts3, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger()) + require.NoError(t, err) + require.Equal(t, int32(2), replicas) + }) + + t.Run("scale up ignoring ready replicas", func(t *testing.T) { + sts1 := mockStatefulSet("test-zone-a", withReplicas(4, 2)) + sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{ + config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a", + })) + + replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger()) + require.NoError(t, err) + require.Equal(t, int32(4), replicas) + }) + + t.Run("do not scale down to ready replicas", func(t *testing.T) { + sts1 := mockStatefulSet("test-zone-a", withReplicas(5, 2)) + sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{ + config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a", + config.RolloutLeaderReadyAnnotationKey: config.RolloutLeaderReadyAnnotationValue, + })) + + replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger()) + require.NoError(t, err) + require.Equal(t, int32(3), replicas) + }) + + t.Run("scale down to replicas, not ready replicas", func(t *testing.T) { + sts1 := mockStatefulSet("test-zone-a", withReplicas(2, 1)) + sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{ + config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a", + config.RolloutLeaderReadyAnnotationKey: config.RolloutLeaderReadyAnnotationValue, + })) + + replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger()) + require.NoError(t, err) + require.Equal(t, int32(2), replicas) + }) + t.Run("scale down min time error", func(t *testing.T) { downscale1 := time.Now().UTC().Round(time.Second).Add(-72 * time.Hour) downscale2 := time.Now().UTC().Round(time.Second).Add(-60 * time.Hour)