Only scale up zone after leader zone replicas are ready
jhesketh committed Aug 12, 2024
1 parent 53c59f6 commit 6ef8b83
Showing 4 changed files with 82 additions and 3 deletions.
13 changes: 11 additions & 2 deletions README.md
@@ -33,7 +33,14 @@ For each **rollout group**, the operator **guarantees**:

The operator can also optionally coordinate scaling up and down of `StatefulSets` that are part of the same `rollout-group` based on the `grafana.com/rollout-downscale-leader` annotation. When using this feature, the `grafana.com/min-time-between-zones-downscale` label must also be set on each `StatefulSet`.

This can be useful for automating the tedious scaling of stateful services like Mimir ingesters. Making use of this feature requires adding a few annotations and labels to configure how it works. Examples for a multi-AZ ingester group are given below.
This can be useful for automating the tedious scaling of stateful services like Mimir ingesters. Making use of this feature requires adding a few annotations and labels to configure how it works.

Additionally, you can configure the operator to consider only the ready replicas of the leader `StatefulSet` when scaling up the follower `StatefulSets`. This is controlled by the `grafana.com/rollout-leader-ready` annotation.

- If the `grafana.com/rollout-leader-ready` annotation is set to `true` on a follower `StatefulSet`, the operator scales the follower based on the number of ready replicas in the leader `StatefulSet`, rather than its desired replicas.
- This ensures that a follower does not scale up beyond the number of ready replicas in the leader, preventing premature scaling while the leader is not yet fully stable. For example, if the leader wants 4 replicas but only 2 are ready, a follower currently at 2 stays at 2 until more leader replicas become ready.

Example usage for a multi-AZ ingester group (a combined YAML sketch follows this list):

- For `ingester-zone-a`, add the following:
  - Labels:
@@ -47,7 +54,8 @@ This can be useful for automating the tedious scaling of stateful services like
    - `grafana.com/min-time-between-zones-downscale=12h` (change the value here to an appropriate duration)
    - `grafana.com/prepare-downscale=true` (to allow the service to be notified when it will be scaled down)
  - Annotations:
    - `grafana.com/rollout-downscale-leader=ingester-zone-a` (zone `b` will follow zone `a`, after a delay)
    - `grafana.com/rollout-leader-ready=true` (to consider only ready replicas in zone `a` before scaling up)
    - `grafana.com/prepare-downscale-http-path=ingester/prepare-shutdown` (to call a specific endpoint on the service)
    - `grafana.com/prepare-downscale-http-port=80` (to call a specific endpoint on the service)
- For `ingester-zone-c`, add the following:
@@ -56,6 +64,7 @@ This can be useful for automating the tedious scaling of stateful services like
    - `grafana.com/prepare-downscale=true` (to allow the service to be notified when it will be scaled down)
  - Annotations:
    - `grafana.com/rollout-downscale-leader=ingester-zone-b` (zone `c` will follow zone `b`, after a delay)
    - `grafana.com/rollout-leader-ready=true` (to consider only ready replicas in zone `b` before scaling up)
    - `grafana.com/prepare-downscale-http-path=ingester/prepare-shutdown` (to call a specific endpoint on the service)
    - `grafana.com/prepare-downscale-http-port=80` (to call a specific endpoint on the service)
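
For reference, here is a minimal, hypothetical sketch of how these labels and annotations could look on the zone `b` follower `StatefulSet` (names and values are illustrative and should be adapted to your deployment):

```yaml
apiVersion: apps/v1
kind: StatefulSet
metadata:
  name: ingester-zone-b
  labels:
    rollout-group: ingester                            # same group as zones a and c
    grafana.com/min-time-between-zones-downscale: 12h  # minimum delay between zone downscales
    grafana.com/prepare-downscale: "true"              # notify the service before it is scaled down
  annotations:
    grafana.com/rollout-downscale-leader: ingester-zone-a  # zone b follows zone a, after a delay
    grafana.com/rollout-leader-ready: "true"               # only count ready replicas in zone a
    grafana.com/prepare-downscale-http-path: ingester/prepare-shutdown
    grafana.com/prepare-downscale-http-port: "80"
spec:
  # ... the usual StatefulSet spec; replicas are then coordinated by the operator ...
```

The same annotation can also be applied to an existing object, for example with `kubectl annotate statefulset ingester-zone-b grafana.com/rollout-leader-ready=true`.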

3 changes: 3 additions & 0 deletions pkg/config/config.go
@@ -28,6 +28,9 @@ const (
    // RolloutDownscaleLeaderAnnotationKey is the name of the leader statefulset that should be used to determine
    // the number of replicas in a follower statefulset.
    RolloutDownscaleLeaderAnnotationKey = "grafana.com/rollout-downscale-leader"
    // RolloutLeaderReadyAnnotationKey controls whether only `ready` replicas in the leader zone are considered before scaling up.
    RolloutLeaderReadyAnnotationKey   = "grafana.com/rollout-leader-ready"
    RolloutLeaderReadyAnnotationValue = "true"

    rolloutMirrorReplicasFromResourceAnnotationKeyPrefix = "grafana.com/rollout-mirror-replicas-from-resource"
    // RolloutMirrorReplicasFromResourceNameAnnotationKey -- when set (together with "kind" and optionally "api-version" annotations), rollout-operator sets number of
14 changes: 13 additions & 1 deletion pkg/controller/replicas.go
@@ -19,9 +19,21 @@ func desiredStsReplicas(group string, sts *v1.StatefulSet, all []*v1.StatefulSet
        return followerReplicas, err
    }

    // By default, consider the leader's desired replicas regardless of its current state.
    leaderReplicas := *leader.Spec.Replicas
    if leaderReplicas > followerReplicas {
        // Scale up is always allowed immediately
        // Handle scale-up scenarios
        annotations := sts.GetAnnotations()
        considerReady, ok := annotations[config.RolloutLeaderReadyAnnotationKey]
        if ok && considerReady == config.RolloutLeaderReadyAnnotationValue {
            // If the annotation is present, and equal to 'true', consider only the leader's ready replicas for scaling up.
            leaderReplicas = leader.Status.ReadyReplicas
            if leaderReplicas < followerReplicas {
                // If fewer leader replicas are ready than currently exist in this zone,
                // but the desired replicas are greater, do not scale.
                return followerReplicas, nil
            }
        }
        return leaderReplicas, nil
    }
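
To make the scale-up branch above concrete, here is a small, self-contained Go sketch of the same decision logic with hypothetical replica counts (mirroring the test cases below; an illustration, not the operator's actual API):

```go
package main

import "fmt"

// decide mirrors the scale-up branch shown above: leaderDesired and leaderReady
// stand in for leader.Spec.Replicas and leader.Status.ReadyReplicas.
func decide(leaderDesired, leaderReady, follower int32, leaderReadyAnnotation bool) int32 {
	if leaderDesired > follower {
		if leaderReadyAnnotation {
			if leaderReady < follower {
				return follower // too few leader replicas ready: hold the follower where it is
			}
			return leaderReady // scale up only as far as the leader is ready
		}
		return leaderDesired // annotation absent: follow the leader's desired replicas
	}
	return follower // (downscale handling omitted from this sketch)
}

func main() {
	fmt.Println(decide(4, 2, 2, true))  // 2: leader wants 4 but only 2 are ready
	fmt.Println(decide(5, 2, 3, true))  // 3: only 2 ready, so do not move from 3
	fmt.Println(decide(4, 2, 3, false)) // 4: without the annotation, follow desired
}
```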

55 changes: 55 additions & 0 deletions pkg/controller/replicas_test.go
Expand Up @@ -211,6 +211,61 @@ func TestReconcileStsReplicas(t *testing.T) {
        require.Equal(t, int32(4), replicas)
    })

    t.Run("scale up considering ready replicas", func(t *testing.T) {
        sts1 := mockStatefulSet("test-zone-a", withReplicas(4, 2))
        sts2 := mockStatefulSet("test-zone-b", withReplicas(2, 2), withAnnotations(map[string]string{
            config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a",
            config.RolloutLeaderReadyAnnotationKey:     config.RolloutLeaderReadyAnnotationValue,
        }))
        sts3 := mockStatefulSet("test-zone-c", withReplicas(1, 1), withAnnotations(map[string]string{
            config.RolloutDownscaleLeaderAnnotationKey: "test-zone-b",
            config.RolloutLeaderReadyAnnotationKey:     config.RolloutLeaderReadyAnnotationValue,
        }))

        replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger())
        require.NoError(t, err)
        require.Equal(t, int32(2), replicas)

        replicas, err = desiredStsReplicas("test", sts3, []*v1.StatefulSet{sts1, sts2, sts3}, log.NewNopLogger())
        require.NoError(t, err)
        require.Equal(t, int32(2), replicas)
    })

    t.Run("scale up ignoring ready replicas", func(t *testing.T) {
        sts1 := mockStatefulSet("test-zone-a", withReplicas(4, 2))
        sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{
            config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a",
        }))

        replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger())
        require.NoError(t, err)
        require.Equal(t, int32(4), replicas)
    })

    t.Run("do not scale down to ready replicas", func(t *testing.T) {
        sts1 := mockStatefulSet("test-zone-a", withReplicas(5, 2))
        sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{
            config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a",
            config.RolloutLeaderReadyAnnotationKey:     config.RolloutLeaderReadyAnnotationValue,
        }))

        replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger())
        require.NoError(t, err)
        require.Equal(t, int32(3), replicas)
    })

    t.Run("scale down to replicas, not ready replicas", func(t *testing.T) {
        sts1 := mockStatefulSet("test-zone-a", withReplicas(2, 1))
        sts2 := mockStatefulSet("test-zone-b", withReplicas(3, 3), withAnnotations(map[string]string{
            config.RolloutDownscaleLeaderAnnotationKey: "test-zone-a",
            config.RolloutLeaderReadyAnnotationKey:     config.RolloutLeaderReadyAnnotationValue,
        }))

        replicas, err := desiredStsReplicas("test", sts2, []*v1.StatefulSet{sts1, sts2}, log.NewNopLogger())
        require.NoError(t, err)
        require.Equal(t, int32(2), replicas)
    })

    t.Run("scale down min time error", func(t *testing.T) {
        downscale1 := time.Now().UTC().Round(time.Second).Add(-72 * time.Hour)
        downscale2 := time.Now().UTC().Round(time.Second).Add(-60 * time.Hour)