Skip to content

Commit

Permalink
reconcile/deployment: properly track updateStatus for components
Browse files Browse the repository at this point in the history
   Previously, status could be incorrectly tracked for `Deployment` based application, such as `VMAgent` or `VMSingle`. Check was based on `controller-manager` status condition `NewReplicaSetAvailable` update on `Deployments`. But it due to kubernetes bug kubernetes/kubernetes#115538, condition was not updated if `replicaSet` for deployment exists and it was previously ready. 

This commit copies logic from `kubectl` tool for `Deployment` rollout status track https://kubernetes.io/docs/reference/kubectl/generated/kubectl_rollout/kubectl_rollout_status/.

 It also adds check for `generation`, which eliminates possible race condition between `Deployment` state updates by `api-server` and `controller-manager`. 
---------

Co-authored-by: Nikolay <[email protected]>
  • Loading branch information
Amper and f41gh7 authored Nov 11, 2024
1 parent cbe3688 commit cbdf26f
Show file tree
Hide file tree
Showing 4 changed files with 46 additions and 11 deletions.
1 change: 1 addition & 0 deletions docs/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ aliases:

- [vmrule](https://docs.victoriametrics.com/operator/resources/vmrule/): properly validate rules for [vlogs](https://docs.victoriametrics.com/victorialogs/vmalert/) group `type`.
- [operator](https://docs.victoriametrics.com/operator/): properly apply changes to the [converted](https://docs.victoriametrics.com/operator/migration/#objects-conversion) `VMScrapeConfig` during operator start-up.
- [operator](https://docs.victoriametrics.com/operator/): properly set `operational` update status for CRDs. Previously, `operational` status could be set before rollout finishes at Kubernetes due to bug at Kubernetes `controller-manager`.


## [v0.49.0](https://github.com/VictoriaMetrics/operator/releases/tag/v0.49.0) - 15 Oct 2024
Expand Down
3 changes: 3 additions & 0 deletions internal/controller/operator/factory/k8stools/test_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -124,6 +124,9 @@ func NewReadyDeployment(name, namespace string) *appsv1.Deployment {
Status: "True",
},
},
UpdatedReplicas: 1,
AvailableReplicas: 1,
Replicas: 1,
},
}
}
Expand Down
44 changes: 34 additions & 10 deletions internal/controller/operator/factory/reconcile/deploy.go
Original file line number Diff line number Diff line change
Expand Up @@ -77,24 +77,48 @@ func waitDeploymentReady(ctx context.Context, rclient client.Client, dep *appsv1
if err := rclient.Get(ctx, types.NamespacedName{Namespace: dep.Namespace, Name: dep.Name}, &actualDeploy); err != nil {
return false, fmt.Errorf("cannot fetch actual deployment state: %w", err)
}
for _, cond := range actualDeploy.Status.Conditions {
if cond.Type == appsv1.DeploymentProgressing {
// https://kubernetes.io/docs/concepts/workloads/internal/controller/deployment/#complete-deployment
// Completed status for deployment
if cond.Reason == "NewReplicaSetAvailable" && cond.Status == "True" {
return true, nil
}
return false, nil
}
// Based on recommendations from the kubernetes documentation
// (https://kubernetes.io/docs/concepts/workloads/controllers/deployment/#complete-deployment)
// this function uses the deployment readiness detection algorithm from `kubectl rollout status` command
// (https://github.com/kubernetes/kubectl/blob/6e4fe32a45fdcbf61e5c30ebdc511d75e7242432/pkg/polymorphichelpers/rollout_status.go#L76)
if actualDeploy.Generation > actualDeploy.Status.ObservedGeneration {
// Waiting for deployment spec update to be observed by controller...
return false, nil
}
cond := getDeploymentCondition(actualDeploy.Status, appsv1.DeploymentProgressing)
if cond != nil && cond.Reason == "ProgressDeadlineExceeded" {
return false, fmt.Errorf("deployment %s/%s has exceeded its progress deadline", dep.Namespace, dep.Name)
}
if actualDeploy.Spec.Replicas != nil && actualDeploy.Status.UpdatedReplicas < *actualDeploy.Spec.Replicas {
// Waiting for deployment rollout to finish: part of new replicas have been updated...
return false, nil
}
if actualDeploy.Status.Replicas > actualDeploy.Status.UpdatedReplicas {
// Waiting for deployment rollout to finish: part of old replicas are pending termination...
return false, nil
}
if actualDeploy.Status.AvailableReplicas < actualDeploy.Status.UpdatedReplicas {
// Waiting for deployment rollout to finish: part of updated replicas are available
return false, nil
}
return false, nil
return true, nil
})
if err != nil {
return reportFirstNotReadyPodOnError(ctx, rclient, fmt.Errorf("cannot wait for deployment to become ready: %w", err), dep.Namespace, labels.SelectorFromSet(dep.Spec.Selector.MatchLabels), dep.Spec.MinReadySeconds)
}
return nil
}

func getDeploymentCondition(status appsv1.DeploymentStatus, condType appsv1.DeploymentConditionType) *appsv1.DeploymentCondition {
for i := range status.Conditions {
c := status.Conditions[i]
if c.Type == condType {
return &c
}
}
return nil
}

func reportFirstNotReadyPodOnError(ctx context.Context, rclient client.Client, origin error, ns string, selector labels.Selector, minReadySeconds int32) error {
// list pods and join statuses
var podList corev1.PodList
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ func TestDeployOk(t *testing.T) {
}
createdDep.Status.ReadyReplicas = 1
createdDep.Status.UpdatedReplicas = 1
createdDep.Status.AvailableReplicas = 1
createdDep.Status.Replicas = 1
if err := rclient.Status().Update(ctx, &createdDep); err != nil {
return false, err
}
Expand All @@ -83,9 +85,14 @@ func TestDeployOk(t *testing.T) {

// expect 1 UpdateCalls
reloadDep()
dep.Status.AvailableReplicas = 10
dep.Status.UpdatedReplicas = 10
if err = rclient.Status().Update(ctx, dep); err != nil {
t.Fatalf("cannot update deployment status: %s", err)
}

dep.Spec.Replicas = ptr.To[int32](10)
dep.Spec.Template.ObjectMeta.Annotations = map[string]string{"new-annotation": "value"}

if err := Deployment(ctx, rclient, dep, prevDeploy, false); err != nil {
t.Fatalf("expect 1 failed to update created deploy: %s", err)
}
Expand Down

0 comments on commit cbdf26f

Please sign in to comment.