Skip to content

Commit

Permalink
Early stall condition detection after remediation
Browse files Browse the repository at this point in the history
Detect stall condition due to exhausted remediation retry right after
remediating. This helps return from AtomicRelease.Reconcile() with
proper stalled status condition and error. Without this, after
remediation, a stalled condition detection required a new
reconciliation, leaving the status of the object without any Reconciling
or Stalled condition.

Signed-off-by: Sunny <[email protected]>
  • Loading branch information
darkowlzz authored and hiddeco committed Dec 14, 2023
1 parent abcdfef commit fa5e284
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 1 deletion.
10 changes: 9 additions & 1 deletion internal/reconcile/atomic_release.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,18 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
"instructed to stop after running %s action reconciler %s", next.Type(), next.Name()),
)

if remediation := req.Object.GetActiveRemediation(); remediation == nil || !remediation.RetriesExhausted(req.Object) {
remediation := req.Object.GetActiveRemediation()
if remediation == nil || !remediation.RetriesExhausted(req.Object) {
conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, conditions.GetMessage(req.Object, meta.ReadyCondition))
return ErrMustRequeue
}
// Check if retries have exhausted after remediation for early
// stall condition detection.
if remediation != nil && remediation.RetriesExhausted(req.Object) {
conditions.MarkStalled(req.Object, "RetriesExceeded", "Failed to %s after %d attempt(s)",
req.Object.Status.LastAttemptedReleaseAction, req.Object.GetActiveRemediation().GetFailureCount(req.Object))
return ErrExceededMaxRetries
}

conditions.Delete(req.Object, meta.ReconcilingCondition)
return nil
Expand Down
54 changes: 54 additions & 0 deletions internal/reconcile/atomic_release_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "install test failure with remediation",
Expand All @@ -654,6 +655,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
snap,
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "install test failure with test ignore",
Expand Down Expand Up @@ -759,6 +761,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade failure with uninstall remediation",
Expand Down Expand Up @@ -799,6 +802,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade test failure with remediation",
Expand Down Expand Up @@ -841,6 +845,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade test failure with test ignore",
Expand Down Expand Up @@ -928,6 +933,55 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade remediation results in exhausted retries",
releases: func(namespace string) []*helmrelease.Release {
return []*helmrelease.Release{
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 1,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusSuperseded,
}),
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 2,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusSuperseded,
}),
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 3,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusFailed,
}),
}
},
spec: func(spec *v2.HelmReleaseSpec) {
spec.Upgrade = &v2.Upgrade{
Remediation: &v2.UpgradeRemediation{
Retries: 1,
},
}
},
status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus {
return v2.HelmReleaseStatus{
History: v2.Snapshots{
release.ObservedToSnapshot(release.ObserveRelease(releases[2])),
release.ObservedToSnapshot(release.ObserveRelease(releases[1])),
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
},
LastAttemptedReleaseAction: v2.ReleaseActionUpgrade,
Failures: 2,
UpgradeFailures: 2,
}
},
chart: testutil.BuildChart(),
wantErr: ErrExceededMaxRetries,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit fa5e284

Please sign in to comment.