Skip to content

Commit

Permalink
Early stall condition detection after remediation
Browse files Browse the repository at this point in the history
Detect stall condition due to exhausted remediation retry right after
remediating. This helps return from AtomicRelease.Reconcile() with
proper stalled status condition and error. Without this, after
remediation, a stalled condition detection required a new
reconciliation, leaving the status of the object without any Reconciling
and Stalled condition.

Signed-off-by: Sunny <[email protected]>
  • Loading branch information
darkowlzz committed Dec 13, 2023
1 parent 6921825 commit e4eede6
Show file tree
Hide file tree
Showing 2 changed files with 63 additions and 1 deletion.
10 changes: 9 additions & 1 deletion internal/reconcile/atomic_release.go
Original file line number Diff line number Diff line change
Expand Up @@ -262,10 +262,18 @@ func (r *AtomicRelease) Reconcile(ctx context.Context, req *Request) error {
"instructed to stop after running %s action reconciler %s", next.Type(), next.Name()),
)

if remediation := req.Object.GetActiveRemediation(); remediation == nil || !remediation.RetriesExhausted(req.Object) {
remediation := req.Object.GetActiveRemediation()
if remediation == nil || !remediation.RetriesExhausted(req.Object) {
conditions.MarkReconciling(req.Object, meta.ProgressingWithRetryReason, conditions.GetMessage(req.Object, meta.ReadyCondition))
return ErrMustRequeue
}
// Check if retries have exhausted after remediation for early
// stall condition detection.
if remediation != nil && remediation.RetriesExhausted(req.Object) {
conditions.MarkStalled(req.Object, "RetriesExceeded", "Failed to %s after %d attempt(s)",
req.Object.Status.LastAttemptedReleaseAction, req.Object.GetActiveRemediation().GetFailureCount(req.Object))
return ErrExceededMaxRetries
}

conditions.Delete(req.Object, meta.ReconcilingCondition)
return nil
Expand Down
54 changes: 54 additions & 0 deletions internal/reconcile/atomic_release_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -629,6 +629,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "install test failure with remediation",
Expand All @@ -654,6 +655,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
snap,
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "install test failure with test ignore",
Expand Down Expand Up @@ -759,6 +761,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade failure with uninstall remediation",
Expand Down Expand Up @@ -799,6 +802,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade test failure with remediation",
Expand Down Expand Up @@ -841,6 +845,7 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
}
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade test failure with test ignore",
Expand Down Expand Up @@ -928,6 +933,55 @@ func TestAtomicRelease_Reconcile_Scenarios(t *testing.T) {
},
wantErr: ErrExceededMaxRetries,
},
{
name: "upgrade remediation results in exhausted retries",
releases: func(namespace string) []*helmrelease.Release {
return []*helmrelease.Release{
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 1,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusSuperseded,
}),
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 2,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusSuperseded,
}),
testutil.BuildRelease(&helmrelease.MockReleaseOptions{
Name: mockReleaseName,
Namespace: namespace,
Version: 3,
Chart: testutil.BuildChart(),
Status: helmrelease.StatusFailed,
}),
}
},
spec: func(spec *v2.HelmReleaseSpec) {
spec.Upgrade = &v2.Upgrade{
Remediation: &v2.UpgradeRemediation{
Retries: 1,
},
}
},
status: func(namespace string, releases []*helmrelease.Release) v2.HelmReleaseStatus {
return v2.HelmReleaseStatus{
History: v2.Snapshots{
release.ObservedToSnapshot(release.ObserveRelease(releases[2])),
release.ObservedToSnapshot(release.ObserveRelease(releases[1])),
release.ObservedToSnapshot(release.ObserveRelease(releases[0])),
},
LastAttemptedReleaseAction: v2.ReleaseActionUpgrade,
Failures: 2,
UpgradeFailures: 2,
}
},
chart: testutil.BuildChart(),
wantErr: ErrExceededMaxRetries,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
Expand Down

0 comments on commit e4eede6

Please sign in to comment.