Skip to content

Commit

Permalink
fix: remediator ignores no resource/kind match
Browse files Browse the repository at this point in the history
- Update remediator to record NoMatchErrors as a resource conflict.
  These are reported as metrics and cause retry, but are still not
  reported as sync errors, for now.
- Update comments in TestCRDDeleteBeforeRemoveCustomResourceV1
  to explain why a conflict is expected and which ones might be
  encountered due to race condition.
  • Loading branch information
karlkfi committed Jul 24, 2024
1 parent 7dbe0c1 commit 44ee0ad
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 22 deletions.
2 changes: 1 addition & 1 deletion cmd/nomoserrors/examples/examples.go
Original file line number Diff line number Diff line change
Expand Up @@ -346,7 +346,7 @@ func Generate() AllExamples {
// 2008
result.add(client.ConflictCreateAlreadyExists(errors.New("already exists"), fake.RoleObject()))
result.add(client.ConflictUpdateOldVersion(errors.New("old version"), fake.RoleObject()))
result.add(client.ConflictUpdateDoesNotExist(errors.New("does not exist"), fake.RoleObject()))
result.add(client.ConflictUpdateObjectDoesNotExist(errors.New("does not exist"), fake.RoleObject()))

// 2009
result.add(applier.Error(errors.New("failed to initialize an error")))
Expand Down
28 changes: 20 additions & 8 deletions e2e/testcases/custom_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1(t *testing.T) {

// Remove CRD
// This will garbage collect the CR too and block until both are deleted.
nt.T.Log("Deleting Anvil CRD")
_, err = nt.Shell.Kubectl("delete", "-f", crdFile)
if err != nil {
nt.T.Fatal(err)
Expand All @@ -109,16 +110,27 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1(t *testing.T) {
nt.T.Fatal(err)
}

// Wait for remediator to detect the drift (managed Anvil CR was deleted)
// and record it as a conflict.
// TODO: distinguish between management conflict (spec/generation drift) and concurrent status update conflict (resource version change)
err = nomostest.ValidateMetrics(nt,
// Resource Conflict errors from the remediator are not exposed as errors
// in the RootSync status. Instead, the error is recorded as a metric and
// logged as a warning. Then the object is refreshed from the server and
// re-enqueued for remediation.
//
// Validate that deleting the CRD of a managed CR causes at least of of the
// following errors:
// - NoResourceMatchError
// - NoKindMatchError
// - ObjectNotFound
// - ResourceVersionConflict
// Which error depends on race conditions between the remediator and the
// Kubernetes custom resource controller.
//
// Note: This is NOT a "management conflict", just a "resource conflict".
// But both types of conflict both share the same metric.
// TODO: distinguish between management conflict (unexpected manager annotation) and resource conflict (resource version change)
nt.Must(nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, firstCommitHash, metrics.ErrorSummary{
Conflicts: 1,
}))
if err != nil {
nt.T.Fatal(err)
}
})))

// Reset discovery client to invalidate the cached Anvil CRD
nt.RenewClient()
Expand Down
4 changes: 2 additions & 2 deletions pkg/remediator/reconcile/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,14 +295,14 @@ func TestRemediator_Reconcile_Metrics(t *testing.T) {
// Object on cluster has no label and is unmanaged
actual: fake.RoleObject(core.Namespace("example"), core.Name("example")),
// Object update fails, because it was deleted by another client
updateError: syncerclient.ConflictUpdateDoesNotExist(
updateError: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(schema.GroupResource{Group: "rbac", Resource: "roles"}, "example"),
fake.RoleObject(core.Namespace("example"), core.Name("example"))),
// Object NOT updated on cluster, because update failed with conflict error
want: fake.RoleObject(core.Namespace("example"), core.Name("example"),
core.UID("1"), core.ResourceVersion("1"), core.Generation(1)),
// Expect update error returned from Remediate
wantError: syncerclient.ConflictUpdateDoesNotExist(
wantError: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(schema.GroupResource{Group: "rbac", Resource: "roles"}, "example"),
fake.RoleObject(core.Namespace("example"), core.Name("example"))),
// Expect resource conflict error
Expand Down
4 changes: 2 additions & 2 deletions pkg/syncer/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func (c *Client) apply(ctx context.Context, obj client.Object, updateFn update,
err := c.Client.Get(ctx, namespacedName, workingObj)
switch {
case apierrors.IsNotFound(err):
return nil, ConflictUpdateDoesNotExist(err, obj)
return nil, ConflictUpdateObjectDoesNotExist(err, obj)
case err != nil:
return nil, status.ResourceWrap(err, "failed to get object to update", obj)
}
Expand Down Expand Up @@ -220,7 +220,7 @@ func (c *Client) Update(ctx context.Context, obj client.Object, opts ...client.U
m.RecordAPICallDuration(ctx, "update", m.StatusTagKey(err), start)
switch {
case apierrors.IsNotFound(err):
return ConflictUpdateDoesNotExist(err, obj)
return ConflictUpdateObjectDoesNotExist(err, obj)
case err != nil:
return status.ResourceWrap(err, "failed to update", obj)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/syncer/client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func TestClient_Apply(t *testing.T) {
declared: fake.RoleObject(core.Name("admin"), core.Namespace("billing")),
client: syncertestfake.NewErrorClient(
apierrors.NewNotFound(rbacv1.Resource("Role"), "billing/admin")),
wantErr: syncerclient.ConflictUpdateDoesNotExist(
wantErr: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(rbacv1.Resource("Role"), "billing/admin"),
fake.RoleObject(core.Name("admin"), core.Namespace("billing"))),
},
Expand Down Expand Up @@ -128,7 +128,7 @@ func TestClient_Update(t *testing.T) {
declared: fake.RoleObject(core.Name("admin"), core.Namespace("billing")),
client: syncertestfake.NewErrorClient(
apierrors.NewNotFound(rbacv1.Resource("Role"), "admin")),
wantErr: syncerclient.ConflictUpdateDoesNotExist(
wantErr: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(rbacv1.Resource("Role"), "admin"),
fake.RoleObject(core.Name("admin"), core.Namespace("billing"))),
},
Expand Down
28 changes: 23 additions & 5 deletions pkg/syncer/client/retriable_errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,34 @@ var retriableConflictBuilder = status.NewErrorBuilder(ResourceConflictCode)
func ConflictCreateAlreadyExists(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to create resource that already exists").
Sprint("tried to create object that already exists").
BuildWithResources(resource)
}

// ConflictUpdateDoesNotExist means we tried to update an object which does not
// ConflictCreateResourceDoesNotExist means we tried to create an object whose
// resource group or kind does not exist.
func ConflictCreateResourceDoesNotExist(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to create object whose resource type does not exist").
BuildWithResources(resource)
}

// ConflictUpdateObjectDoesNotExist means we tried to update an object which does not
// exist.
func ConflictUpdateDoesNotExist(err error, resource client.Object) status.Error {
func ConflictUpdateObjectDoesNotExist(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to update object which does not exist").
BuildWithResources(resource)
}

// ConflictUpdateResourceDoesNotExist means we tried to update an object whose
// resource group or kind does not exist.
func ConflictUpdateResourceDoesNotExist(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to update resource which does not exist").
Sprint("tried to update object whose resource type does not exist").
BuildWithResources(resource)
}

Expand All @@ -48,6 +66,6 @@ func ConflictUpdateDoesNotExist(err error, resource client.Object) status.Error
func ConflictUpdateOldVersion(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprintf("tried to update with stale version of resource").
Sprintf("tried to update object with stale resource version").
BuildWithResources(resource)
}
12 changes: 10 additions & 2 deletions pkg/syncer/reconcile/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/google/go-cmp/cmp"
"k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -111,7 +112,12 @@ func (c *clientApplier) Create(ctx context.Context, intendedState *unstructured.
err = c.create(ctx, intendedState)
} else {
if err1 := c.client.Patch(ctx, intendedState, client.Apply, client.FieldOwner(configsync.FieldManager)); err1 != nil {
err = status.ResourceWrap(err1, "unable to apply resource", intendedState)
switch {
case meta.IsNoMatchError(err1):
err = syncerclient.ConflictCreateResourceDoesNotExist(err1, intendedState)
default:
err = status.ResourceWrap(err1, "unable to apply resource", intendedState)
}
}
}
metrics.Operations.WithLabelValues("create", metrics.StatusLabel(err)).Inc()
Expand Down Expand Up @@ -141,7 +147,9 @@ func (c *clientApplier) Update(ctx context.Context, intendedState, currentState
case apierrors.IsConflict(err):
return syncerclient.ConflictUpdateOldVersion(err, intendedState)
case apierrors.IsNotFound(err):
return syncerclient.ConflictUpdateDoesNotExist(err, intendedState)
return syncerclient.ConflictUpdateObjectDoesNotExist(err, intendedState)
case meta.IsNoMatchError(err):
return syncerclient.ConflictUpdateResourceDoesNotExist(err, intendedState)
case err != nil:
return status.ResourceWrap(err, "unable to update resource", intendedState)
}
Expand Down

0 comments on commit 44ee0ad

Please sign in to comment.