Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: remediator ignores no resource/kind match #1348

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion cmd/nomoserrors/examples/examples.go
Original file line number Diff line number Diff line change
Expand Up @@ -345,8 +345,10 @@ func Generate() AllExamples {

// 2008
result.add(client.ConflictCreateAlreadyExists(errors.New("already exists"), k8sobjects.RoleObject()))
result.add(client.ConflictCreateResourceDoesNotExist(errors.New("no resource match"), k8sobjects.RoleObject()))
result.add(client.ConflictUpdateOldVersion(errors.New("old version"), k8sobjects.RoleObject()))
result.add(client.ConflictUpdateDoesNotExist(errors.New("does not exist"), k8sobjects.RoleObject()))
result.add(client.ConflictUpdateObjectDoesNotExist(errors.New("does not exist"), k8sobjects.RoleObject()))
result.add(client.ConflictUpdateResourceDoesNotExist(errors.New("no resource match"), k8sobjects.RoleObject()))

// 2009
result.add(applier.Error(errors.New("failed to initialize an error")))
Expand Down
28 changes: 20 additions & 8 deletions e2e/testcases/custom_resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,7 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1(t *testing.T) {

// Remove CRD
// This will garbage collect the CR too and block until both are deleted.
nt.T.Log("Deleting Anvil CRD")
_, err = nt.Shell.Kubectl("delete", "-f", crdFile)
if err != nil {
nt.T.Fatal(err)
Expand All @@ -109,16 +110,27 @@ func TestCRDDeleteBeforeRemoveCustomResourceV1(t *testing.T) {
nt.T.Fatal(err)
}

// Wait for remediator to detect the drift (managed Anvil CR was deleted)
// and record it as a conflict.
// TODO: distinguish between management conflict (spec/generation drift) and concurrent status update conflict (resource version change)
err = nomostest.ValidateMetrics(nt,
// Resource Conflict errors from the remediator are not exposed as errors
// in the RootSync status. Instead, the error is recorded as a metric and
// logged as a warning. Then the object is refreshed from the server and
// re-enqueued for remediation.
//
// Validate that deleting the CRD of a managed CR causes at least of of the
// following errors:
// - NoResourceMatchError
// - NoKindMatchError
// - ObjectNotFound
// - ResourceVersionConflict
// Which error depends on race conditions between the remediator and the
// Kubernetes custom resource controller.
//
// Note: This is NOT a "management conflict", just a "resource conflict".
// But both types of conflict both share the same metric.
// TODO: distinguish between management conflict (unexpected manager annotation) and resource conflict (resource version change)
nt.Must(nomostest.ValidateMetrics(nt,
nomostest.ReconcilerErrorMetrics(nt, rootSyncLabels, firstCommitHash, metrics.ErrorSummary{
Conflicts: 1,
}))
if err != nil {
nt.T.Fatal(err)
}
})))

// Reset discovery client to invalidate the cached Anvil CRD
nt.RenewClient()
Expand Down
4 changes: 2 additions & 2 deletions pkg/remediator/reconcile/reconciler_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -295,14 +295,14 @@ func TestRemediator_Reconcile_Metrics(t *testing.T) {
// Object on cluster has no label and is unmanaged
actual: k8sobjects.RoleObject(core.Namespace("example"), core.Name("example")),
// Object update fails, because it was deleted by another client
updateError: syncerclient.ConflictUpdateDoesNotExist(
updateError: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(schema.GroupResource{Group: "rbac", Resource: "roles"}, "example"),
k8sobjects.RoleObject(core.Namespace("example"), core.Name("example"))),
// Object NOT updated on cluster, because update failed with conflict error
want: k8sobjects.RoleObject(core.Namespace("example"), core.Name("example"),
core.UID("1"), core.ResourceVersion("1"), core.Generation(1)),
// Expect update error returned from Remediate
wantError: syncerclient.ConflictUpdateDoesNotExist(
wantError: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(schema.GroupResource{Group: "rbac", Resource: "roles"}, "example"),
k8sobjects.RoleObject(core.Namespace("example"), core.Name("example"))),
// Expect resource conflict error
Expand Down
4 changes: 2 additions & 2 deletions pkg/syncer/client/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,7 +159,7 @@ func (c *Client) apply(ctx context.Context, obj client.Object, updateFn update,
err := c.Client.Get(ctx, namespacedName, workingObj)
switch {
case apierrors.IsNotFound(err):
return nil, ConflictUpdateDoesNotExist(err, obj)
return nil, ConflictUpdateObjectDoesNotExist(err, obj)
case err != nil:
return nil, status.ResourceWrap(err, "failed to get object to update", obj)
}
Expand Down Expand Up @@ -220,7 +220,7 @@ func (c *Client) Update(ctx context.Context, obj client.Object, opts ...client.U
m.RecordAPICallDuration(ctx, "update", m.StatusTagKey(err), start)
switch {
case apierrors.IsNotFound(err):
return ConflictUpdateDoesNotExist(err, obj)
return ConflictUpdateObjectDoesNotExist(err, obj)
case err != nil:
return status.ResourceWrap(err, "failed to update", obj)
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/syncer/client/client_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func TestClient_Apply(t *testing.T) {
declared: k8sobjects.RoleObject(core.Name("admin"), core.Namespace("billing")),
client: syncertestfake.NewErrorClient(
apierrors.NewNotFound(rbacv1.Resource("Role"), "billing/admin")),
wantErr: syncerclient.ConflictUpdateDoesNotExist(
wantErr: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(rbacv1.Resource("Role"), "billing/admin"),
k8sobjects.RoleObject(core.Name("admin"), core.Namespace("billing"))),
},
Expand Down Expand Up @@ -128,7 +128,7 @@ func TestClient_Update(t *testing.T) {
declared: k8sobjects.RoleObject(core.Name("admin"), core.Namespace("billing")),
client: syncertestfake.NewErrorClient(
apierrors.NewNotFound(rbacv1.Resource("Role"), "admin")),
wantErr: syncerclient.ConflictUpdateDoesNotExist(
wantErr: syncerclient.ConflictUpdateObjectDoesNotExist(
apierrors.NewNotFound(rbacv1.Resource("Role"), "admin"),
k8sobjects.RoleObject(core.Name("admin"), core.Namespace("billing"))),
},
Expand Down
28 changes: 23 additions & 5 deletions pkg/syncer/client/retriable_errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,16 +30,34 @@ var retriableConflictBuilder = status.NewErrorBuilder(ResourceConflictCode)
func ConflictCreateAlreadyExists(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to create resource that already exists").
Sprint("tried to create object that already exists").
BuildWithResources(resource)
}

// ConflictUpdateDoesNotExist means we tried to update an object which does not
// ConflictCreateResourceDoesNotExist means we tried to create an object whose
// resource group or kind does not exist.
func ConflictCreateResourceDoesNotExist(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to create object whose resource type does not exist").
BuildWithResources(resource)
}

// ConflictUpdateObjectDoesNotExist means we tried to update an object which does not
// exist.
func ConflictUpdateDoesNotExist(err error, resource client.Object) status.Error {
func ConflictUpdateObjectDoesNotExist(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to update object which does not exist").
BuildWithResources(resource)
}

// ConflictUpdateResourceDoesNotExist means we tried to update an object whose
// resource group or kind does not exist.
func ConflictUpdateResourceDoesNotExist(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprint("tried to update resource which does not exist").
Sprint("tried to update object whose resource type does not exist").
BuildWithResources(resource)
}

Expand All @@ -48,6 +66,6 @@ func ConflictUpdateDoesNotExist(err error, resource client.Object) status.Error
func ConflictUpdateOldVersion(err error, resource client.Object) status.Error {
return retriableConflictBuilder.
Wrap(err).
Sprintf("tried to update with stale version of resource").
Sprintf("tried to update object with stale resource version").
BuildWithResources(resource)
}
12 changes: 10 additions & 2 deletions pkg/syncer/reconcile/apply.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"github.com/google/go-cmp/cmp"
"k8s.io/apimachinery/pkg/api/equality"
apierrors "k8s.io/apimachinery/pkg/api/errors"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
"k8s.io/apimachinery/pkg/runtime"
Expand Down Expand Up @@ -111,7 +112,12 @@ func (c *clientApplier) Create(ctx context.Context, intendedState *unstructured.
err = c.create(ctx, intendedState)
} else {
if err1 := c.client.Patch(ctx, intendedState, client.Apply, client.FieldOwner(configsync.FieldManager)); err1 != nil {
err = status.ResourceWrap(err1, "unable to apply resource", intendedState)
switch {
case meta.IsNoMatchError(err1):
err = syncerclient.ConflictCreateResourceDoesNotExist(err1, intendedState)
default:
err = status.ResourceWrap(err1, "unable to apply resource", intendedState)
}
}
}
metrics.Operations.WithLabelValues("create", metrics.StatusLabel(err)).Inc()
Expand Down Expand Up @@ -141,7 +147,9 @@ func (c *clientApplier) Update(ctx context.Context, intendedState, currentState
case apierrors.IsConflict(err):
return syncerclient.ConflictUpdateOldVersion(err, intendedState)
case apierrors.IsNotFound(err):
return syncerclient.ConflictUpdateDoesNotExist(err, intendedState)
return syncerclient.ConflictUpdateObjectDoesNotExist(err, intendedState)
case meta.IsNoMatchError(err):
return syncerclient.ConflictUpdateResourceDoesNotExist(err, intendedState)
case err != nil:
return status.ResourceWrap(err, "unable to update resource", intendedState)
}
Expand Down