From ef5c28c6c95eb03d47c85ec91ab3f21776f5843a Mon Sep 17 00:00:00 2001 From: Yetkin Timocin Date: Tue, 5 Dec 2023 16:06:48 -0800 Subject: [PATCH] Fixing the panic for annotations.Configuration (#6864) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit # Description Fixing the panic for annotations.Configuration ## Type of change - This pull request is a minor refactor, code cleanup, test improvement, or other maintenance task and doesn't change the functionality of Radius (issue link optional). Fixes: #issue_number ## Auto-generated summary ### 🤖[[deprecated]](https://githubnext.com/copilot-for-prs-sunset) Generated by Copilot at 3f37db3 ### Summary 🛡️🐛🚀 Fix possible nil pointer dereferences in deployment reconciler. Ensure `annotations.Configuration` is always initialized before accessing its subfields in `pkg/controller/reconciler/deployment_reconciler.go`. > _`Configuration` /_ > _may be nil, so we guard /_ > _against winter frost_ ### Walkthrough * Initialize configuration annotations field to avoid nil pointer dereferences ([link](https://github.com/radius-project/radius/pull/6864/files?diff=unified&w=0#diff-87a7dfa06c174a6b41b671b2cfffb84c81e481881baec866a026e7dd00db8195L251-R261)) Signed-off-by: ytimocin --- pkg/controller/reconciler/annotations.go | 38 +++- pkg/controller/reconciler/annotations_test.go | 167 ++++++++++++++++++ .../reconciler/deployment_reconciler.go | 23 ++- .../reconciler/deployment_reconciler_test.go | 123 +++++++++++++ 4 files changed, 343 insertions(+), 8 deletions(-) create mode 100644 pkg/controller/reconciler/annotations_test.go diff --git a/pkg/controller/reconciler/annotations.go b/pkg/controller/reconciler/annotations.go index e64b9e1c11..b87a4ad772 100644 --- a/pkg/controller/reconciler/annotations.go +++ b/pkg/controller/reconciler/annotations.go @@ -44,7 +44,7 @@ type deploymentAnnotations struct { // This will be nil if Radius is not enabled for the Deployment. Configuration *deploymentConfiguration - //ConfigurationHash is the hash of the user-provided configuration. + // ConfigurationHash is the hash of the user-provided configuration. // This will be used to diff the configuration and determine if the Deployment needs to be updated. ConfigurationHash string @@ -52,6 +52,22 @@ type deploymentAnnotations struct { Status *deploymentStatus } +// There are 4 cases that is possible based on the previous state and the current state of the Deployment: +// Case 1: Previous State: Enabled - Current State: Disabled +// Case 2: Previous State: Disabled - Current State: Enabled +// Case 3: Previous State: Enabled - Current State: Enabled +// Case 4: Previous State: Disabled - Current State: Disabled +// +// How to understand the previous state: +// 1. If "radapp.io/status" annotation is set, then the previous state is Enabled. +// +// Ways to disable Radius: +// 1. "radapp.io/enabled" annotation is set to "false". +// 2. "radapp.io/enabled" annotation is not set. +// +// Ways to enable Radius: +// 1. "radapp.io/enabled" annotation is set to "true". + // deploymentConfiguration is the configuration of the Deployment provided by the user via annotations. type deploymentConfiguration struct { Application string `json:"application,omitempty"` @@ -98,9 +114,9 @@ func readAnnotations(deployment *appsv1.Deployment) (deploymentAnnotations, erro if err != nil { return result, fmt.Errorf("failed to unmarshal status annotation: %w", err) } - } - result.Status = &s + result.Status = &s + } // Note: we need to read and return the configuration even if Radius is not enabled for the Deployment. // This is important so that can clean up previously created connections when Radius is disabled. @@ -184,3 +200,19 @@ func (annotations *deploymentAnnotations) IsUpToDate() bool { return hash == annotations.ConfigurationHash } + +// OperationInProgress returns true if there is an operation in progress for the given deployment. +func (annotations *deploymentAnnotations) OperationInProgress() bool { + return annotations.Status != nil && annotations.Status.Operation != nil +} + +// isRadiusEnabled returns true if Radius is enabled for the given deployment. +func (annotations *deploymentAnnotations) isRadiusEnabled() bool { + return annotations.Configuration != nil +} + +// needsCleanup returns true if Radius was previously enabled on the deployment and now is disabled. +// This means that we need to clean up the resources created by Radius. +func (annotations *deploymentAnnotations) needsCleanup() bool { + return annotations.Configuration == nil && annotations.Status != nil +} diff --git a/pkg/controller/reconciler/annotations_test.go b/pkg/controller/reconciler/annotations_test.go new file mode 100644 index 0000000000..74ef34390b --- /dev/null +++ b/pkg/controller/reconciler/annotations_test.go @@ -0,0 +1,167 @@ +/* +Copyright 2023. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +*/ + +package reconciler + +import ( + "encoding/json" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + appsv1 "k8s.io/api/apps/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func Test_readAnnotations(t *testing.T) { + testDeploymentStatus := &deploymentStatus{ + Scope: "/planes/radius/local/resourceGroups/controller-test", + Application: "test-application", + Environment: "test-environment", + Container: "test-container", + Operation: nil, + Phrase: deploymentPhraseReady, + } + + dsm, err := json.Marshal(testDeploymentStatus) + require.NoError(t, err) + + // invalidDeploymentStatus is missing a curly brace at the end of the JSON + // so that an unmarshaling error can be triggered. + invalidDeploymentStatus := []byte(`{"invalid": "json"`) + + tests := []struct { + name string + deployment *appsv1.Deployment + annotations deploymentAnnotations + err error + }{ + { + name: "radius-disabled-with-annotation", + deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + AnnotationRadiusEnabled: "false", + }, + }, + }, + annotations: deploymentAnnotations{ + Configuration: nil, + ConfigurationHash: "", + Status: nil, + }, + err: nil, + }, + { + name: "radius-disabled-empty-annotation-map", + deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{}, + }, + }, + annotations: deploymentAnnotations{ + Configuration: nil, + ConfigurationHash: "", + Status: nil, + }, + err: nil, + }, + { + name: "radius-disabled-no-annotations", + deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{}, + }, + annotations: deploymentAnnotations{ + Configuration: nil, + ConfigurationHash: "", + Status: nil, + }, + err: nil, + }, + { + name: "radius-was-enabled-now-disabled-with-annotations", + deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + AnnotationRadiusEnabled: "false", + AnnotationRadiusConfigurationHash: "configuration-hash", + AnnotationRadiusStatus: string(dsm), + }, + }, + }, + annotations: deploymentAnnotations{ + Configuration: nil, + ConfigurationHash: "configuration-hash", + Status: testDeploymentStatus, + }, + err: nil, + }, + { + name: "radius-enabled-with-annotations", + deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + AnnotationRadiusEnabled: "true", + AnnotationRadiusConfigurationHash: "configuration-hash", + AnnotationRadiusStatus: string(dsm), + AnnotationRadiusApplication: "test-application", + AnnotationRadiusEnvironment: "test-environment", + AnnotationRadiusConnectionPrefix + "test-connection": "test-connection-value", + }, + }, + }, + annotations: deploymentAnnotations{ + Configuration: &deploymentConfiguration{ + Environment: "test-environment", + Application: "test-application", + Connections: map[string]string{ + "test-connection": "test-connection-value", + }, + }, + ConfigurationHash: "configuration-hash", + Status: testDeploymentStatus, + }, + err: nil, + }, + { + name: "status-unmarshal-error", + deployment: &appsv1.Deployment{ + ObjectMeta: metav1.ObjectMeta{ + Annotations: map[string]string{ + AnnotationRadiusEnabled: "true", + AnnotationRadiusConfigurationHash: "configuration-hash", + AnnotationRadiusStatus: string(invalidDeploymentStatus), + AnnotationRadiusApplication: "test-application", + AnnotationRadiusEnvironment: "test-environment", + AnnotationRadiusConnectionPrefix + "test-connection": "test-connection-value", + }, + }, + }, + annotations: deploymentAnnotations{ + ConfigurationHash: "configuration-hash", + }, + err: fmt.Errorf("failed to unmarshal status annotation: %w", + json.Unmarshal(invalidDeploymentStatus, &deploymentStatus{})), + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + annotations, err := readAnnotations(tt.deployment) + require.Equal(t, tt.err, err) + require.Equal(t, tt.annotations, annotations) + }) + } +} diff --git a/pkg/controller/reconciler/deployment_reconciler.go b/pkg/controller/reconciler/deployment_reconciler.go index 6912041fcc..91bb505d2f 100644 --- a/pkg/controller/reconciler/deployment_reconciler.go +++ b/pkg/controller/reconciler/deployment_reconciler.go @@ -107,7 +107,8 @@ func (r *DeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Request) // and repair it on the next reconcile. } - if annotations.Status != nil && annotations.Status.Operation != nil { + // If there is an operation in progress, then we need to continue polling it. + if annotations.OperationInProgress() { // NOTE: if reconcileOperation completes successfully, then it will return a "zero" result, // this means the operation has completed and we should continue processing. result, err := r.reconcileOperation(ctx, &deployment, &annotations) @@ -126,11 +127,23 @@ func (r *DeploymentReconciler) Reconcile(ctx context.Context, req ctrl.Request) // If the Deployment is being deleted **or** if Radius is no longer enabled, then we should // clean up any Radius state. - if deployment.DeletionTimestamp != nil || (annotations.Configuration == nil && annotations.Status != nil) { + // + // If the DeletionTimestamp is not nil, then it means that the Deployment is being deleted. + // + // If the Configuration is nil, which means that Radius is disabled, and Status is not nil, + // then we need to reconcile the deletion of the Deployment. + if deployment.DeletionTimestamp != nil || annotations.needsCleanup() { return r.reconcileDelete(ctx, &deployment, &annotations) } - return r.reconcileUpdate(ctx, &deployment, &annotations) + // When do we need to reconcile update? When Radius is enabled, no matter + // if it was previously enabled or disabled. + if annotations.isRadiusEnabled() { + return r.reconcileUpdate(ctx, &deployment, &annotations) + } + + r.EventRecorder.Event(&deployment, corev1.EventTypeNormal, "NoOp", fmt.Sprintf("Radius is not enabled for %s", deployment.Name)) + return ctrl.Result{}, nil } // reconcileOperation reconciles a Deployment that has an operation in progress. @@ -249,12 +262,12 @@ func (r *DeploymentReconciler) reconcileUpdate(ctx context.Context, deployment * } environmentName := "default" - if annotations.Configuration != nil && annotations.Configuration.Environment != "" { + if annotations.Configuration.Environment != "" { environmentName = annotations.Configuration.Environment } applicationName := deployment.Namespace - if annotations.Configuration != nil && annotations.Configuration.Application != "" { + if annotations.Configuration.Application != "" { applicationName = annotations.Configuration.Application } diff --git a/pkg/controller/reconciler/deployment_reconciler_test.go b/pkg/controller/reconciler/deployment_reconciler_test.go index c958b6534b..9bc9975cb2 100644 --- a/pkg/controller/reconciler/deployment_reconciler_test.go +++ b/pkg/controller/reconciler/deployment_reconciler_test.go @@ -446,6 +446,96 @@ func Test_DeploymentReconciler_Connections(t *testing.T) { require.True(t, apierrors.IsNotFound(err)) } +// Creates a deployment with Radius disabled. +// +// Then checks the Events for Disabled-Disabled. +func Test_DeploymentReconciler_RadiusDisabled_ThenRadiusDisabled_ByAnnotation(t *testing.T) { + ctx := testcontext.New(t) + _, client := SetupDeploymentTest(t) + + name := types.NamespacedName{ + Namespace: "deployment-disabled-disabled-by-annotation", + Name: "test-deployment-disabled-disabled-by-annotation", + } + err := client.Create(ctx, &corev1.Namespace{ObjectMeta: ctrl.ObjectMeta{Name: name.Namespace}}) + require.NoError(t, err) + + deployment := makeDeployment(name) + err = client.Create(ctx, deployment) + require.NoError(t, err) + + waitForEvent(t, client, + expectedEvent{ + EventType: "Normal", + Reason: "NoOp", + Message: fmt.Sprintf("Radius is not enabled for %s", deployment.Name), + Count: 1, + }, + ) + + // Explicitly setting Radius disabled + deployment.Annotations = map[string]string{ + AnnotationRadiusEnabled: "false", + } + err = client.Update(ctx, deployment) + require.NoError(t, err) + + // We expect the same event to be generated. + // Because Radius was disabled and is still disabled for the deployment. + waitForEvent(t, client, + expectedEvent{ + EventType: "Normal", + Reason: "NoOp", + Message: fmt.Sprintf("Radius is not enabled for %s", deployment.Name), + Count: 2, + }, + ) +} + +// Creates a deployment with Radius disabled. +// +// Then checks the Events for Disabled-Disabled. +func Test_DeploymentReconciler_RadiusDisabled_ThenRadiusDisabled(t *testing.T) { + ctx := testcontext.New(t) + _, client := SetupDeploymentTest(t) + + name := types.NamespacedName{ + Namespace: "deployment-disabled-disabled", + Name: "test-deployment-disabled-disabled", + } + err := client.Create(ctx, &corev1.Namespace{ObjectMeta: ctrl.ObjectMeta{Name: name.Namespace}}) + require.NoError(t, err) + + deployment := makeDeployment(name) + err = client.Create(ctx, deployment) + require.NoError(t, err) + + waitForEvent(t, client, + expectedEvent{ + EventType: "Normal", + Reason: "NoOp", + Message: fmt.Sprintf("Radius is not enabled for %s", deployment.Name), + Count: 1, + }, + ) + + // Update Labels of the Deployment so that the Reconciler can detect a change. + deployment.Labels = map[string]string{"foo": "bar"} + err = client.Update(ctx, deployment) + require.NoError(t, err) + + // We expect the same event to be generated. + // Because Radius was disabled and is still disabled for the deployment. + waitForEvent(t, client, + expectedEvent{ + EventType: "Normal", + Reason: "NoOp", + Message: fmt.Sprintf("Radius is not enabled for %s", deployment.Name), + Count: 2, + }, + ) +} + func makeDeployment(name types.NamespacedName) *appsv1.Deployment { return &appsv1.Deployment{ ObjectMeta: metav1.ObjectMeta{ @@ -574,6 +664,39 @@ func waitForStateDeleting(t *testing.T, client client.Client, name types.Namespa return &annotations } +type expectedEvent struct { + EventType string + Reason string + Message string + Count int +} + +// waitForEvent waits for the expected event to occur in the cluster. +// +// We can have multiple events as the result of the List function but we are only interested in the expected event. +func waitForEvent(t *testing.T, client client.Client, event expectedEvent) { + ctx := testcontext.New(t) + logger := t + + require.EventuallyWithTf(t, func(t *assert.CollectT) { + logger.Log("Fetching Events") + + events := &corev1.EventList{} + err := client.List(ctx, events) + require.NoError(t, err) + + found := false + for _, e := range events.Items { + // If the event is the one we are looking for, check the count. + if e.Type == event.EventType && e.Reason == event.Reason && + e.Message == event.Message && assert.Equal(t, event.Count, int(e.Count)) { + found = true + } + } + assert.True(t, found) + }, deploymentTestWaitDuration, deploymentTestWaitInterval, "Waiting for the expected event") +} + func waitForRadiusContainerDeleted(t *testing.T, client client.Client, name types.NamespacedName) *deploymentAnnotations { ctx := testcontext.New(t)