From d5b9ac0bea5dd9292fb00a924a6b8cae911dcaf8 Mon Sep 17 00:00:00 2001 From: Yetkin Timocin Date: Fri, 15 Nov 2024 15:23:04 -0800 Subject: [PATCH] Adding retries and exponential backoff (#8051) # Description Adding retries and exponential backoff to the AWS resource checks used by the tests: `ValidateAWSResources` now retries `GetResource` up to 5 times, and the AWS deletion check in `RPTest.Test` waits longer after each attempt instead of a fixed 10 seconds. Note that the wait is computed as `baseWaitTime * attempt` (15s, 30s, 45s, ...), so in practice it grows linearly with the attempt number. ## Type of change - This pull request is a minor refactor, code cleanup, test improvement, or other maintenance task and doesn't change the functionality of Radius (issue link optional). Fixes: #7994 ## Contributor checklist Please verify that the PR meets the following requirements, where applicable: - [ ] An overview of proposed schema changes is included in a linked GitHub issue. - [ ] A design document PR is created in the [design-notes repository](https://github.com/radius-project/design-notes/), if new APIs are being introduced. - [ ] If applicable, the design document has been reviewed and approved by Radius maintainers/approvers. - [ ] A PR for the [samples repository](https://github.com/radius-project/samples) is created, if existing samples are affected by the changes in this PR. - [ ] A PR for the [documentation repository](https://github.com/radius-project/docs) is created, if the changes in this PR affect the documentation or any user-facing updates are made. - [ ] A PR for the [recipes repository](https://github.com/radius-project/recipes) is created, if existing recipes are affected by the changes in this PR. 
Signed-off-by: ytimocin --- test/rp/rptest.go | 8 ++++++-- test/validation/aws.go | 22 ++++++++++++++++++---- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/test/rp/rptest.go b/test/rp/rptest.go index ed9b515b0d..21dc72d435 100644 --- a/test/rp/rptest.go +++ b/test/rp/rptest.go @@ -454,6 +454,8 @@ func (ct RPTest) Test(t *testing.T) { // Ensure that the resource is deleted with retries notFound := false + baseWaitTime := 15 * time.Second + for attempt := 1; attempt <= AWSDeletionRetryLimit; attempt++ { t.Logf("validating deletion of AWS resource for %s (attempt %d/%d)", ct.Description, attempt, AWSDeletionRetryLimit) @@ -467,8 +469,10 @@ func (ct RPTest) Test(t *testing.T) { t.Logf("checking existence of resource %s failed with err: %s", resource.Name, err) break } else { - // Wait for 10 seconds - time.Sleep(10 * time.Second) + // Wait with exponential backoff + waitTime := baseWaitTime * time.Duration(attempt) + t.Logf("waiting for %s before next attempt", waitTime) + time.Sleep(waitTime) } } diff --git a/test/validation/aws.go b/test/validation/aws.go index 267a05d8ba..32bc3e71f0 100644 --- a/test/validation/aws.go +++ b/test/validation/aws.go @@ -61,14 +61,28 @@ type AWSResourceSet struct { // ValidateAWSResources checks that the expected AWS resources exist and have the expected properties. 
func ValidateAWSResources(ctx context.Context, t *testing.T, expected *AWSResourceSet, client awsclient.AWSCloudControlClient) { + baseWaitTime := 15 * time.Second + retryLimit := 5 + for _, resource := range expected.Resources { resourceType, err := GetResourceTypeName(ctx, &resource) require.NoError(t, err) - resourceResponse, err := client.GetResource(ctx, &cloudcontrol.GetResourceInput{ - Identifier: to.Ptr(resource.Identifier), - TypeName: &resourceType, - }) + var resourceResponse *cloudcontrol.GetResourceOutput + for attempt := 1; attempt <= retryLimit; attempt++ { + resourceResponse, err = client.GetResource(ctx, &cloudcontrol.GetResourceInput{ + Identifier: to.Ptr(resource.Identifier), + TypeName: &resourceType, + }) + if err == nil { + break + } + + t.Logf("attempt %d/%d: failed to get resource %s with error: %s", attempt, retryLimit, resource.Identifier, err) + waitTime := baseWaitTime * time.Duration(attempt) + t.Logf("waiting for %s before next attempt", waitTime) + time.Sleep(waitTime) + } require.NoError(t, err) if resource.Properties != nil {