Skip to content

Commit

Permalink
Add retries to all resources and datasources (#286)
Browse files Browse the repository at this point in the history
* Work Pool resource: retry Create and Read

Adds retries for Create and Read on the Work Pool resource.

* Use hashicorp/retryablehttp for retries

Uses the hashicorp/retryablehttp package to get an HTTP client with
retries and exponential backoff out of the box. This is implemented
within our client package, meaning calls to the CRUD methods from both
resources and datasources will inherit that functionality.

* Remove unused client functions

* Expand comment for retryablehttp usage

* Exit on error for blockSchemaClient.List

* Increase the retry interval and maximum

Blocks, and other resources, need more time to resolve objects so let's
bump up the defaults to accommodate that.

* Bump min retry wait seconds to 3

Still saw failures in acceptance tests in CI

* Bump retry seconds to 3

Was still seeing errors in the acceptance tests

* Remove time/retry overrides, add retry check func

- Adds a function to define how to determine whether or not to retry,
  extending the default function so we can accept 409s and retry 404s.
  We can adapt this function over time as well.
- Removes the time and retry overrides, as those were not the reason
  that block tests were failing - the requests themselves were passing,
  and didn't need retries - what they needed was to retry 404s and not
  retry 409s.

* Use the stdlib http.Client

Uses the stdlib http.Client interface so we don't need to specify the
retryablehttp variant in every client method.
  • Loading branch information
mitchnielsen authored Oct 23, 2024
1 parent f2f4ccd commit 956d9c6
Show file tree
Hide file tree
Showing 7 changed files with 68 additions and 104 deletions.
4 changes: 2 additions & 2 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@ go 1.21
toolchain go1.22.1

require (
github.com/avast/retry-go/v4 v4.6.0
github.com/go-test/deep v1.1.1
github.com/google/uuid v1.6.0
github.com/hashicorp/go-retryablehttp v0.7.7
github.com/hashicorp/terraform-plugin-docs v0.19.4
github.com/hashicorp/terraform-plugin-framework v1.11.0
github.com/hashicorp/terraform-plugin-framework-jsontypes v0.1.0
Expand Down Expand Up @@ -41,7 +41,6 @@ require (
github.com/hashicorp/go-hclog v1.6.3 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-plugin v1.6.0 // indirect
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
github.com/hashicorp/go-uuid v1.0.3 // indirect
github.com/hashicorp/go-version v1.7.0 // indirect
github.com/hashicorp/hc-install v0.8.0 // indirect
Expand All @@ -67,6 +66,7 @@ require (
github.com/posener/complete v1.2.3 // indirect
github.com/shopspring/decimal v1.3.1 // indirect
github.com/spf13/cast v1.5.0 // indirect
github.com/stretchr/testify v1.9.0 // indirect
github.com/vmihailenco/msgpack v4.0.4+incompatible // indirect
github.com/vmihailenco/msgpack/v5 v5.4.1 // indirect
github.com/vmihailenco/tagparser/v2 v2.0.0 // indirect
Expand Down
2 changes: 0 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,6 @@ github.com/apparentlymart/go-textseg/v15 v15.0.0 h1:uYvfpb3DyLSCGWnctWKGj857c6ew
github.com/apparentlymart/go-textseg/v15 v15.0.0/go.mod h1:K8XmNZdhEBkdlyDdvbmmsvpAG721bKi0joRfFdHIWJ4=
github.com/armon/go-radix v1.0.0 h1:F4z6KzEeeQIMeLFa97iZU6vupzoecKdU5TX24SNppXI=
github.com/armon/go-radix v1.0.0/go.mod h1:ufUuZ+zHj4x4TnLV4JWEpy2hxWSpsRywHrMgIH9cCH8=
github.com/avast/retry-go/v4 v4.6.0 h1:K9xNA+KeB8HHc2aWFuLb25Offp+0iVRXEvFx8IinRJA=
github.com/avast/retry-go/v4 v4.6.0/go.mod h1:gvWlPhBVsvBbLkVGDg/KwvBv0bEkCOLRRSHKIr2PyOE=
github.com/bgentry/speakeasy v0.1.0 h1:ByYyxL9InA1OWqxJqqp2A5pYHUrCiAL6K3J+LKSsQkY=
github.com/bgentry/speakeasy v0.1.0/go.mod h1:+zsyZBPWlz7T6j88CTgSN5bM796AkVf0kBD4zp0CCIs=
github.com/bmatcuk/doublestar/v4 v4.6.1 h1:FH9SifrbvJhnlQpztAx++wlkk70QBf0iBWDwNy7PA4I=
Expand Down
65 changes: 44 additions & 21 deletions internal/client/client.go
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
package client

import (
"context"
"errors"
"fmt"
"net/http"
Expand All @@ -10,14 +11,38 @@ import (
"github.com/google/uuid"

"github.com/prefecthq/terraform-provider-prefect/internal/api"

retryablehttp "github.com/hashicorp/go-retryablehttp"
)

var _ = api.PrefectClient(&Client{})

// New creates and returns new client instance.
func New(opts ...Option) (*Client, error) {
// Uses the retryablehttp package for built-in retries
// with exponential backoff.
//
// Some notable defaults from that package include:
// - max retries: 4
// - retry wait minimum seconds: 1
// - retry wait maximum seconds: 30
//
// All defaults are defined in
// https://github.com/hashicorp/go-retryablehttp/blob/main/client.go#L48-L51.
retryableClient := retryablehttp.NewClient()

// By default, retryablehttp will only retry requests if there was some kind
// of transient server or networking error. We can be more specific with this
// by providing a custom function for determining whether or not to retry.
retryableClient.CheckRetry = checkRetryPolicy

// Finally, convert the retryablehttp client to a standard http client.
// This allows us to retain the `http.Client` interface, and avoid specifying
// the `retryablehttp.Client` interface in our client methods.
httpClient := retryableClient.StandardClient()

client := &Client{
hc: http.DefaultClient,
hc: httpClient,
}

var errs []error
Expand All @@ -36,26 +61,6 @@ func New(opts ...Option) (*Client, error) {
return client, nil
}

// MustNew returns a new client or panics if an error occurred.
func MustNew(opts ...Option) *Client {
client, err := New(opts...)
if err != nil {
panic(fmt.Sprintf("error occurred during construction: %s", err))
}

return client
}

// WithClient configures the underlying http.Client used to send
// requests.
func WithClient(httpClient *http.Client) Option {
return func(client *Client) error {
client.hc = httpClient

return nil
}
}

// WithEndpoint configures the client to communicate with a self-hosted
// Prefect server or Prefect Cloud.
func WithEndpoint(endpoint string) Option {
Expand Down Expand Up @@ -97,3 +102,21 @@ func WithDefaults(accountID uuid.UUID, workspaceID uuid.UUID) Option {
return nil
}
}

func checkRetryPolicy(ctx context.Context, resp *http.Response, err error) (bool, error) {
// If the response is a 409 (StatusConflict), that means the request
// eventually succeeded and we don't need to make the request again.
if resp.StatusCode == http.StatusConflict {
return false, nil
}

// If the response is a 404 (NotFound), try again. This is particularly
// relevant for block-related objects that are created asynchronously.
if resp.StatusCode == http.StatusNotFound {
return true, err
}

// Fall back to the default retry policy for any other status codes.
//nolint:wrapcheck // we've extended this method, no need to wrap error
return retryablehttp.DefaultRetryPolicy(ctx, resp, err)
}
44 changes: 11 additions & 33 deletions internal/provider/resources/block.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ import (
"fmt"
"strings"

"github.com/avast/retry-go/v4"
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-framework-jsontypes/jsontypes"
"github.com/hashicorp/terraform-plugin-framework/diag"
Expand All @@ -19,7 +18,6 @@ import (
"github.com/prefecthq/terraform-provider-prefect/internal/api"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/customtypes"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/helpers"
"github.com/prefecthq/terraform-provider-prefect/internal/utils"
)

type BlockResource struct {
Expand Down Expand Up @@ -165,59 +163,39 @@ func (r *BlockResource) Create(ctx context.Context, req resource.CreateRequest,

return
}

blockSchemaClient, err := r.client.BlockSchemas(plan.AccountID.ValueUUID(), plan.WorkspaceID.ValueUUID())
if err != nil {
resp.Diagnostics.Append(helpers.CreateClientErrorDiagnostic("Block Schema", err))

return
}

blockDocumentClient, err := r.client.BlockDocuments(plan.AccountID.ValueUUID(), plan.WorkspaceID.ValueUUID())
if err != nil {
resp.Diagnostics.Append(helpers.CreateClientErrorDiagnostic("Block Document", err))

return
}

// NOTE: here, we add retries for BlockType and BlockSchema fetches.
// BlockTypes / BlockSchemas are tied to a particular Workspace, and
// they are created asynchronously after a new Workspace request resolves.
// This means that if a `prefect_block` is created in the same plan as a
// new `prefect_workspace`, there is a potential race condition that could occur if the
// Workspace is created + resolves, but the BlockTypes / BlockSchemas have yet to have
// been created - this has been observed to happen even with a depends_on relationship defined.
// To eliminate spurious create failures, test flakiness, and general user confusion,
// we'll add a retry for these specific operations.
blockType, err := retry.DoWithData(
func() (*api.BlockType, error) {
return blockTypeClient.GetBySlug(ctx, plan.TypeSlug.ValueString())
},
utils.DefaultRetryOptions...,
)
blockType, err := blockTypeClient.GetBySlug(ctx, plan.TypeSlug.ValueString())
if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Block Type", "get_by_slug", err))

return
}

blockSchemas, err := retry.DoWithData(
func() ([]*api.BlockSchema, error) {
blockSchemas, err := blockSchemaClient.List(ctx, []uuid.UUID{blockType.ID})
if err != nil {
return nil, fmt.Errorf("http request to fetch block schemas failed: %w", err)
}
blockSchemas, err := blockSchemaClient.List(ctx, []uuid.UUID{blockType.ID})
if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Block Schema", "list", err))

if len(blockSchemas) == 0 {
return nil, fmt.Errorf("no block schemas found for %s block type slug", plan.TypeSlug.ValueString())
}
return
}

return blockSchemas, nil
},
utils.DefaultRetryOptions...,
)
if err != nil {
if len(blockSchemas) == 0 {
resp.Diagnostics.AddError(
"Failed to fetch block schemas",
fmt.Sprintf("Failed to fetch block schemas for %s block type slug due to: %s", plan.TypeSlug.ValueString(), err.Error()),
"No block schemas found",
fmt.Sprintf("No block schemas found for %s block type slug", plan.TypeSlug.ValueString()),
)

return
Expand Down
18 changes: 5 additions & 13 deletions internal/provider/resources/variable.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@ import (
"context"
"strings"

"github.com/avast/retry-go/v4"
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-framework/attr"
"github.com/hashicorp/terraform-plugin-framework/diag"
Expand All @@ -20,7 +19,6 @@ import (
"github.com/prefecthq/terraform-provider-prefect/internal/api"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/customtypes"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/helpers"
"github.com/prefecthq/terraform-provider-prefect/internal/utils"
)

var (
Expand Down Expand Up @@ -177,17 +175,11 @@ func (r *VariableResource) Create(ctx context.Context, req resource.CreateReques
return
}

variable, err := retry.DoWithData(
func() (*api.Variable, error) {
return client.Create(ctx, api.VariableCreate{
Name: plan.Name.ValueString(),
Value: plan.Value.ValueString(),
Tags: tags,
})
},
utils.DefaultRetryOptions...,
)

variable, err := client.Create(ctx, api.VariableCreate{
Name: plan.Name.ValueString(),
Value: plan.Value.ValueString(),
Tags: tags,
})
if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Variable", "create", err))

Expand Down
24 changes: 6 additions & 18 deletions internal/provider/resources/workspace.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ import (
"fmt"
"strings"

"github.com/avast/retry-go/v4"
"github.com/google/uuid"
"github.com/hashicorp/terraform-plugin-framework/diag"
"github.com/hashicorp/terraform-plugin-framework/path"
Expand All @@ -18,7 +17,6 @@ import (
"github.com/prefecthq/terraform-provider-prefect/internal/api"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/customtypes"
"github.com/prefecthq/terraform-provider-prefect/internal/provider/helpers"
"github.com/prefecthq/terraform-provider-prefect/internal/utils"
)

var (
Expand Down Expand Up @@ -156,24 +154,14 @@ func (r *WorkspaceResource) Create(ctx context.Context, req resource.CreateReque
return
}

// In certain scenarios, such as in tests when multiple Workspaces are created at the same time
// for test isolation, Prefect will return a "503 Service Unavailable" error.
// To mitigate this, we will retry the Workspace creation.
// See https://github.com/PrefectHQ/terraform-provider-prefect/issues/241 for more information.
workspace, err := retry.DoWithData(
func() (*api.Workspace, error) {
return client.Create(
ctx,
api.WorkspaceCreate{
Name: plan.Name.ValueString(),
Handle: plan.Handle.ValueString(),
Description: plan.Description.ValueStringPointer(),
},
)
workspace, err := client.Create(
ctx,
api.WorkspaceCreate{
Name: plan.Name.ValueString(),
Handle: plan.Handle.ValueString(),
Description: plan.Description.ValueStringPointer(),
},
utils.DefaultRetryOptions...,
)

if err != nil {
resp.Diagnostics.Append(helpers.ResourceClientErrorDiagnostic("Workspace", "create", err))

Expand Down
15 changes: 0 additions & 15 deletions internal/utils/retry.go

This file was deleted.

0 comments on commit 956d9c6

Please sign in to comment.