From ad8e7d613a654ecbc20e7ee71d904467813fa9f8 Mon Sep 17 00:00:00 2001 From: Joseph Kwasniewski Date: Tue, 17 Oct 2023 15:52:42 -0700 Subject: [PATCH] feat: add support for webhook retries Add a new field `.spec.webhooks[].retries` to specify the number of retries when calling a webhook. Signed-off-by: Joseph Kwasniewski --- artifacts/flagger/crd.yaml | 3 +++ charts/flagger/crds/crd.yaml | 3 +++ docs/gitbook/usage/webhooks.md | 4 ++++ go.mod | 2 ++ go.sum | 6 ++++++ kustomize/base/flagger/crd.yaml | 3 +++ pkg/apis/flagger/v1beta1/canary.go | 4 ++++ pkg/controller/webhook.go | 22 ++++++++++++---------- pkg/controller/webhook_test.go | 26 ++++++++++++++++++++++++++ 9 files changed, 63 insertions(+), 10 deletions(-) diff --git a/artifacts/flagger/crd.yaml b/artifacts/flagger/crd.yaml index c4824bbb3..3a124a4d9 100644 --- a/artifacts/flagger/crd.yaml +++ b/artifacts/flagger/crd.yaml @@ -1132,6 +1132,9 @@ spec: description: Request timeout for this webhook type: string pattern: "^[0-9]+(m|s)" + retries: + description: Number of retries for this webhook + type: integer metadata: description: Metadata (key-value pairs) for this webhook type: object diff --git a/charts/flagger/crds/crd.yaml b/charts/flagger/crds/crd.yaml index c4824bbb3..3a124a4d9 100644 --- a/charts/flagger/crds/crd.yaml +++ b/charts/flagger/crds/crd.yaml @@ -1132,6 +1132,9 @@ spec: description: Request timeout for this webhook type: string pattern: "^[0-9]+(m|s)" + retries: + description: Number of retries for this webhook + type: integer metadata: description: Metadata (key-value pairs) for this webhook type: object diff --git a/docs/gitbook/usage/webhooks.md b/docs/gitbook/usage/webhooks.md index 447dbc187..994885b67 100644 --- a/docs/gitbook/usage/webhooks.md +++ b/docs/gitbook/usage/webhooks.md @@ -41,6 +41,7 @@ Spec: - name: "start gate" type: confirm-rollout url: http://flagger-loadtester.test/gate/approve + retries: 5 - name: "helm test" type: pre-rollout url: http://flagger-helmtester.flagger/ @@ 
-72,6 +73,7 @@ Spec: - name: "send to Slack" type: event url: http://event-recevier.notifications/slack + retries: 3 metadata: environment: "test" cluster: "flagger-test" @@ -122,6 +124,8 @@ Event payload (HTTP POST): The event receiver can create alerts based on the received phase (possible values: `Initialized`, `Waiting`, `Progressing`, `Promoting`, `Finalising`, `Succeeded` or `Failed`). +The webhook request can be retried by specifying a positive integer in the `retries` field. + ## Load Testing For workloads that are not receiving constant traffic Flagger can be configured with a webhook, diff --git a/go.mod b/go.mod index 4bc39d848..81ece63cd 100644 --- a/go.mod +++ b/go.mod @@ -48,6 +48,8 @@ require ( github.com/google/uuid v1.3.1 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.1 // indirect github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 // indirect + github.com/hashicorp/go-cleanhttp v0.5.2 // indirect + github.com/hashicorp/go-retryablehttp v0.7.4 github.com/imdario/mergo v0.3.15 // indirect github.com/influxdata/line-protocol v0.0.0-20210922203350-b1ad95c89adf // indirect github.com/jmespath/go-jmespath v0.4.0 // indirect diff --git a/go.sum b/go.sum index 17b263a78..9aede1579 100644 --- a/go.sum +++ b/go.sum @@ -112,6 +112,11 @@ github.com/googleapis/gax-go/v2 v2.12.0/go.mod h1:y+aIqrI5eb1YGMVJfuV3185Ts/D7qK github.com/grpc-ecosystem/grpc-gateway v1.16.0/go.mod h1:BDjrQk3hbvj6Nolgz8mAMFbcEtjT1g+wF4CSlocrBnw= github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542 h1:2VTzZjLZBgl62/EtslCrtky5vbi9dd7HrQPQIx6wqiw= github.com/h2non/parth v0.0.0-20190131123155-b4df798d6542/go.mod h1:Ow0tF8D4Kplbc8s8sSb3V2oUCygFHVp8gC3Dn6U4MNI= +github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v0.9.2/go.mod h1:5CU+agLiy3J7N7QjHK5d05KxGsuXiQLrjA0H7acj2lQ= 
+github.com/hashicorp/go-retryablehttp v0.7.4 h1:ZQgVdpTdAL7WpMIwLzCfbalOcSUdkDZnpUv3/+BxzFA= +github.com/hashicorp/go-retryablehttp v0.7.4/go.mod h1:Jy/gPYAdjqffZ/yFGCFV2doI5wjtH1ewM9u8iYVjtX8= github.com/imdario/mergo v0.3.15 h1:M8XP7IuFNsqUx6VPK2P9OSmsYsI/YFaGil0uD21V3dM= github.com/imdario/mergo v0.3.15/go.mod h1:WBLT9ZmE3lPoWsEzCh9LPo3TiwVN+ZKEjmz+hD27ysY= github.com/influxdata/influxdb-client-go/v2 v2.12.3 h1:28nRlNMRIV4QbtIUvxhWqaxn0IpXeMSkY/uJa/O/vC4= @@ -176,6 +181,7 @@ github.com/stoewer/go-strcase v1.2.0/go.mod h1:IBiWB2sKIp3wVVQ3Y035++gc+knqhUQag github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= diff --git a/kustomize/base/flagger/crd.yaml b/kustomize/base/flagger/crd.yaml index c4824bbb3..3a124a4d9 100644 --- a/kustomize/base/flagger/crd.yaml +++ b/kustomize/base/flagger/crd.yaml @@ -1132,6 +1132,9 @@ spec: description: Request timeout for this webhook type: string pattern: "^[0-9]+(m|s)" + retries: + description: Number of retries for this webhook + type: integer metadata: description: Metadata (key-value pairs) for this webhook type: object diff --git a/pkg/apis/flagger/v1beta1/canary.go b/pkg/apis/flagger/v1beta1/canary.go index 0fe99dc30..54ae27061 100644 --- a/pkg/apis/flagger/v1beta1/canary.go +++ b/pkg/apis/flagger/v1beta1/canary.go @@ -394,6 +394,10 @@ type CanaryWebhook struct { // Metadata (key-value pairs) for this webhook // +optional Metadata *map[string]string 
`json:"metadata,omitempty"` + + // Number of retries for this webhook + // +optional + Retries int `json:"retries,omitempty"` } // CanaryWebhookPayload holds the deployment info and metadata sent to webhooks diff --git a/pkg/controller/webhook.go b/pkg/controller/webhook.go index fe820b033..5b08bd85d 100644 --- a/pkg/controller/webhook.go +++ b/pkg/controller/webhook.go @@ -18,21 +18,21 @@ package controller import ( "bytes" - "context" "encoding/json" "errors" "fmt" "io" - "net/http" "net/url" "strconv" "time" + "github.com/hashicorp/go-retryablehttp" + flaggerv1 "github.com/fluxcd/flagger/pkg/apis/flagger/v1beta1" "github.com/fluxcd/flagger/pkg/canary" ) -func callWebhook(webhook string, payload interface{}, timeout string) error { +func callWebhook(webhook string, payload interface{}, timeout string, retries int) error { payloadBin, err := json.Marshal(payload) if err != nil { return err @@ -43,7 +43,11 @@ func callWebhook(webhook string, payload interface{}, timeout string) error { return err } - req, err := http.NewRequest("POST", hook.String(), bytes.NewBuffer(payloadBin)) + httpClient := retryablehttp.NewClient() + httpClient.RetryMax = retries + httpClient.Logger = nil + httpClient.ErrorHandler = retryablehttp.PassthroughErrorHandler // return the last response as-is instead of a generic "giving up" error once retries run out; NOTE(review): assumes the code after Do inspects the status, confirm + req, err := retryablehttp.NewRequest("POST", hook.String(), bytes.NewBuffer(payloadBin)) if err != nil { return err } @@ -53,16 +57,14 @@ func callWebhook(webhook string, payload interface{}, timeout string) error { if timeout == "" { timeout = "10s" } - t, err := time.ParseDuration(timeout) if err != nil { return err } - ctx, cancel := context.WithTimeout(req.Context(), t) - defer cancel() + httpClient.HTTPClient.Timeout = t - r, err := http.DefaultClient.Do(req.WithContext(ctx)) + r, err := httpClient.Do(req) if err != nil { return err } @@ -98,7 +100,7 @@ func CallWebhook(canary flaggerv1.Canary, phase flaggerv1.CanaryPhase, w flagger w.Timeout = "10s" } - return callWebhook(w.URL, payload, w.Timeout) + return callWebhook(w.URL, payload, w.Timeout, w.Retries) } func CallEventWebhook(r 
*flaggerv1.Canary, w flaggerv1.CanaryWebhook, message, eventtype string) error { @@ -124,7 +126,7 @@ func CallEventWebhook(r *flaggerv1.Canary, w flaggerv1.CanaryWebhook, message, e payload.Metadata[key] = value } } - return callWebhook(w.URL, payload, "5s") + return callWebhook(w.URL, payload, "5s", w.Retries) } func canaryChecksum(c flaggerv1.Canary) string { diff --git a/pkg/controller/webhook_test.go b/pkg/controller/webhook_test.go index 7c1a4c797..d651da006 100644 --- a/pkg/controller/webhook_test.go +++ b/pkg/controller/webhook_test.go @@ -263,3 +263,29 @@ func TestCanaryChecksum(t *testing.T) { require.NotEqual(t, canary3sum, canary1sum) require.NotEqual(t, canary4sum, canary1sum) } + +// TestCallWebhook_Retries verifies that a webhook whose first response is a 500 succeeds once it is retried. +func TestCallWebhook_Retries(t *testing.T) { + const retries = 1 + failed := 0 + srv := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, _ *http.Request) { + if failed < retries { + failed++ + w.WriteHeader(http.StatusInternalServerError) + return + } + w.WriteHeader(http.StatusAccepted) + })) + defer srv.Close() + canary := flaggerv1.Canary{ + ObjectMeta: metav1.ObjectMeta{Name: "podinfo", Namespace: corev1.NamespaceDefault}, + } + hook := flaggerv1.CanaryWebhook{ + Name: "validation", + URL: srv.URL, + Retries: retries, + } + + err := CallWebhook(canary, flaggerv1.CanaryPhaseProgressing, hook) + require.NoError(t, err) +}