From dc6dd0661aebbe141bbe5764ae0daa64c00439ca Mon Sep 17 00:00:00 2001 From: Sanskar Jaiswal Date: Wed, 27 Sep 2023 15:25:41 +0530 Subject: [PATCH 1/2] gatewayapi: add support for b/g mirroring Add support for mirroring requests while performing B/G deployments with Gateway API. A `RequestMirror` filter pointing to the canary service is added to the HTTPRoute during a Canary run. During the Canary run, drift correction for `.spec.rules[].filters` is disabled to avoid removing the mirror filter. Signed-off-by: Sanskar Jaiswal --- pkg/router/gateway_api_v1beta1.go | 33 ++++++++++++++++++++++ pkg/router/gateway_api_v1beta1_test.go | 38 ++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) diff --git a/pkg/router/gateway_api_v1beta1.go b/pkg/router/gateway_api_v1beta1.go index ff023854e..c1762bad2 100644 --- a/pkg/router/gateway_api_v1beta1.go +++ b/pkg/router/gateway_api_v1beta1.go @@ -187,6 +187,16 @@ func (gwr *GatewayAPIV1Beta1Router) Reconcile(canary *flaggerv1.Canary) error { ignoreCmpOptions = append(ignoreCmpOptions, cmpopts.IgnoreFields(v1beta1.HTTPBackendRef{}, "Filters")) } + if canary.GetAnalysis().Mirror { + // If a Canary run is in progress, the HTTPRoute rule will have an extra filter of type RequestMirror + // which needs to be ignored so that the requests are mirrored to the canary deployment. 
+ inProgress := canary.Status.Phase == flaggerv1.CanaryPhaseWaiting || canary.Status.Phase == flaggerv1.CanaryPhaseProgressing || + canary.Status.Phase == flaggerv1.CanaryPhaseWaitingPromotion + if inProgress { + ignoreCmpOptions = append(ignoreCmpOptions, cmpopts.IgnoreFields(v1beta1.HTTPRouteRule{}, "Filters")) + } + } + if httpRoute != nil { specDiff := cmp.Diff( httpRoute.Spec, httpRouteSpec, @@ -249,6 +259,12 @@ func (gwr *GatewayAPIV1Beta1Router) GetRoutes(canary *flaggerv1.Canary) ( } } } + for _, filter := range rule.Filters { + if filter.Type == v1beta1.HTTPRouteFilterRequestMirror && filter.RequestMirror != nil && + string(filter.RequestMirror.BackendRef.Name) == canarySvcName { + mirrored = true + } + } } if weightedRule != nil { @@ -307,6 +323,23 @@ func (gwr *GatewayAPIV1Beta1Router) SetRoutes( }, }, } + + // If B/G mirroring is enabled, then add a route filter which mirrors the traffic + // to the canary service. + if mirrored && canary.GetAnalysis().Iterations > 0 { + weightedRouteRule.Filters = append(weightedRouteRule.Filters, v1beta1.HTTPRouteFilter{ + Type: v1beta1.HTTPRouteFilterRequestMirror, + RequestMirror: &v1beta1.HTTPRequestMirrorFilter{ + BackendRef: v1beta1.BackendObjectReference{ + Group: (*v1beta1.Group)(&backendRefGroup), + Kind: (*v1beta1.Kind)(&backendRefKind), + Name: v1beta1.ObjectName(canarySvcName), + Port: (*v1beta1.PortNumber)(&canary.Spec.Service.Port), + }, + }, + }) + } + httpRouteSpec := v1beta1.HTTPRouteSpec{ CommonRouteSpec: v1beta1.CommonRouteSpec{ ParentRefs: canary.Spec.Service.GatewayRefs, diff --git a/pkg/router/gateway_api_v1beta1_test.go b/pkg/router/gateway_api_v1beta1_test.go index 37a4627df..647061c32 100644 --- a/pkg/router/gateway_api_v1beta1_test.go +++ b/pkg/router/gateway_api_v1beta1_test.go @@ -233,6 +233,44 @@ func TestGatewayAPIV1Beta1Router_Routes(t *testing.T) { } assert.True(t, found) }) + + t.Run("b/g mirror", func(t *testing.T) { + canary := mocks.canary.DeepCopy() + canary.Spec.Analysis.Mirror = 
true + canary.Spec.Analysis.Iterations = 5 + _, _, cSvcName := canary.GetServiceNames() + + err = router.SetRoutes(canary, 100, 0, true) + hr, err := mocks.meshClient.GatewayapiV1beta1().HTTPRoutes("default").Get(context.TODO(), "podinfo", metav1.GetOptions{}) + require.NoError(t, err) + assert.Len(t, hr.Spec.Rules, 1) + + rule := hr.Spec.Rules[0] + var found bool + for _, filter := range rule.Filters { + if filter.Type == v1beta1.HTTPRouteFilterRequestMirror && filter.RequestMirror != nil && + string(filter.RequestMirror.BackendRef.Name) == cSvcName { + found = true + } + } + assert.True(t, found, "could not find request mirror filter in HTTPRoute") + + // Mark the status as progressing to assert that request mirror filter is ignored. + canary.Status.Phase = flaggerv1.CanaryPhaseProgressing + err = router.Reconcile(canary) + require.NoError(t, err) + + hr, err = mocks.meshClient.GatewayapiV1beta1().HTTPRoutes("default").Get(context.TODO(), "podinfo", metav1.GetOptions{}) + require.NoError(t, err) + assert.Len(t, hr.Spec.Rules, 1) + assert.Empty(t, cmp.Diff(hr.Spec.Rules[0], rule)) + + err = router.SetRoutes(canary, 100, 0, false) + hr, err = mocks.meshClient.GatewayapiV1beta1().HTTPRoutes("default").Get(context.TODO(), "podinfo", metav1.GetOptions{}) + require.NoError(t, err) + assert.Len(t, hr.Spec.Rules, 1) + assert.Len(t, hr.Spec.Rules[0].Filters, 0) + }) } func TestGatewayAPIV1Beta1Router_getSessionAffinityRouteRules(t *testing.T) { From 8dbc72d7ffe35bd15e95fb44f486a16aca02c44d Mon Sep 17 00:00:00 2001 From: Sanskar Jaiswal Date: Tue, 3 Oct 2023 13:36:39 +0530 Subject: [PATCH 2/2] gatewayapi: add docs for b/g mirroring Signed-off-by: Sanskar Jaiswal --- .../gatewayapi-progressive-delivery.md | 85 +++++++++++++++++++ docs/gitbook/usage/deployment-strategies.md | 7 +- 2 files changed, 88 insertions(+), 4 deletions(-) diff --git a/docs/gitbook/tutorials/gatewayapi-progressive-delivery.md b/docs/gitbook/tutorials/gatewayapi-progressive-delivery.md index 
fc6f9683e..b7a10532c 100644 --- a/docs/gitbook/tutorials/gatewayapi-progressive-delivery.md +++ b/docs/gitbook/tutorials/gatewayapi-progressive-delivery.md @@ -622,5 +622,90 @@ Events: Normal Synced 5s flagger Promotion completed! Scaling down podinfo.test ``` +## Traffic mirroring + +![Flagger Canary Traffic Shadowing](https://raw.githubusercontent.com/fluxcd/flagger/main/docs/diagrams/flagger-canary-traffic-mirroring.png) + +For applications that perform read operations, Flagger can be configured to do B/G tests with traffic mirroring. +Gateway API traffic mirroring will copy each incoming request, sending one request to the primary and one to the canary service. +The response from the primary is sent back to the user and the response from the canary is discarded. +Metrics are collected on both requests so that the deployment will only proceed if the canary metrics are within the threshold values. + +Note that mirroring should be used for requests that are **idempotent** or capable of being processed twice \(once by the primary and once by the canary\). + +You can enable mirroring by replacing `stepWeight` with `iterations` and by setting `analysis.mirror` to `true`: + +```yaml +apiVersion: flagger.app/v1beta1 +kind: Canary +metadata: + name: podinfo + namespace: test +spec: + # deployment reference + targetRef: + apiVersion: apps/v1 + kind: Deployment + name: podinfo + service: + # service port number + port: 9898 + # container port number or name (optional) + targetPort: 9898 + # Gateway API HTTPRoute host names + hosts: + - localproject.contour.io + # Reference to the Gateway that the generated HTTPRoute would attach to. 
+ gatewayRefs: + - name: contour + namespace: projectcontour + analysis: + # schedule interval + interval: 1m + # max number of failed metric checks before rollback + threshold: 5 + # total number of iterations + iterations: 10 + # enable traffic shadowing + mirror: true + # metric checks evaluated on every iteration + metrics: + - name: request-success-rate + thresholdRange: + min: 99 + interval: 1m + - name: request-duration + thresholdRange: + max: 500 + interval: 1m + webhooks: + - name: load-test + url: http://flagger-loadtester.test/ + timeout: 5s + metadata: + cmd: "hey -z 2m -q 10 -c 2 -host localproject.contour.io http://envoy.projectcontour/" +``` + +With the above configuration, Flagger will run a canary release with the following steps: + +* detect new revision \(deployment spec, secrets or configmaps changes\) +* scale from zero the canary deployment +* wait for the HPA to set the canary minimum replicas +* check canary pods health +* run the acceptance tests +* abort the canary release if tests fail +* start the load tests +* mirror 100% of the traffic from primary to canary +* check request success rate and request duration every minute +* abort the canary release if the metrics check failure threshold is reached +* stop traffic mirroring after the number of iterations is reached +* route live traffic to the canary pods +* promote the canary \(update the primary secrets, configmaps and deployment spec\) +* wait for the primary deployment rollout to finish +* wait for the HPA to set the primary minimum replicas +* check primary pods health +* switch live traffic back to primary +* scale to zero the canary +* send notification with the canary analysis result The above procedures can be extended with [custom metrics](../usage/metrics.md) checks, [webhooks](../usage/webhooks.md), [manual promotion](../usage/webhooks.md#manual-gating) approval and [Slack or MS Teams](../usage/alerting.md) notifications. 
diff --git a/docs/gitbook/usage/deployment-strategies.md b/docs/gitbook/usage/deployment-strategies.md index fae2f62e2..e7d624455 100644 --- a/docs/gitbook/usage/deployment-strategies.md +++ b/docs/gitbook/usage/deployment-strategies.md @@ -9,7 +9,7 @@ Flagger can run automated application analysis, promotion and rollback for the f * **Blue/Green** \(traffic switching\) * Kubernetes CNI, Istio, Linkerd, App Mesh, NGINX, Contour, Gloo Edge, Open Service Mesh, Gateway API * **Blue/Green Mirroring** \(traffic shadowing\) - * Istio + * Istio, Gateway API * **Canary Release with Session Affinity** \(progressive traffic shifting combined with cookie based routing\) * Istio, Gateway API @@ -353,8 +353,6 @@ you should consider what will happen if a write is duplicated and handled by the To use mirroring, set `spec.analysis.mirror` to `true`. -Istio example: - ```yaml analysis: # schedule interval (default 60s) @@ -363,9 +361,10 @@ Istio example: iterations: 10 # max number of failed iterations before rollback threshold: 2 - # Traffic shadowing (compatible with Istio only) + # Traffic shadowing mirror: true # Weight of the traffic mirrored to your canary (defaults to 100%) + # Only applicable for Istio. mirrorWeight: 100 ```