From e67589a4420c674d8370884dff2adad8c65598a7 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 15 Nov 2023 13:43:40 -0800 Subject: [PATCH 1/4] add querier.max-subquery-steps to make subquery step size check optional Signed-off-by: Ben Ye --- pkg/cortex/modules.go | 1 + pkg/querier/querier.go | 4 ++ .../queryrange/query_range_middlewares.go | 3 +- .../query_range_middlewares_test.go | 1 + pkg/querier/tripperware/roundtrip.go | 5 +- pkg/querier/tripperware/roundtrip_test.go | 72 ++++++++++++------- 6 files changed, 59 insertions(+), 27 deletions(-) diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index 64cce59870..7858483c68 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -486,6 +486,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro t.Overrides, queryAnalyzer, t.Cfg.Querier.DefaultEvaluationInterval, + t.Cfg.Querier.MaxSubQuerySteps, ) return services.NewIdleService(nil, func(_ error) error { diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index df13fb7ce6..d66697674d 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -64,6 +64,9 @@ type Config struct { // step if not specified. DefaultEvaluationInterval time.Duration `yaml:"default_evaluation_interval"` + // Limit of number of steps allowed for every subquery expression in a query. + MaxSubQuerySteps int64 `yaml:"max_subquery_steps"` + // Directory for ActiveQueryTracker. If empty, ActiveQueryTracker will be disabled and MaxConcurrent will not be applied (!). // ActiveQueryTracker logs queries that were active during the last crash, but logs them on the next startup. // However, we need to use active query tracker, otherwise we cannot limit Max Concurrent queries in the PromQL @@ -114,6 +117,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.LookbackDelta, "querier.lookback-delta", 5*time.Minute, "Time since the last sample after which a time series is considered stale and ignored by expression evaluations.") f.DurationVar(&cfg.ShuffleShardingIngestersLookbackPeriod, "querier.shuffle-sharding-ingesters-lookback-period", 0, "When distributor's sharding strategy is shuffle-sharding and this setting is > 0, queriers fetch in-memory series from the minimum set of required ingesters, selecting only ingesters which may have received series since 'now - lookback period'. The lookback period should be greater or equal than the configured 'query store after' and 'query ingesters within'. If this setting is 0, queriers always query all ingesters (ingesters shuffle sharding on read path is disabled).") f.BoolVar(&cfg.ThanosEngine, "querier.thanos-engine", false, "Experimental. Use Thanos promql engine https://github.com/thanos-io/promql-engine rather than the Prometheus promql engine.") + f.Int64Var(&cfg.MaxSubQuerySteps, "querier.max-subquery-steps", 0, "Max number of steps allowed for every subquery expression in query. Number of steps is calculated using subquery range / step. A value > 0 enables it.") } // Validate the config diff --git a/pkg/querier/tripperware/queryrange/query_range_middlewares.go b/pkg/querier/tripperware/queryrange/query_range_middlewares.go index 09a768028a..89cdaf1d7c 100644 --- a/pkg/querier/tripperware/queryrange/query_range_middlewares.go +++ b/pkg/querier/tripperware/queryrange/query_range_middlewares.go @@ -40,7 +40,8 @@ type Config struct { CacheResults bool `yaml:"cache_results"` MaxRetries int `yaml:"max_retries"` // List of headers which query_range middleware chain would forward to downstream querier. - ForwardHeaders flagext.StringSlice `yaml:"forward_headers_list"` + ForwardHeaders flagext.StringSlice `yaml:"forward_headers_list"` + MaxSubQueryExprSteps int64 `yaml:"max_subquery_expr_steps"` // Populated based on the query configuration VerticalShardSize int `yaml:"-"` diff --git a/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go b/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go index 5ade2abd52..6276be72bd 100644 --- a/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go +++ b/pkg/querier/tripperware/queryrange/query_range_middlewares_test.go @@ -74,6 +74,7 @@ func TestRoundTrip(t *testing.T) { nil, qa, time.Minute, + 0, ) for i, tc := range []struct { diff --git a/pkg/querier/tripperware/roundtrip.go b/pkg/querier/tripperware/roundtrip.go index 93fa1234cd..6aefe4ccec 100644 --- a/pkg/querier/tripperware/roundtrip.go +++ b/pkg/querier/tripperware/roundtrip.go @@ -103,6 +103,7 @@ func NewQueryTripperware( limits Limits, queryAnalyzer querysharding.Analyzer, defaultSubQueryInterval time.Duration, + maxSubQuerySteps int64, ) Tripperware { // Per tenant query metrics. queriesPerTenant := promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ @@ -145,10 +146,10 @@ func NewQueryTripperware( activeUsers.UpdateUserTimestamp(userStr, time.Now()) queriesPerTenant.WithLabelValues(op, userStr).Inc() - if isQuery || isQueryRange { + if maxSubQuerySteps > 0 && (isQuery || isQueryRange) { query := r.FormValue("query") // Check subquery step size. - if err := SubQueryStepSizeCheck(query, defaultSubQueryInterval, MaxStep); err != nil { + if err := SubQueryStepSizeCheck(query, defaultSubQueryInterval, maxSubQuerySteps); err != nil { return nil, err } } diff --git a/pkg/querier/tripperware/roundtrip_test.go b/pkg/querier/tripperware/roundtrip_test.go index 8497491ba6..e52514ee8d 100644 --- a/pkg/querier/tripperware/roundtrip_test.go +++ b/pkg/querier/tripperware/roundtrip_test.go @@ -109,46 +109,69 @@ func TestRoundTrip(t *testing.T) { path, expectedBody string expectedErr error limits Limits + maxSubQuerySteps int64 }{ { - path: "/foo", - expectedBody: "bar", - limits: defaultOverrides, + path: "/foo", + expectedBody: "bar", + limits: defaultOverrides, + maxSubQuerySteps: 11000, }, { - path: queryExemplar, - expectedBody: "bar", - limits: defaultOverrides, + path: queryExemplar, + expectedBody: "bar", + limits: defaultOverrides, + maxSubQuerySteps: 11000, }, { - path: queryRange, - expectedBody: responseBody, - limits: defaultOverrides, + path: queryRange, + expectedBody: responseBody, + limits: defaultOverrides, + maxSubQuerySteps: 11000, }, { - path: query, - expectedBody: "bar", - limits: defaultOverrides, + path: query, + expectedBody: "bar", + limits: defaultOverrides, + maxSubQuerySteps: 11000, }, { - path: queryNonShardable, - expectedBody: "bar", - limits: defaultOverrides, + path: queryNonShardable, + expectedBody: "bar", + limits: defaultOverrides, + maxSubQuerySteps: 11000, }, { - path: queryNonShardable, - expectedBody: "bar", - limits: shardingOverrides, + path: queryNonShardable, + expectedBody: "bar", + limits: shardingOverrides, + maxSubQuerySteps: 11000, }, { - path: query, - expectedBody: responseBody, - limits: shardingOverrides, + path: query, + expectedBody: responseBody, + limits: shardingOverrides, + maxSubQuerySteps: 11000, }, + // Shouldn't hit subquery step limit because max steps is set to 0 so this check is disabled. { - path: querySubqueryStepSizeTooSmall, - expectedErr: httpgrpc.Errorf(http.StatusBadRequest, ErrSubQueryStepTooSmall, 11000), - limits: defaultOverrides, + path: querySubqueryStepSizeTooSmall, + expectedBody: "bar", + limits: defaultOverrides, + maxSubQuerySteps: 0, + }, + // Shouldn't hit subquery step limit because max steps is higher, which is 100K. + { + path: querySubqueryStepSizeTooSmall, + expectedBody: "bar", + limits: defaultOverrides, + maxSubQuerySteps: 100000, + }, + { + path: querySubqueryStepSizeTooSmall, + expectedErr: httpgrpc.Errorf(http.StatusBadRequest, ErrSubQueryStepTooSmall, 11000), + limits: defaultOverrides, + maxSubQuerySteps: 11000, }, } { t.Run(tc.path, func(t *testing.T) { @@ -177,6 +200,7 @@ func TestRoundTrip(t *testing.T) { tc.limits, querysharding.NewQueryAnalyzer(), time.Minute, + tc.maxSubQuerySteps, ) resp, err := tw(downstream).RoundTrip(req) if tc.expectedErr == nil { From c9b9f3e10eeaa1a66a374393aa019d7b6c9e643f Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 15 Nov 2023 14:16:27 -0800 Subject: [PATCH 2/4] update Signed-off-by: Ben Ye --- pkg/querier/tripperware/queryrange/query_range_middlewares.go | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/pkg/querier/tripperware/queryrange/query_range_middlewares.go b/pkg/querier/tripperware/queryrange/query_range_middlewares.go index 89cdaf1d7c..09a768028a 100644 --- a/pkg/querier/tripperware/queryrange/query_range_middlewares.go +++ b/pkg/querier/tripperware/queryrange/query_range_middlewares.go @@ -40,8 +40,7 @@ type Config struct { CacheResults bool `yaml:"cache_results"` MaxRetries int `yaml:"max_retries"` // List of headers which query_range middleware chain would forward to downstream querier. - ForwardHeaders flagext.StringSlice `yaml:"forward_headers_list"` - MaxSubQueryExprSteps int64 `yaml:"max_subquery_expr_steps"` + ForwardHeaders flagext.StringSlice `yaml:"forward_headers_list"` // Populated based on the query configuration VerticalShardSize int `yaml:"-"` From 834ad93209a6717047ce543de7a770ee0551ec77 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 15 Nov 2023 22:17:43 +0000 Subject: [PATCH 3/4] disable subquery step size check by default, make it optional Signed-off-by: Ben Ye --- docs/blocks-storage/querier.md | 5 +++++ docs/configuration/config-file-reference.md | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/docs/blocks-storage/querier.md b/docs/blocks-storage/querier.md index ae9360b501..b1bdd3a90c 100644 --- a/docs/blocks-storage/querier.md +++ b/docs/blocks-storage/querier.md @@ -157,6 +157,11 @@ querier: # CLI flag: -querier.default-evaluation-interval [default_evaluation_interval: | default = 1m] + # Max number of steps allowed for every subquery expression in query. Number + # of steps is calculated using subquery range / step. A value > 0 enables it. + # CLI flag: -querier.max-subquery-steps + [max_subquery_steps: | default = 0] + # Active query tracker monitors active queries, and writes them to the file in # given directory. If Cortex discovers any queries in this log during startup, # it will log them to the log file. Setting to empty value disables active diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index 2958ad71a6..e0f5871201 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -3436,6 +3436,11 @@ The `querier_config` configures the Cortex querier. # CLI flag: -querier.default-evaluation-interval [default_evaluation_interval: | default = 1m] +# Max number of steps allowed for every subquery expression in query. Number of +# steps is calculated using subquery range / step. A value > 0 enables it. +# CLI flag: -querier.max-subquery-steps +[max_subquery_steps: | default = 0] + # Active query tracker monitors active queries, and writes them to the file in # given directory. If Cortex discovers any queries in this log during startup, # it will log them to the log file. Setting to empty value disables active query From 63dfb89c05cadd7ac278fc2fb60eaecbea4cdca6 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Wed, 15 Nov 2023 14:30:46 -0800 Subject: [PATCH 4/4] fix integ test and add changelog Signed-off-by: Ben Ye --- CHANGELOG.md | 1 + integration/query_frontend_test.go | 5 +++++ 2 files changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dba1248b2c..24b9cb02f6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## master / unreleased * [CHANGE] Azure Storage: Upgraded objstore dependency and support Azure Workload Identity Authentication. Added `connection_string` to support authenticating via SAS token. Marked `msi_resource` config as deprecating. #5645 +* [CHANGE] Query Frontend: Expose `-querier.max-subquery-steps` to configure subquery max steps check. By default, the limit is set to 0, which is disabled. #5656 * [FEATURE] Ingester: Add per-tenant new metric `cortex_ingester_tsdb_data_replay_duration_seconds`. #5477 * [ENHANCEMENT] Store Gateway: Added `-store-gateway.enabled-tenants` and `-store-gateway.disabled-tenants` to explicitly enable or disable store-gateway for specific tenants. #5638 * [BUGFIX] Query Frontend: Fix query string being omitted in query stats log. #5655 diff --git a/integration/query_frontend_test.go b/integration/query_frontend_test.go index 0dbbadeb10..5b3c0029c0 100644 --- a/integration/query_frontend_test.go +++ b/integration/query_frontend_test.go @@ -220,6 +220,11 @@ func TestQueryFrontendSubQueryStepSize(t *testing.T) { minio := e2edb.NewMinio(9000, BlocksStorageFlags()["-blocks-storage.s3.bucket-name"]) require.NoError(t, s.StartAndWaitReady(minio)) + + // Enable subquery step size check. + flags = mergeFlags(e2e.EmptyFlags(), map[string]string{ + "-querier.max-subquery-steps": "11000", + }) return cortexConfigFile, flags }, })