Skip to content

Commit

Permalink
Add querier.max-subquery-steps to make subquery step size check optio…
Browse files Browse the repository at this point in the history
…nal (#5656)

* add querier.max-subquery-steps to make subquery step size check optional

Signed-off-by: Ben Ye <[email protected]>

* update

Signed-off-by: Ben Ye <[email protected]>

* disable subquery step size check by default, make it optional

Signed-off-by: Ben Ye <[email protected]>

* fix integ test and add changelog

Signed-off-by: Ben Ye <[email protected]>

---------

Signed-off-by: Ben Ye <[email protected]>
  • Loading branch information
yeya24 authored Nov 16, 2023
1 parent b601141 commit 5a7228e
Show file tree
Hide file tree
Showing 9 changed files with 73 additions and 26 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## master / unreleased
* [CHANGE] Azure Storage: Upgraded objstore dependency and support Azure Workload Identity Authentication. Added `connection_string` to support authenticating via SAS token. Marked `msi_resource` config as deprecating. #5645
* [CHANGE] Query Frontend: Expose `-querier.max-subquery-steps` to configure subquery max steps check. By default, the limit is set to 0, which is disabled. #5656
* [FEATURE] Ingester: Add per-tenant new metric `cortex_ingester_tsdb_data_replay_duration_seconds`. #5477
* [ENHANCEMENT] Store Gateway: Added `-store-gateway.enabled-tenants` and `-store-gateway.disabled-tenants` to explicitly enable or disable store-gateway for specific tenants. #5638
* [BUGFIX] Query Frontend: Fix query string being omitted in query stats log. #5655
Expand Down
5 changes: 5 additions & 0 deletions docs/blocks-storage/querier.md
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,11 @@ querier:
# CLI flag: -querier.default-evaluation-interval
[default_evaluation_interval: <duration> | default = 1m]

# Max number of steps allowed for every subquery expression in query. Number
# of steps is calculated using subquery range / step. A value > 0 enables it.
# CLI flag: -querier.max-subquery-steps
[max_subquery_steps: <int> | default = 0]

# Active query tracker monitors active queries, and writes them to the file in
# given directory. If Cortex discovers any queries in this log during startup,
# it will log them to the log file. Setting to empty value disables active
Expand Down
5 changes: 5 additions & 0 deletions docs/configuration/config-file-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -3436,6 +3436,11 @@ The `querier_config` configures the Cortex querier.
# CLI flag: -querier.default-evaluation-interval
[default_evaluation_interval: <duration> | default = 1m]
# Max number of steps allowed for every subquery expression in query. Number of
# steps is calculated using subquery range / step. A value > 0 enables it.
# CLI flag: -querier.max-subquery-steps
[max_subquery_steps: <int> | default = 0]
# Active query tracker monitors active queries, and writes them to the file in
# given directory. If Cortex discovers any queries in this log during startup,
# it will log them to the log file. Setting to empty value disables active query
Expand Down
5 changes: 5 additions & 0 deletions integration/query_frontend_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,11 @@ func TestQueryFrontendSubQueryStepSize(t *testing.T) {

minio := e2edb.NewMinio(9000, BlocksStorageFlags()["-blocks-storage.s3.bucket-name"])
require.NoError(t, s.StartAndWaitReady(minio))

// Enable subquery step size check.
flags = mergeFlags(e2e.EmptyFlags(), map[string]string{
"-querier.max-subquery-steps": "11000",
})
return cortexConfigFile, flags
},
})
Expand Down
1 change: 1 addition & 0 deletions pkg/cortex/modules.go
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,7 @@ func (t *Cortex) initQueryFrontendTripperware() (serv services.Service, err erro
t.Overrides,
queryAnalyzer,
t.Cfg.Querier.DefaultEvaluationInterval,
t.Cfg.Querier.MaxSubQuerySteps,
)

return services.NewIdleService(nil, func(_ error) error {
Expand Down
4 changes: 4 additions & 0 deletions pkg/querier/querier.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,9 @@ type Config struct {
// step if not specified.
DefaultEvaluationInterval time.Duration `yaml:"default_evaluation_interval"`

// Limit of number of steps allowed for every subquery expression in a query.
MaxSubQuerySteps int64 `yaml:"max_subquery_steps"`

// Directory for ActiveQueryTracker. If empty, ActiveQueryTracker will be disabled and MaxConcurrent will not be applied (!).
// ActiveQueryTracker logs queries that were active during the last crash, but logs them on the next startup.
// However, we need to use active query tracker, otherwise we cannot limit Max Concurrent queries in the PromQL
Expand Down Expand Up @@ -114,6 +117,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
f.DurationVar(&cfg.LookbackDelta, "querier.lookback-delta", 5*time.Minute, "Time since the last sample after which a time series is considered stale and ignored by expression evaluations.")
f.DurationVar(&cfg.ShuffleShardingIngestersLookbackPeriod, "querier.shuffle-sharding-ingesters-lookback-period", 0, "When distributor's sharding strategy is shuffle-sharding and this setting is > 0, queriers fetch in-memory series from the minimum set of required ingesters, selecting only ingesters which may have received series since 'now - lookback period'. The lookback period should be greater or equal than the configured 'query store after' and 'query ingesters within'. If this setting is 0, queriers always query all ingesters (ingesters shuffle sharding on read path is disabled).")
f.BoolVar(&cfg.ThanosEngine, "querier.thanos-engine", false, "Experimental. Use Thanos promql engine https://github.com/thanos-io/promql-engine rather than the Prometheus promql engine.")
f.Int64Var(&cfg.MaxSubQuerySteps, "querier.max-subquery-steps", 0, "Max number of steps allowed for every subquery expression in query. Number of steps is calculated using subquery range / step. A value > 0 enables it.")
}

// Validate the config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,7 @@ func TestRoundTrip(t *testing.T) {
nil,
qa,
time.Minute,
0,
)

for i, tc := range []struct {
Expand Down
5 changes: 3 additions & 2 deletions pkg/querier/tripperware/roundtrip.go
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ func NewQueryTripperware(
limits Limits,
queryAnalyzer querysharding.Analyzer,
defaultSubQueryInterval time.Duration,
maxSubQuerySteps int64,
) Tripperware {
// Per tenant query metrics.
queriesPerTenant := promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{
Expand Down Expand Up @@ -145,10 +146,10 @@ func NewQueryTripperware(
activeUsers.UpdateUserTimestamp(userStr, time.Now())
queriesPerTenant.WithLabelValues(op, userStr).Inc()

if isQuery || isQueryRange {
if maxSubQuerySteps > 0 && (isQuery || isQueryRange) {
query := r.FormValue("query")
// Check subquery step size.
if err := SubQueryStepSizeCheck(query, defaultSubQueryInterval, MaxStep); err != nil {
if err := SubQueryStepSizeCheck(query, defaultSubQueryInterval, maxSubQuerySteps); err != nil {
return nil, err
}
}
Expand Down
72 changes: 48 additions & 24 deletions pkg/querier/tripperware/roundtrip_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -109,46 +109,69 @@ func TestRoundTrip(t *testing.T) {
path, expectedBody string
expectedErr error
limits Limits
maxSubQuerySteps int64
}{
{
path: "/foo",
expectedBody: "bar",
limits: defaultOverrides,
path: "/foo",
expectedBody: "bar",
limits: defaultOverrides,
maxSubQuerySteps: 11000,
},
{
path: queryExemplar,
expectedBody: "bar",
limits: defaultOverrides,
path: queryExemplar,
expectedBody: "bar",
limits: defaultOverrides,
maxSubQuerySteps: 11000,
},
{
path: queryRange,
expectedBody: responseBody,
limits: defaultOverrides,
path: queryRange,
expectedBody: responseBody,
limits: defaultOverrides,
maxSubQuerySteps: 11000,
},
{
path: query,
expectedBody: "bar",
limits: defaultOverrides,
path: query,
expectedBody: "bar",
limits: defaultOverrides,
maxSubQuerySteps: 11000,
},
{
path: queryNonShardable,
expectedBody: "bar",
limits: defaultOverrides,
path: queryNonShardable,
expectedBody: "bar",
limits: defaultOverrides,
maxSubQuerySteps: 11000,
},
{
path: queryNonShardable,
expectedBody: "bar",
limits: shardingOverrides,
path: queryNonShardable,
expectedBody: "bar",
limits: shardingOverrides,
maxSubQuerySteps: 11000,
},
{
path: query,
expectedBody: responseBody,
limits: shardingOverrides,
path: query,
expectedBody: responseBody,
limits: shardingOverrides,
maxSubQuerySteps: 11000,
},
// Shouldn't hit subquery step limit because max steps is set to 0 so this check is disabled.
{
path: querySubqueryStepSizeTooSmall,
expectedErr: httpgrpc.Errorf(http.StatusBadRequest, ErrSubQueryStepTooSmall, 11000),
limits: defaultOverrides,
path: querySubqueryStepSizeTooSmall,
expectedBody: "bar",
limits: defaultOverrides,
maxSubQuerySteps: 0,
},
// Shouldn't hit subquery step limit because max steps is higher, which is 100K.
{
path: querySubqueryStepSizeTooSmall,
expectedBody: "bar",
limits: defaultOverrides,
maxSubQuerySteps: 100000,
},
{
path: querySubqueryStepSizeTooSmall,
expectedErr: httpgrpc.Errorf(http.StatusBadRequest, ErrSubQueryStepTooSmall, 11000),
limits: defaultOverrides,
maxSubQuerySteps: 11000,
},
} {
t.Run(tc.path, func(t *testing.T) {
Expand Down Expand Up @@ -177,6 +200,7 @@ func TestRoundTrip(t *testing.T) {
tc.limits,
querysharding.NewQueryAnalyzer(),
time.Minute,
tc.maxSubQuerySteps,
)
resp, err := tw(downstream).RoundTrip(req)
if tc.expectedErr == nil {
Expand Down

0 comments on commit 5a7228e

Please sign in to comment.