diff --git a/.github/workflows/build-image.yml b/.github/workflows/build-image.yml index 9179b469fa..f7d54f1638 100644 --- a/.github/workflows/build-image.yml +++ b/.github/workflows/build-image.yml @@ -28,13 +28,13 @@ jobs: uses: docker/setup-qemu-action@49b3bc8e6bdd4a60e6116a5414239cba5943d3cf # v3.2.0 - name: Set up Docker Buildx - uses: docker/setup-buildx-action@c47758b77c9736f4b2ef4073d4d51994fabfe349 # v3.7.1 + uses: docker/setup-buildx-action@6524bf65af31da8d45b59e8c27de4bd072b392f5 # v3.8.0 - name: Save image run: make save-multiarch-build-image - name: Upload Docker Images Artifacts - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 with: name: build-image path: | diff --git a/.github/workflows/scorecards.yml b/.github/workflows/scorecards.yml index d70212b1d8..0a0df034d8 100644 --- a/.github/workflows/scorecards.yml +++ b/.github/workflows/scorecards.yml @@ -55,7 +55,7 @@ jobs: # Upload the results as artifacts (optional). Commenting out will disable uploads of run results in SARIF # format to the repository Actions tab. - name: "Upload artifact" - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v3.pre.node20 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v3.pre.node20 with: name: SARIF file path: results.sarif @@ -64,6 +64,6 @@ jobs: # Upload the results to GitHub's code scanning dashboard (optional). # Commenting out will disable upload of results to your repo's Code Scanning dashboard - name: "Upload to code-scanning" - uses: github/codeql-action/upload-sarif@babb554ede22fd5605947329c4d04d8e7a0b8155 # v3.27.7 + uses: github/codeql-action/upload-sarif@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0 with: sarif_file: results.sarif diff --git a/.github/workflows/test-build-deploy.yml b/.github/workflows/test-build-deploy.yml index 92fd95b69e..e132b025d9 100644 --- a/.github/workflows/test-build-deploy.yml +++ b/.github/workflows/test-build-deploy.yml @@ -93,15 +93,15 @@ jobs: # Initializes the CodeQL tools for scanning. 
- name: Initialize CodeQL - uses: github/codeql-action/init@babb554ede22fd5605947329c4d04d8e7a0b8155 # v3.27.7 + uses: github/codeql-action/init@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0 with: languages: go - name: Autobuild - uses: github/codeql-action/autobuild@babb554ede22fd5605947329c4d04d8e7a0b8155 # v3.27.7 + uses: github/codeql-action/autobuild@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@babb554ede22fd5605947329c4d04d8e7a0b8155 # v3.27.7 + uses: github/codeql-action/analyze@48ab28a6f5dbc2a99bf1e0131198dd8f1df78169 # v3.28.0 build: @@ -131,7 +131,7 @@ jobs: touch build-image/.uptodate make BUILD_IN_CONTAINER=false web-build - name: Upload Website Artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 with: name: website public path: website/public/ @@ -143,7 +143,7 @@ jobs: - name: Create Docker Images Archive run: tar -cvf images.tar /tmp/images - name: Upload Docker Images Artifact - uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3 + uses: actions/upload-artifact@6f51ac03b9356f520e9adb1b1b7802705f340c2b # v4.5.0 with: name: Docker Images path: ./images.tar diff --git a/CHANGELOG.md b/CHANGELOG.md index 7da08bf6ea..bad38ecd12 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ * [CHANGE] Change default value of `-blocks-storage.bucket-store.index-cache.multilevel.max-async-concurrency` from `50` to `3` #6265 * [CHANGE] Enable Compactor and Alertmanager in target all. #6204 * [CHANGE] Update the `cortex_ingester_inflight_push_requests` metric to represent the maximum number of inflight requests recorded in the last minute. #6437 +* [FEATURE] Ruler: Add an experimental flag `-ruler.query-response-format` to retrieve query response as a proto format. #6345 * [FEATURE] Ruler: Pagination support for List Rules API. #6299 * [FEATURE] Query Frontend/Querier: Add protobuf codec `-api.querier-default-codec` and the option to choose response compression type `-querier.response-compression`. #5527 * [FEATURE] Ruler: Experimental: Add `ruler.frontend-address` to allow query to query frontends instead of ingesters. #6151 @@ -18,6 +19,7 @@ * [FEATURE] Chunk Cache: Support multi level cache and add metrics. #6249 * [FEATURE] Distributor: Accept multiple HA Tracker pairs in the same request. #6256 * [FEATURE] Ruler: Add support for per-user external labels #6340 +* [ENHANCEMENT] Querier: Add a `-tenant-federation.max-concurrent` flags to configure the number of worker processing federated query and add a `cortex_querier_federated_tenants_per_query` histogram to track the number of tenants per query. #6449 * [ENHANCEMENT] Query Frontend: Add a number of series in the query response to the query stat log. #6423 * [ENHANCEMENT] Store Gateway: Add a hedged request to reduce the tail latency. #6388 * [ENHANCEMENT] Ingester: Add metrics to track succeed/failed native histograms. #6370 @@ -52,11 +54,14 @@ * [ENHANCEMENT] Ingester: If a limit per label set entry doesn't have any label, use it as the default partition to catch all series that doesn't match any other label sets entries. #6435 * [ENHANCEMENT] Querier: Add new `cortex_querier_codec_response_size` metric to track the size of the encoded query responses from queriers. 
#6444 * [ENHANCEMENT] Distributor: Added `cortex_distributor_received_samples_per_labelset_total` metric to calculate ingestion rate per label set. #6443 +* [ENHANCEMENT] Added metric name in limiter per-metric exceeded errors. #6416 +* [ENHANCEMENT] StoreGateway: Added `cortex_bucket_store_indexheader_load_duration_seconds` and `cortex_bucket_store_indexheader_download_duration_seconds` metrics for time of downloading and loading index header files. #6445 * [BUGFIX] Runtime-config: Handle absolute file paths when working directory is not / #6224 * [BUGFIX] Ruler: Allow rule evaluation to complete during shutdown. #6326 * [BUGFIX] Ring: update ring with new ip address when instance is lost, rejoins, but heartbeat is disabled. #6271 * [BUGFIX] Ingester: Fix regression on usage of cortex_ingester_queried_chunks. #6398 * [BUGFIX] Ingester: Fix possible race condition when `active series per LabelSet` is configured. #6409 +* [BUGFIX] Query Frontend: Fix @ modifier not being applied correctly on sub queries. #6450 ## 1.18.1 2024-10-14 diff --git a/RELEASE.md b/RELEASE.md index fb9b03bd65..85577bf52f 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -34,6 +34,7 @@ Our goal is to provide a new minor release every 6 weeks. This is a new process | v1.16.0 | 2023-11-05 | Ben Ye (@yeya24) | | v1.17.0 | 2024-04-25 | Ben Ye (@yeya24) | | v1.18.0 | 2024-08-16 | Daniel Blando (@danielblando) | +| v1.19.0 | 2025-01-15 | Charlie Le (@charlietle) | ## Release shepherd responsibilities diff --git a/docs/blocks-storage/compactor.md b/docs/blocks-storage/compactor.md index 030035f2ac..f5539511ca 100644 --- a/docs/blocks-storage/compactor.md +++ b/docs/blocks-storage/compactor.md @@ -286,18 +286,18 @@ compactor: [wait_active_instance_timeout: | default = 10m] # The compaction strategy to use. Supported values are: default, partitioning. - # CLI flag: -compactor.compaction-mode - [compaction_mode: | default = "default"] + # CLI flag: -compactor.compaction-strategy + [compaction_strategy: | default = "default"] - # How long block visit marker file should be considered as expired and able to - # be picked up by compactor again. - # CLI flag: -compactor.block-visit-marker-timeout - [block_visit_marker_timeout: | default = 5m] + # How long compaction visit marker file should be considered as expired and + # able to be picked up by compactor again. + # CLI flag: -compactor.compaction-visit-marker-timeout + [compaction_visit_marker_timeout: | default = 10m] - # How frequently block visit marker file should be updated duration + # How frequently compaction visit marker file should be updated duration # compaction. - # CLI flag: -compactor.block-visit-marker-file-update-interval - [block_visit_marker_file_update_interval: | default = 1m] + # CLI flag: -compactor.compaction-visit-marker-file-update-interval + [compaction_visit_marker_file_update_interval: | default = 1m] # How long cleaner visit marker file should be considered as expired and able # to be picked up by cleaner again. The value should be smaller than diff --git a/docs/configuration/arguments.md b/docs/configuration/arguments.md index 145a5caea9..b26398444d 100644 --- a/docs/configuration/arguments.md +++ b/docs/configuration/arguments.md @@ -53,7 +53,7 @@ The next three options only apply when the querier is used together with the Que ## Querier and Ruler -- `-promql.lookback-delta` +- `-querier.lookback-delta` Time since the last sample after which a time series is considered stale and ignored by expression evaluations. 
@@ -63,9 +63,9 @@ The next three options only apply when the querier is used together with the Que If set to true, will cause the query frontend to mutate incoming queries and align their start and end parameters to the step parameter of the query. This improves the cacheability of the query results. -- `-querier.split-queries-by-day` +- `-querier.split-queries-by-interval` - If set to true, will cause the query frontend to split multi-day queries into multiple single-day queries and execute them in parallel. + If set, will cause the query frontend to split multi-day queries into multiple queries and execute them in parallel. A multiple of 24h should be used when setting this flag. - `-querier.cache-results` @@ -391,12 +391,6 @@ Valid per-tenant limits are (with their corresponding flags for default values): Requires `-distributor.replication-factor`, `-distributor.shard-by-all-labels`, `-distributor.sharding-strategy` and `-distributor.zone-awareness-enabled` set for the ingesters too. -- `max_series_per_query` / `-ingester.max-series-per-query` - -- `max_samples_per_query` / `-ingester.max-samples-per-query` - - Limits on the number of timeseries and samples returns by a single ingester during a query. - - `max_metadata_per_user` / `-ingester.max-metadata-per-user` - `max_metadata_per_metric` / `-ingester.max-metadata-per-metric` Enforced by the ingesters; limits the number of active metadata a user (or a given metric) can have. When running with `-distributor.shard-by-all-labels=false` (the default), this limit will enforce the maximum number of metadata a metric can have 'globally', as all metadata for a single metric will be sent to the same replication set of ingesters. This is not the case when running with `-distributor.shard-by-all-labels=true`, so the actual limit will be N/RF times higher, where N is number of ingester replicas and RF is configured replication factor. @@ -443,12 +437,6 @@ Valid ingester instance limits are (with their corresponding flags): Limit the maximum number of requests being handled by an ingester at once. This setting is critical for preventing ingesters from using an excessive amount of memory during high load or temporary slow downs. When this limit is reached, new requests will fail with an HTTP 500 error. -## Storage - -- `s3.force-path-style` - - Set this to `true` to force the request to use path-style addressing (`http://s3.amazonaws.com/BUCKET/KEY`). By default, the S3 client will use virtual hosted bucket addressing when possible (`http://BUCKET.s3.amazonaws.com/KEY`). - ## DNS Service Discovery Some clients in Cortex support service discovery via DNS to find addresses of backend servers to connect to (ie. caching servers). The clients supporting it are: diff --git a/docs/configuration/config-file-reference.md b/docs/configuration/config-file-reference.md index f48bfeddbc..70978a36bd 100644 --- a/docs/configuration/config-file-reference.md +++ b/docs/configuration/config-file-reference.md @@ -157,6 +157,10 @@ tenant_federation: # CLI flag: -tenant-federation.enabled [enabled: | default = false] + # The number of workers used to process each federated query. + # CLI flag: -tenant-federation.max-concurrent + [max_concurrent: | default = 16] + # The ruler_config configures the Cortex ruler. [ruler: ] @@ -2327,17 +2331,18 @@ sharding_ring: [wait_active_instance_timeout: | default = 10m] # The compaction strategy to use. Supported values are: default, partitioning. 
-# CLI flag: -compactor.compaction-mode -[compaction_mode: | default = "default"] +# CLI flag: -compactor.compaction-strategy +[compaction_strategy: | default = "default"] -# How long block visit marker file should be considered as expired and able to -# be picked up by compactor again. -# CLI flag: -compactor.block-visit-marker-timeout -[block_visit_marker_timeout: | default = 5m] +# How long compaction visit marker file should be considered as expired and able +# to be picked up by compactor again. +# CLI flag: -compactor.compaction-visit-marker-timeout +[compaction_visit_marker_timeout: | default = 10m] -# How frequently block visit marker file should be updated duration compaction. -# CLI flag: -compactor.block-visit-marker-file-update-interval -[block_visit_marker_file_update_interval: | default = 1m] +# How frequently compaction visit marker file should be updated duration +# compaction. +# CLI flag: -compactor.compaction-visit-marker-file-update-interval +[compaction_visit_marker_file_update_interval: | default = 1m] # How long cleaner visit marker file should be considered as expired and able to # be picked up by cleaner again. The value should be smaller than @@ -3588,6 +3593,14 @@ query_rejection: # CLI flag: -compactor.tenant-shard-size [compactor_tenant_shard_size: | default = 0] +# Index size limit in bytes for each compaction partition. 0 means no limit +# CLI flag: -compactor.partition-index-size-bytes +[compactor_partition_index_size_bytes: | default = 68719476736] + +# Time series count limit for each compaction partition. 0 means no limit +# CLI flag: -compactor.partition-series-count +[compactor_partition_series_count: | default = 0] + # S3 server-side encryption type. Required to enable server-side encryption # overrides for a specific tenant. If not set, the default S3 client settings # are used. @@ -4313,6 +4326,12 @@ The `ruler_config` configures the Cortex ruler. # CLI flag: -ruler.frontend-address [frontend_address: | default = ""] +# [Experimental] Query response format to get query results from Query Frontend +# when the rule evaluation. It will only take effect when +# `-ruler.frontend-address` is configured. Supported values: json,protobuf +# CLI flag: -ruler.query-response-format +[query_response_format: | default = "protobuf"] + frontend_client: # gRPC client max receive message size (bytes). # CLI flag: -ruler.frontendClient.grpc-max-recv-msg-size diff --git a/docs/configuration/v1-guarantees.md b/docs/configuration/v1-guarantees.md index 94da14fb10..2b46407986 100644 --- a/docs/configuration/v1-guarantees.md +++ b/docs/configuration/v1-guarantees.md @@ -35,7 +35,9 @@ Cortex is an actively developed project and we want to encourage the introductio Currently experimental features are: -- Ruler: Evaluate rules to query frontend instead of ingesters (enabled via `-ruler.frontend-address` ) +- Ruler + - Evaluate rules to query frontend instead of ingesters (enabled via `-ruler.frontend-address`). + - When `-ruler.frontend-address` is specified, the response format can be specified (via `-ruler.query-response-format`). - S3 Server Side Encryption (SSE) using KMS (including per-tenant KMS config overrides). - Azure blob storage. - Zone awareness based replication. 
diff --git a/go.mod b/go.mod index 1a5920b422..d1cdc26870 100644 --- a/go.mod +++ b/go.mod @@ -9,7 +9,6 @@ require ( github.com/armon/go-metrics v0.4.1 github.com/aws/aws-sdk-go v1.55.5 github.com/bradfitz/gomemcache v0.0.0-20230905024940-24af94b03874 - github.com/cespare/xxhash v1.1.0 github.com/cortexproject/promqlsmith v0.0.0-20241121054008-8b48fe2471ef github.com/dustin/go-humanize v1.0.1 github.com/efficientgo/core v1.0.0-rc.3 @@ -27,7 +26,7 @@ require ( github.com/gorilla/mux v1.8.1 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 - github.com/hashicorp/consul/api v1.30.0 + github.com/hashicorp/consul/api v1.31.0 github.com/hashicorp/go-cleanhttp v0.5.2 github.com/hashicorp/go-sockaddr v1.0.7 github.com/hashicorp/memberlist v0.5.1 @@ -52,8 +51,8 @@ require ( github.com/spf13/afero v1.11.0 github.com/stretchr/testify v1.10.0 github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97 - github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68 - github.com/thanos-io/thanos v0.37.2-0.20241210234302-0ea6bac096ce + github.com/thanos-io/promql-engine v0.0.0-20241217103156-9dbff30059cf + github.com/thanos-io/thanos v0.37.3-0.20241224143735-2d041dc774da github.com/uber/jaeger-client-go v2.30.0+incompatible github.com/weaveworks/common v0.0.0-20230728070032-dd9e68f319d5 go.etcd.io/etcd/api/v3 v3.5.17 @@ -67,7 +66,7 @@ require ( go.opentelemetry.io/otel/sdk v1.33.0 go.opentelemetry.io/otel/trace v1.33.0 go.uber.org/atomic v1.11.0 - golang.org/x/net v0.32.0 + golang.org/x/net v0.33.0 golang.org/x/sync v0.10.0 golang.org/x/time v0.8.0 google.golang.org/grpc v1.68.1 @@ -81,11 +80,12 @@ require ( github.com/cespare/xxhash/v2 v2.3.0 github.com/google/go-cmp v0.6.0 github.com/hashicorp/golang-lru/v2 v2.0.7 + github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 github.com/sercand/kuberesolver/v5 v5.1.1 github.com/tjhop/slog-gokit v0.1.2 go.opentelemetry.io/collector/pdata v1.22.0 golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 - google.golang.org/protobuf v1.36.0 + google.golang.org/protobuf v1.36.1 ) require ( @@ -189,7 +189,6 @@ require ( github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect - github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/mwitkow/go-conntrack v0.0.0-20190716064945-2f068394615f // indirect github.com/ncw/swift v1.0.53 // indirect github.com/oklog/run v1.1.0 // indirect diff --git a/go.sum b/go.sum index 8d74606c32..bcbf2121ac 100644 --- a/go.sum +++ b/go.sum @@ -798,9 +798,6 @@ github.com/Masterminds/squirrel v1.5.4 h1:uUcX/aBc8O7Fg9kaISIUsHXdKuqehiXAMQTYX8 github.com/Masterminds/squirrel v1.5.4/go.mod h1:NNaOrjSoIDfDA40n7sr2tPNZRfjzjA400rg+riTZj10= github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= -github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/OneOfOne/xxhash v1.2.6 h1:U68crOE3y3MPttCMQGywZOLrTeF5HHJ3/vDBCJn9/bA= -github.com/OneOfOne/xxhash v1.2.6/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q= github.com/VictoriaMetrics/fastcache v1.12.2 h1:N0y9ASrJ0F6h0QaC3o6uJb3NIZ9VKLjCM7NQbSmF7WI= github.com/VictoriaMetrics/fastcache v1.12.2/go.mod h1:AmC+Nzz1+3G2eCPapF6UcsnkThDcMsQicp4xDukwJYI= github.com/ajstarks/deck 
v0.0.0-20200831202436-30c9fc6549a9/go.mod h1:JynElWSGnm/4RlzPXRlREEwqTHAN3T56Bv2ITsFT3gY= @@ -904,8 +901,6 @@ github.com/cenkalti/backoff/v4 v4.3.0/go.mod h1:Y3VNntkOUPxTVeUxJ/G5vcM//AlwfmyY github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.3.0/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/census-instrumentation/opencensus-proto v0.4.1/go.mod h1:4T9NM4+4Vw91VeyqjLS6ao50K5bOcLKN6Q42XnYaRYw= -github.com/cespare/xxhash v1.1.0 h1:a6HrQnmkObjyL+Gs60czilIUGqrzKutQD6XZog3p+ko= -github.com/cespare/xxhash v1.1.0/go.mod h1:XrSqR1VqqWfGrhpAt58auRo0WTKS1nRRg3ghfAqPWnc= github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.2.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= @@ -1262,8 +1257,8 @@ github.com/grpc-ecosystem/grpc-gateway/v2 v2.16.0/go.mod h1:YN5jB8ie0yfIUg6VvR9K github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0 h1:TmHmbvxPmaegwhDubVz0lICL0J5Ka2vwTzhoePEXsGE= github.com/grpc-ecosystem/grpc-gateway/v2 v2.24.0/go.mod h1:qztMSjm835F2bXf+5HKAPIS5qsmQDqZna/PgVt4rWtI= github.com/hashicorp/consul/api v1.12.0/go.mod h1:6pVBMo0ebnYdt2S3H87XhekM/HHrUoTD2XXb/VrZVy0= -github.com/hashicorp/consul/api v1.30.0 h1:ArHVMMILb1nQv8vZSGIwwQd2gtc+oSQZ6CalyiyH2XQ= -github.com/hashicorp/consul/api v1.30.0/go.mod h1:B2uGchvaXVW2JhFoS8nqTxMD5PBykr4ebY4JWHTTeLM= +github.com/hashicorp/consul/api v1.31.0 h1:32BUNLembeSRek0G/ZAM6WNfdEwYdYo8oQ4+JoqGkNQ= +github.com/hashicorp/consul/api v1.31.0/go.mod h1:2ZGIiXM3A610NmDULmCHd/aqBJj8CkMfOhswhOafxRg= github.com/hashicorp/consul/sdk v0.8.0/go.mod h1:GBvyrGALthsZObzUGsfgHZQDXjg4lOjagTIwIR1vPms= github.com/hashicorp/consul/sdk v0.16.1 h1:V8TxTnImoPD5cj0U9Spl0TUxcytjcbbJeADFF07KdHg= github.com/hashicorp/consul/sdk v0.16.1/go.mod h1:fSXvwxB2hmh1FMZCNl6PwX0Q/1wdWtHJcZ7Ea5tns0s= @@ -1619,9 +1614,6 @@ github.com/soheilhy/cmux v0.1.5 h1:jjzc5WVemNEDTLwv9tlmemhC73tI08BNOIGwBOo10Js= github.com/soheilhy/cmux v0.1.5/go.mod h1:T7TcVDs9LWfQgPlPsdngu6I6QIoyIFZDDC6sNE1GqG0= github.com/sony/gobreaker v1.0.0 h1:feX5fGGXSl3dYd4aHZItw+FpHLvvoaqkawKjVNiFMNQ= github.com/sony/gobreaker v1.0.0/go.mod h1:ZKptC7FHNvhBz7dN2LGjPVBz2sZJmc0/PkyDJOjmxWY= -github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= -github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= -github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.3.3/go.mod h1:5KUK8ByomD5Ti5Artl0RtHeI5pTF7MIDuXL3yY520V4= github.com/spf13/afero v1.6.0/go.mod h1:Ai8FlHk4v/PARR026UzYexafAt9roJ7LcLMAmO6Z93I= github.com/spf13/afero v1.8.2/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo= @@ -1663,10 +1655,10 @@ github.com/thanos-community/galaxycache v0.0.0-20211122094458-3a32041a1f1e h1:f1 github.com/thanos-community/galaxycache v0.0.0-20211122094458-3a32041a1f1e/go.mod h1:jXcofnrSln/cLI6/dhlBxPQZEEQHVPCcFaH75M+nSzM= github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97 h1:VjG0mwhN1DkncwDHFvrpd12/2TLfgYNRmEQA48ikp+0= github.com/thanos-io/objstore v0.0.0-20241111205755-d1dd89d41f97/go.mod h1:vyzFrBXgP+fGNG2FopEGWOO/zrIuoy7zt3LpLeezRsw= -github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68 h1:cChM/FbpXeYmrSmXO1/MmmSlONviLVxWAWCB0/g4JrY= -github.com/thanos-io/promql-engine 
v0.0.0-20241203103240-2f49f80c7c68/go.mod h1:wx0JlRZtsB2S10JYUgeg5GqLfMxw31SzArP+28yyE00= -github.com/thanos-io/thanos v0.37.2-0.20241210234302-0ea6bac096ce h1:77bIiukjCF+GRZFjwJOsLIwWri7BO8LTwhchkPI7l5U= -github.com/thanos-io/thanos v0.37.2-0.20241210234302-0ea6bac096ce/go.mod h1:5Ni7Uc1Bc8UCGOYmZ/2f/LVvDkZKNDdqDJZqjDFG+rI= +github.com/thanos-io/promql-engine v0.0.0-20241217103156-9dbff30059cf h1:JFh4PjC9yQidiFi4qMWbPddIgsLWPIsSEbXs75+tLxs= +github.com/thanos-io/promql-engine v0.0.0-20241217103156-9dbff30059cf/go.mod h1:wx0JlRZtsB2S10JYUgeg5GqLfMxw31SzArP+28yyE00= +github.com/thanos-io/thanos v0.37.3-0.20241224143735-2d041dc774da h1:xnaeDaL1kOUgqA7BL6bPO5v5K66imXUweVYk8HqDFsA= +github.com/thanos-io/thanos v0.37.3-0.20241224143735-2d041dc774da/go.mod h1:E+olRxu9jl7KknntphHYXhLieVaMXXl/Q/Ioh6tj+oE= github.com/tjhop/slog-gokit v0.1.2 h1:pmQI4SvU9h4gA0vIQsdhJQSqQg4mOmsPykG2/PM3j1I= github.com/tjhop/slog-gokit v0.1.2/go.mod h1:8fhlcp8C8ELbg3GCyKv06tgt4B5sDq2P1r2DQAu1HuM= github.com/tv42/httpunix v0.0.0-20150427012821-b75d8614f926/go.mod h1:9ESjWnEqriFuLhtthL60Sar/7RFoluCcXsuvEwTV5KM= @@ -1933,8 +1925,8 @@ golang.org/x/net v0.20.0/go.mod h1:z8BVo6PvndSri0LbOE3hAn0apkU+1YvI6E70E9jsnvY= golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44= golang.org/x/net v0.22.0/go.mod h1:JKghWKKOSdJwpW2GEx0Ja7fmaKnMsbu+MWVZTokSYmg= golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= -golang.org/x/net v0.32.0 h1:ZqPmj8Kzc+Y6e0+skZsuACbx+wzMgo5MQsJh9Qd6aYI= -golang.org/x/net v0.32.0/go.mod h1:CwU0IoeOlnQQWJ6ioyFrfRuomB8GKF6KbYXZVyeXNfs= +golang.org/x/net v0.33.0 h1:74SYHlV8BIgHIFC/LrYkOGIwL19eTYXQ5wc6TBuO36I= +golang.org/x/net v0.33.0/go.mod h1:HXLR5J+9DxmrqMwG9qjGCxZ+zKXxBru04zlTvWlWuN4= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -2502,8 +2494,8 @@ google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqw google.golang.org/protobuf v1.32.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= google.golang.org/protobuf v1.33.0/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= -google.golang.org/protobuf v1.36.0 h1:mjIs9gYtt56AzC4ZaffQuh88TZurBGhIJMBZGSxNerQ= -google.golang.org/protobuf v1.36.0/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= +google.golang.org/protobuf v1.36.1 h1:yBPeRvTftaleIgM3PZ/WBIZ7XM/eEYAaEyCwvyjq/gk= +google.golang.org/protobuf v1.36.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= diff --git a/integration/ruler_test.go b/integration/ruler_test.go index f7d16507d1..d9e7362b5f 100644 --- a/integration/ruler_test.go +++ b/integration/ruler_test.go @@ -1670,42 +1670,64 @@ func TestRulerEvalWithQueryFrontend(t *testing.T) { distributor := e2ecortex.NewDistributor("distributor", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "") ingester := e2ecortex.NewIngester("ingester", 
e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), flags, "") require.NoError(t, s.StartAndWaitReady(distributor, ingester)) - queryFrontend := e2ecortex.NewQueryFrontend("query-frontend", flags, "") - require.NoError(t, s.Start(queryFrontend)) - - ruler := e2ecortex.NewRuler("ruler", consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{ - "-ruler.frontend-address": queryFrontend.NetworkGRPCEndpoint(), - }), "") - querier := e2ecortex.NewQuerier("querier", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(flags, map[string]string{ - "-querier.frontend-address": queryFrontend.NetworkGRPCEndpoint(), - }), "") - require.NoError(t, s.StartAndWaitReady(ruler, querier)) - - c, err := e2ecortex.NewClient("", "", "", ruler.HTTPEndpoint(), user) - require.NoError(t, err) + for _, format := range []string{"protobuf", "json"} { + t.Run(fmt.Sprintf("format:%s", format), func(t *testing.T) { + queryFrontendFlag := mergeFlags(flags, map[string]string{ + "-ruler.query-response-format": format, + }) + queryFrontend := e2ecortex.NewQueryFrontend("query-frontend", queryFrontendFlag, "") + require.NoError(t, s.Start(queryFrontend)) - expression := "metric" - groupName := "rule_group" - ruleName := "rule_name" - require.NoError(t, c.SetRuleGroup(ruleGroupWithRule(groupName, ruleName, expression), namespace)) + querier := e2ecortex.NewQuerier("querier", e2ecortex.RingStoreConsul, consul.NetworkHTTPEndpoint(), mergeFlags(queryFrontendFlag, map[string]string{ + "-querier.frontend-address": queryFrontend.NetworkGRPCEndpoint(), + }), "") + require.NoError(t, s.StartAndWaitReady(querier)) - rgMatcher := ruleGroupMatcher(user, namespace, groupName) - // Wait until ruler has loaded the group. - require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_prometheus_rule_group_rules"}, e2e.WithLabelMatchers(rgMatcher), e2e.WaitMissingMetrics)) - // Wait until rule group has tried to evaluate the rule. 
- require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.GreaterOrEqual(1), []string{"cortex_prometheus_rule_evaluations_total"}, e2e.WithLabelMatchers(rgMatcher), e2e.WaitMissingMetrics)) + rulerFlag := mergeFlags(queryFrontendFlag, map[string]string{ + "-ruler.frontend-address": queryFrontend.NetworkGRPCEndpoint(), + }) + ruler := e2ecortex.NewRuler("ruler", consul.NetworkHTTPEndpoint(), rulerFlag, "") + require.NoError(t, s.StartAndWaitReady(ruler)) - matcher := labels.MustNewMatcher(labels.MatchEqual, "user", user) - // Check that cortex_ruler_query_frontend_clients went up - require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_ruler_query_frontend_clients"}, e2e.WaitMissingMetrics)) - // Check that cortex_ruler_queries_total went up - require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.GreaterOrEqual(1), []string{"cortex_ruler_queries_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) - // Check that cortex_ruler_queries_failed_total is zero - require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"cortex_ruler_queries_failed_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) - // Check that cortex_ruler_write_requests_total went up - require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.GreaterOrEqual(1), []string{"cortex_ruler_write_requests_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) - // Check that cortex_ruler_write_requests_failed_total is zero - require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"cortex_ruler_write_requests_failed_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) + t.Cleanup(func() { + _ = s.Stop(ruler) + _ = s.Stop(queryFrontend) + _ = s.Stop(querier) + }) + + c, err := e2ecortex.NewClient("", "", "", ruler.HTTPEndpoint(), user) + require.NoError(t, err) + + expression := "metric" // vector + //expression := "scalar(count(up == 1)) > bool 1" // scalar + groupName := "rule_group" + ruleName := "rule_name" + require.NoError(t, c.SetRuleGroup(ruleGroupWithRule(groupName, ruleName, expression), namespace)) + + rgMatcher := ruleGroupMatcher(user, namespace, groupName) + // Wait until ruler has loaded the group. + require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_prometheus_rule_group_rules"}, e2e.WithLabelMatchers(rgMatcher), e2e.WaitMissingMetrics)) + // Wait until rule group has tried to evaluate the rule. 
+ require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.GreaterOrEqual(1), []string{"cortex_prometheus_rule_evaluations_total"}, e2e.WithLabelMatchers(rgMatcher), e2e.WaitMissingMetrics)) + // Make sure not to fail + require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"cortex_prometheus_rule_evaluation_failures_total"}, e2e.WithLabelMatchers(rgMatcher), e2e.WaitMissingMetrics)) + + matcher := labels.MustNewMatcher(labels.MatchEqual, "user", user) + sourceMatcher := labels.MustNewMatcher(labels.MatchEqual, "source", "ruler") + // Check that cortex_ruler_query_frontend_clients went up + require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(1), []string{"cortex_ruler_query_frontend_clients"}, e2e.WaitMissingMetrics)) + // Check that cortex_ruler_queries_total went up + require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.GreaterOrEqual(1), []string{"cortex_ruler_queries_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) + // Check that cortex_ruler_queries_failed_total is zero + require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"cortex_ruler_queries_failed_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) + // Check that cortex_ruler_write_requests_total went up + require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.GreaterOrEqual(1), []string{"cortex_ruler_write_requests_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) + // Check that cortex_ruler_write_requests_failed_total is zero + require.NoError(t, ruler.WaitSumMetricsWithOptions(e2e.Equals(0), []string{"cortex_ruler_write_requests_failed_total"}, e2e.WithLabelMatchers(matcher), e2e.WaitMissingMetrics)) + // Check that cortex_query_frontend_queries_total went up + require.NoError(t, queryFrontend.WaitSumMetricsWithOptions(e2e.GreaterOrEqual(1), []string{"cortex_query_frontend_queries_total"}, e2e.WithLabelMatchers(matcher, sourceMatcher), e2e.WaitMissingMetrics)) + }) + } } func parseAlertFromRule(t *testing.T, rules interface{}) *alertingRule { diff --git a/pkg/chunk/cache/memcached_client_selector.go b/pkg/chunk/cache/memcached_client_selector.go index e3e1c960ca..38fff0b340 100644 --- a/pkg/chunk/cache/memcached_client_selector.go +++ b/pkg/chunk/cache/memcached_client_selector.go @@ -6,7 +6,7 @@ import ( "sync" "github.com/bradfitz/gomemcache/memcache" - "github.com/cespare/xxhash" + "github.com/cespare/xxhash/v2" "github.com/facette/natsort" ) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index a0a7e65e68..c50a98ca48 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -60,7 +60,7 @@ var ( errInvalidCompactionStrategy = errors.New("invalid compaction strategy") errInvalidCompactionStrategyPartitioning = errors.New("compaction strategy partitioning can only be enabled when shuffle sharding is enabled") - DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, _ *ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter) compact.Grouper { + DefaultBlocksGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, _ prometheus.Counter, _ prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, _ 
*ring.Ring, _ *ring.Lifecycler, _ Limits, _ string, _ *compact.GatherNoCompactionMarkFilter, _ int) compact.Grouper { return compact.NewDefaultGrouperWithMetrics( logger, bkt, @@ -79,9 +79,31 @@ var ( cfg.BlocksFetchConcurrency) } - ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter) compact.Grouper { + ShuffleShardingGrouperFactory = func(ctx context.Context, cfg Config, bkt objstore.InstrumentedBucket, logger log.Logger, blocksMarkedForNoCompaction prometheus.Counter, blockVisitMarkerReadFailed prometheus.Counter, blockVisitMarkerWriteFailed prometheus.Counter, syncerMetrics *compact.SyncerMetrics, compactorMetrics *compactorMetrics, ring *ring.Ring, ringLifecycle *ring.Lifecycler, limits Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, ingestionReplicationFactor int) compact.Grouper { if cfg.CompactionStrategy == util.CompactionStrategyPartitioning { - return NewPartitionCompactionGrouper(ctx, logger, bkt) + return NewPartitionCompactionGrouper( + ctx, + logger, + bkt, + cfg.AcceptMalformedIndex, + true, // Enable vertical compaction + blocksMarkedForNoCompaction, + syncerMetrics, + compactorMetrics, + metadata.NoneFunc, + cfg, + ring, + ringLifecycle.Addr, + ringLifecycle.ID, + limits, + userID, + cfg.BlockFilesConcurrency, + cfg.BlocksFetchConcurrency, + cfg.CompactionConcurrency, + true, + cfg.CompactionVisitMarkerTimeout, + noCompactionMarkFilter.NoCompactMarkedBlocks, + ingestionReplicationFactor) } else { return NewShuffleShardingGrouper( ctx, @@ -102,7 +124,7 @@ var ( cfg.BlockFilesConcurrency, cfg.BlocksFetchConcurrency, cfg.CompactionConcurrency, - cfg.BlockVisitMarkerTimeout, + cfg.CompactionVisitMarkerTimeout, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed, noCompactionMarkFilter.NoCompactMarkedBlocks) @@ -133,7 +155,7 @@ var ( if cfg.CompactionStrategy == util.CompactionStrategyPartitioning { return NewPartitionCompactionPlanner(ctx, bkt, logger) } else { - return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.BlockVisitMarkerTimeout, cfg.BlockVisitMarkerFileUpdateInterval, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed) + return NewShuffleShardingPlanner(ctx, bkt, logger, cfg.BlockRanges.ToMilliseconds(), noCompactionMarkFilter.NoCompactMarkedBlocks, ringLifecycle.ID, cfg.CompactionVisitMarkerTimeout, cfg.CompactionVisitMarkerFileUpdateInterval, blockVisitMarkerReadFailed, blockVisitMarkerWriteFailed) } } return compactor, plannerFactory, nil @@ -156,6 +178,7 @@ type BlocksGrouperFactory func( limit Limits, userID string, noCompactionMarkFilter *compact.GatherNoCompactionMarkFilter, + ingestionReplicationFactor int, ) compact.Grouper // BlocksCompactorFactory builds and returns the compactor and planner to use to compact a tenant's blocks. @@ -182,6 +205,8 @@ type PlannerFactory func( // Limits defines limits used by the Compactor. 
type Limits interface { CompactorTenantShardSize(userID string) int + CompactorPartitionIndexSizeBytes(userID string) int64 + CompactorPartitionSeriesCount(userID string) int64 } // Config holds the Compactor config. @@ -213,8 +238,8 @@ type Config struct { ShardingStrategy string `yaml:"sharding_strategy"` ShardingRing RingConfig `yaml:"sharding_ring"` - // Compaction mode. - CompactionStrategy string `yaml:"compaction_mode"` + // Compaction strategy. + CompactionStrategy string `yaml:"compaction_strategy"` // No need to add options to customize the retry backoff, // given the defaults should be fine, but allow to override @@ -226,9 +251,9 @@ type Config struct { BlocksGrouperFactory BlocksGrouperFactory `yaml:"-"` BlocksCompactorFactory BlocksCompactorFactory `yaml:"-"` - // Block visit marker file config - BlockVisitMarkerTimeout time.Duration `yaml:"block_visit_marker_timeout"` - BlockVisitMarkerFileUpdateInterval time.Duration `yaml:"block_visit_marker_file_update_interval"` + // Compaction visit marker file config + CompactionVisitMarkerTimeout time.Duration `yaml:"compaction_visit_marker_timeout"` + CompactionVisitMarkerFileUpdateInterval time.Duration `yaml:"compaction_visit_marker_file_update_interval"` // Cleaner visit marker file config CleanerVisitMarkerTimeout time.Duration `yaml:"cleaner_visit_marker_timeout"` @@ -258,7 +283,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.CleanupConcurrency, "compactor.cleanup-concurrency", 20, "Max number of tenants for which blocks cleanup and maintenance should run concurrently.") f.BoolVar(&cfg.ShardingEnabled, "compactor.sharding-enabled", false, "Shard tenants across multiple compactor instances. Sharding is required if you run multiple compactor instances, in order to coordinate compactions and avoid race conditions leading to the same tenant blocks simultaneously compacted by different instances.") f.StringVar(&cfg.ShardingStrategy, "compactor.sharding-strategy", util.ShardingStrategyDefault, fmt.Sprintf("The sharding strategy to use. Supported values are: %s.", strings.Join(supportedShardingStrategies, ", "))) - f.StringVar(&cfg.CompactionStrategy, "compactor.compaction-mode", util.CompactionStrategyDefault, fmt.Sprintf("The compaction strategy to use. Supported values are: %s.", strings.Join(supportedCompactionStrategies, ", "))) + f.StringVar(&cfg.CompactionStrategy, "compactor.compaction-strategy", util.CompactionStrategyDefault, fmt.Sprintf("The compaction strategy to use. Supported values are: %s.", strings.Join(supportedCompactionStrategies, ", "))) f.DurationVar(&cfg.DeletionDelay, "compactor.deletion-delay", 12*time.Hour, "Time before a block marked for deletion is deleted from bucket. "+ "If not 0, blocks will be marked for deletion and compactor component will permanently delete blocks marked for deletion from the bucket. "+ "If 0, blocks will be deleted straight away. Note that deleting blocks immediately can cause query failures.") @@ -271,8 +296,8 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.Var(&cfg.EnabledTenants, "compactor.enabled-tenants", "Comma separated list of tenants that can be compacted. If specified, only these tenants will be compacted by compactor, otherwise all tenants can be compacted. Subject to sharding.") f.Var(&cfg.DisabledTenants, "compactor.disabled-tenants", "Comma separated list of tenants that cannot be compacted by this compactor. 
If specified, and compactor would normally pick given tenant for compaction (via -compactor.enabled-tenants or sharding), it will be ignored instead.") - f.DurationVar(&cfg.BlockVisitMarkerTimeout, "compactor.block-visit-marker-timeout", 5*time.Minute, "How long block visit marker file should be considered as expired and able to be picked up by compactor again.") - f.DurationVar(&cfg.BlockVisitMarkerFileUpdateInterval, "compactor.block-visit-marker-file-update-interval", 1*time.Minute, "How frequently block visit marker file should be updated duration compaction.") + f.DurationVar(&cfg.CompactionVisitMarkerTimeout, "compactor.compaction-visit-marker-timeout", 10*time.Minute, "How long compaction visit marker file should be considered as expired and able to be picked up by compactor again.") + f.DurationVar(&cfg.CompactionVisitMarkerFileUpdateInterval, "compactor.compaction-visit-marker-file-update-interval", 1*time.Minute, "How frequently compaction visit marker file should be updated duration compaction.") f.DurationVar(&cfg.CleanerVisitMarkerTimeout, "compactor.cleaner-visit-marker-timeout", 10*time.Minute, "How long cleaner visit marker file should be considered as expired and able to be picked up by cleaner again. The value should be smaller than -compactor.cleanup-interval") f.DurationVar(&cfg.CleanerVisitMarkerFileUpdateInterval, "compactor.cleaner-visit-marker-file-update-interval", 5*time.Minute, "How frequently cleaner visit marker file should be updated when cleaning user.") @@ -305,7 +330,7 @@ func (cfg *Config) Validate(limits validation.Limits) error { } } - // Make sure a valid compaction mode is being used + // Make sure a valid compaction strategy is being used if !util.StringsContain(supportedCompactionStrategies, cfg.CompactionStrategy) { return errInvalidCompactionStrategy } @@ -379,10 +404,13 @@ type Compactor struct { // Thanos compactor metrics per user compactorMetrics *compactorMetrics + + // Replication factor of ingester ring + ingestionReplicationFactor int } // NewCompactor makes a new Compactor. 
-func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, logger log.Logger, registerer prometheus.Registerer, limits *validation.Overrides) (*Compactor, error) { +func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfig, logger log.Logger, registerer prometheus.Registerer, limits *validation.Overrides, ingestionReplicationFactor int) (*Compactor, error) { bucketClientFactory := func(ctx context.Context) (objstore.InstrumentedBucket, error) { return bucket.NewClient(ctx, storageCfg.Bucket, nil, "compactor", logger, registerer) } @@ -405,7 +433,11 @@ func NewCompactor(compactorCfg Config, storageCfg cortex_tsdb.BlocksStorageConfi } } - cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, limits) + if ingestionReplicationFactor <= 0 { + ingestionReplicationFactor = 1 + } + + cortexCompactor, err := newCompactor(compactorCfg, storageCfg, logger, registerer, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, limits, ingestionReplicationFactor) if err != nil { return nil, errors.Wrap(err, "failed to create Cortex blocks compactor") } @@ -422,6 +454,7 @@ func newCompactor( blocksGrouperFactory BlocksGrouperFactory, blocksCompactorFactory BlocksCompactorFactory, limits *validation.Overrides, + ingestionReplicationFactor int, ) (*Compactor, error) { var compactorMetrics *compactorMetrics if compactorCfg.ShardingStrategy == util.ShardingStrategyShuffle { @@ -496,8 +529,9 @@ func newCompactor( Name: "cortex_compactor_block_visit_marker_write_failed", Help: "Number of block visit marker file failed to be written.", }), - limits: limits, - compactorMetrics: compactorMetrics, + limits: limits, + compactorMetrics: compactorMetrics, + ingestionReplicationFactor: ingestionReplicationFactor, } if len(compactorCfg.EnabledTenants) > 0 { @@ -761,6 +795,7 @@ func (c *Compactor) compactUsers(ctx context.Context) { continue } else if markedForDeletion { c.CompactionRunSkippedTenants.Inc() + c.compactorMetrics.deleteMetricsForDeletedTenant(userID) level.Debug(c.logger).Log("msg", "skipping user because it is marked for deletion", "user", userID) continue } @@ -929,7 +964,7 @@ func (c *Compactor) compactUser(ctx context.Context, userID string) error { compactor, err := compact.NewBucketCompactor( ulogger, syncer, - c.blocksGrouperFactory(currentCtx, c.compactorCfg, bucket, ulogger, c.BlocksMarkedForNoCompaction, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, syncerMetrics, c.compactorMetrics, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter), + c.blocksGrouperFactory(currentCtx, c.compactorCfg, bucket, ulogger, c.BlocksMarkedForNoCompaction, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, syncerMetrics, c.compactorMetrics, c.ring, c.ringLifecycler, c.limits, userID, noCompactMarkerFilter, c.ingestionReplicationFactor), c.blocksPlannerFactory(currentCtx, bucket, ulogger, c.compactorCfg, noCompactMarkerFilter, c.ringLifecycler, userID, c.blockVisitMarkerReadFailed, c.blockVisitMarkerWriteFailed, c.compactorMetrics), c.blocksCompactor, c.compactDirForUser(userID), diff --git a/pkg/compactor/compactor_metrics.go b/pkg/compactor/compactor_metrics.go index bdd3fefef0..e14fb9a0dc 100644 --- a/pkg/compactor/compactor_metrics.go +++ b/pkg/compactor/compactor_metrics.go @@ -38,6 +38,7 @@ type compactorMetrics struct { verticalCompactions *prometheus.CounterVec remainingPlannedCompactions *prometheus.GaugeVec 
compactionErrorsCount *prometheus.CounterVec + partitionCount *prometheus.GaugeVec } const ( @@ -169,6 +170,10 @@ func newCompactorMetricsWithLabels(reg prometheus.Registerer, commonLabels []str Name: "cortex_compactor_compaction_error_total", Help: "Total number of errors from compactions.", }, append(commonLabels, compactionErrorTypesLabelName)) + m.partitionCount = promauto.With(reg).NewGaugeVec(prometheus.GaugeOpts{ + Name: "cortex_compactor_group_partition_count", + Help: "Number of partitions for each compaction group.", + }, compactionLabels) return &m } @@ -207,3 +212,28 @@ func (m *compactorMetrics) getCommonLabelValues(userID string) []string { } return labelValues } + +func (m *compactorMetrics) initMetricWithCompactionLabelValues(labelValue ...string) { + if len(m.compactionLabels) != len(commonLabels)+len(compactionLabels) { + return + } + + m.compactions.WithLabelValues(labelValue...) + m.compactionPlanned.WithLabelValues(labelValue...) + m.compactionRunsStarted.WithLabelValues(labelValue...) + m.compactionRunsCompleted.WithLabelValues(labelValue...) + m.compactionFailures.WithLabelValues(labelValue...) + m.verticalCompactions.WithLabelValues(labelValue...) + m.partitionCount.WithLabelValues(labelValue...) +} + +func (m *compactorMetrics) deleteMetricsForDeletedTenant(userID string) { + m.syncerBlocksMarkedForDeletion.DeleteLabelValues(userID) + m.compactions.DeleteLabelValues(userID) + m.compactionPlanned.DeleteLabelValues(userID) + m.compactionRunsStarted.DeleteLabelValues(userID) + m.compactionRunsCompleted.DeleteLabelValues(userID) + m.compactionFailures.DeleteLabelValues(userID) + m.verticalCompactions.DeleteLabelValues(userID) + m.partitionCount.DeleteLabelValues(userID) +} diff --git a/pkg/compactor/compactor_metrics_test.go b/pkg/compactor/compactor_metrics_test.go index da4bb82025..f2a13276cd 100644 --- a/pkg/compactor/compactor_metrics_test.go +++ b/pkg/compactor/compactor_metrics_test.go @@ -130,6 +130,11 @@ func TestSyncerMetrics(t *testing.T) { cortex_compactor_compaction_error_total{type="unauthorized",user="aaa"} 477730 cortex_compactor_compaction_error_total{type="unauthorized",user="bbb"} 488840 cortex_compactor_compaction_error_total{type="unauthorized",user="ccc"} 499950 + # HELP cortex_compactor_group_partition_count Number of partitions for each compaction group. 
+ # TYPE cortex_compactor_group_partition_count gauge + cortex_compactor_group_partition_count{user="aaa"} 511060 + cortex_compactor_group_partition_count{user="bbb"} 522170 + cortex_compactor_group_partition_count{user="ccc"} 533280 `)) require.NoError(t, err) @@ -183,4 +188,7 @@ func generateTestData(cm *compactorMetrics, base float64) { cm.compactionErrorsCount.WithLabelValues("aaa", unauthorizedError).Add(43 * base) cm.compactionErrorsCount.WithLabelValues("bbb", unauthorizedError).Add(44 * base) cm.compactionErrorsCount.WithLabelValues("ccc", unauthorizedError).Add(45 * base) + cm.partitionCount.WithLabelValues("aaa").Add(46 * base) + cm.partitionCount.WithLabelValues("bbb").Add(47 * base) + cm.partitionCount.WithLabelValues("ccc").Add(48 * base) } diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index 09a3929d64..fd67e6b650 100644 --- a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -1578,7 +1578,7 @@ func prepare(t *testing.T, compactorCfg Config, bucketClient objstore.Instrument blocksGrouperFactory = DefaultBlocksGrouperFactory } - c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, overrides) + c, err := newCompactor(compactorCfg, storageCfg, logger, registry, bucketClientFactory, blocksGrouperFactory, blocksCompactorFactory, overrides, 1) require.NoError(t, err) return c, tsdbCompactor, tsdbPlanner, logs, registry diff --git a/pkg/compactor/partition_compaction_grouper.go b/pkg/compactor/partition_compaction_grouper.go index c3687f7e6a..1340093ab2 100644 --- a/pkg/compactor/partition_compaction_grouper.go +++ b/pkg/compactor/partition_compaction_grouper.go @@ -2,37 +2,922 @@ package compactor import ( "context" + "fmt" + "math" + "math/rand" + "sort" + "strings" + "time" "github.com/go-kit/log" + "github.com/go-kit/log/level" "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/model/labels" "github.com/thanos-io/objstore" + thanosblock "github.com/thanos-io/thanos/pkg/block" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/compact" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/tsdb" +) + +var ( + DUMMY_BLOCK_ID = ulid.ULID{} ) type PartitionCompactionGrouper struct { - ctx context.Context - logger log.Logger - bkt objstore.InstrumentedBucket + ctx context.Context + logger log.Logger + bkt objstore.InstrumentedBucket + acceptMalformedIndex bool + enableVerticalCompaction bool + blocksMarkedForNoCompact prometheus.Counter + hashFunc metadata.HashFunc + syncerMetrics *compact.SyncerMetrics + compactorMetrics *compactorMetrics + compactorCfg Config + limits Limits + userID string + blockFilesConcurrency int + blocksFetchConcurrency int + compactionConcurrency int + + doRandomPick bool + + ring ring.ReadRing + ringLifecyclerAddr string + ringLifecyclerID string + + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark + partitionVisitMarkerTimeout time.Duration + + ingestionReplicationFactor int } func NewPartitionCompactionGrouper( ctx context.Context, logger log.Logger, bkt objstore.InstrumentedBucket, + acceptMalformedIndex bool, + enableVerticalCompaction bool, + blocksMarkedForNoCompact prometheus.Counter, + syncerMetrics *compact.SyncerMetrics, + compactorMetrics *compactorMetrics, + hashFunc metadata.HashFunc, + compactorCfg Config, + ring ring.ReadRing, + 
ringLifecyclerAddr string, + ringLifecyclerID string, + limits Limits, + userID string, + blockFilesConcurrency int, + blocksFetchConcurrency int, + compactionConcurrency int, + doRandomPick bool, + partitionVisitMarkerTimeout time.Duration, + noCompBlocksFunc func() map[ulid.ULID]*metadata.NoCompactMark, + ingestionReplicationFactor int, ) *PartitionCompactionGrouper { if logger == nil { logger = log.NewNopLogger() } return &PartitionCompactionGrouper{ - ctx: ctx, - logger: logger, - bkt: bkt, + ctx: ctx, + logger: logger, + bkt: bkt, + acceptMalformedIndex: acceptMalformedIndex, + enableVerticalCompaction: enableVerticalCompaction, + blocksMarkedForNoCompact: blocksMarkedForNoCompact, + hashFunc: hashFunc, + syncerMetrics: syncerMetrics, + compactorMetrics: compactorMetrics, + compactorCfg: compactorCfg, + ring: ring, + ringLifecyclerAddr: ringLifecyclerAddr, + ringLifecyclerID: ringLifecyclerID, + limits: limits, + userID: userID, + blockFilesConcurrency: blockFilesConcurrency, + blocksFetchConcurrency: blocksFetchConcurrency, + compactionConcurrency: compactionConcurrency, + doRandomPick: doRandomPick, + partitionVisitMarkerTimeout: partitionVisitMarkerTimeout, + noCompBlocksFunc: noCompBlocksFunc, + ingestionReplicationFactor: ingestionReplicationFactor, } } // Groups function modified from https://github.com/cortexproject/cortex/pull/2616 func (g *PartitionCompactionGrouper) Groups(blocks map[ulid.ULID]*metadata.Meta) (res []*compact.Group, err error) { - panic("PartitionCompactionGrouper not implemented") + // Check if this compactor is on the subring. + // If the compactor is not on the subring when using the userID as a identifier + // no plans generated below will be owned by the compactor so we can just return an empty array + // as there will be no planned groups + onSubring, err := g.checkSubringForCompactor() + if err != nil { + return nil, errors.Wrap(err, "unable to check sub-ring for compactor ownership") + } + if !onSubring { + level.Debug(g.logger).Log("msg", "compactor is not on the current sub-ring skipping user", "user", g.userID) + return nil, nil + } + + // Filter out no compact blocks + noCompactMarked := g.noCompBlocksFunc() + for id, b := range blocks { + if _, excluded := noCompactMarked[b.ULID]; excluded { + delete(blocks, id) + } + } + + partitionCompactionJobs, err := g.generateCompactionJobs(blocks) + if err != nil { + return nil, errors.Wrap(err, "unable to generate compaction jobs") + } + + pickedPartitionCompactionJobs := g.pickPartitionCompactionJob(partitionCompactionJobs) + + return pickedPartitionCompactionJobs, nil +} + +// Check whether this compactor exists on the subring based on user ID +func (g *PartitionCompactionGrouper) checkSubringForCompactor() (bool, error) { + subRing := g.ring.ShuffleShard(g.userID, g.limits.CompactorTenantShardSize(g.userID)) + + rs, err := subRing.GetAllHealthy(RingOp) + if err != nil { + return false, err + } + + return rs.Includes(g.ringLifecyclerAddr), nil +} + +func (g *PartitionCompactionGrouper) generateCompactionJobs(blocks map[ulid.ULID]*metadata.Meta) ([]*blocksGroupWithPartition, error) { + timeRanges := g.compactorCfg.BlockRanges.ToMilliseconds() + + groups := g.groupBlocks(blocks, timeRanges) + + existingPartitionedGroups, err := g.loadExistingPartitionedGroups() + if err != nil { + return nil, err + } + + var blockIDs []string + for _, p := range existingPartitionedGroups { + blockIDs = p.getAllBlockIDs() + level.Info(g.logger).Log("msg", "existing partitioned group", "partitioned_group_id", 
p.PartitionedGroupID, "partition_count", p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + allPartitionedGroup, err := g.generatePartitionedGroups(blocks, groups, existingPartitionedGroups, timeRanges) + if err != nil { + return nil, err + } + g.sortPartitionedGroups(allPartitionedGroup) + for _, p := range allPartitionedGroup { + blockIDs = p.getAllBlockIDs() + level.Info(g.logger).Log("msg", "partitioned group ready for compaction", "partitioned_group_id", p.PartitionedGroupID, "partition_count", p.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + + partitionCompactionJobs := g.generatePartitionCompactionJobs(blocks, allPartitionedGroup, g.doRandomPick) + for _, p := range partitionCompactionJobs { + blockIDs = p.getBlockIDs() + level.Info(g.logger).Log("msg", "partitioned compaction job", "partitioned_group_id", p.partitionedGroupInfo.PartitionedGroupID, "partition_id", p.partition.PartitionID, "partition_count", p.partitionedGroupInfo.PartitionCount, "rangeStart", p.rangeStartTime().String(), "rangeEnd", p.rangeEndTime().String(), "blocks", strings.Join(blockIDs, ",")) + } + return partitionCompactionJobs, nil +} + +func (g *PartitionCompactionGrouper) loadExistingPartitionedGroups() (map[uint32]*PartitionedGroupInfo, error) { + partitionedGroups := make(map[uint32]*PartitionedGroupInfo) + err := g.bkt.Iter(g.ctx, PartitionedGroupDirectory, func(file string) error { + if !strings.Contains(file, PartitionVisitMarkerDirectory) { + partitionedGroup, err := ReadPartitionedGroupInfoFile(g.ctx, g.bkt, g.logger, file) + if err != nil { + return err + } + partitionedGroups[partitionedGroup.PartitionedGroupID] = partitionedGroup + } + return nil + }) + if err != nil { + return nil, errors.Wrap(err, "unable to load existing partitioned groups") + } + return partitionedGroups, nil +} + +func (g *PartitionCompactionGrouper) groupBlocks(blocks map[ulid.ULID]*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition { + // First of all we have to group blocks using the Thanos default + // grouping (based on downsample resolution + external labels). + mainGroups := map[string][]*metadata.Meta{} + for _, b := range blocks { + key := b.Thanos.GroupKey() + mainGroups[key] = append(mainGroups[key], b) + } + + var groups []blocksGroupWithPartition + for _, mainBlocks := range mainGroups { + groups = append(groups, g.groupBlocksByCompactableRanges(mainBlocks, timeRanges)...) + } + + g.sortBlockGroups(groups) + + return groups +} + +func (g *PartitionCompactionGrouper) groupBlocksByCompactableRanges(blocks []*metadata.Meta, timeRanges []int64) []blocksGroupWithPartition { + if len(blocks) == 0 { + return nil + } + + // Sort blocks by min time. + sortMetasByMinTime(blocks) + + var groups []blocksGroupWithPartition + + for _, tr := range timeRanges { + groups = append(groups, g.groupBlocksByRange(blocks, tr)...) + } + + return groups +} + +func (g *PartitionCompactionGrouper) groupBlocksByRange(blocks []*metadata.Meta, tr int64) []blocksGroupWithPartition { + var ret []blocksGroupWithPartition + + for i := 0; i < len(blocks); { + var ( + group blocksGroupWithPartition + m = blocks[i] + ) + + group.rangeStart = getRangeStart(m, tr) + group.rangeEnd = group.rangeStart + tr + + // Skip blocks that don't fall into the range. This can happen via mis-alignment or + // by being the multiple of the intended range. 
+ if m.MaxTime > group.rangeEnd { + i++ + continue + } + + // Add all blocks to the current group that are within [t0, t0+tr]. + for ; i < len(blocks); i++ { + // If the block does not start within this group, then we should break the iteration + // and move it to the next group. + if blocks[i].MinTime >= group.rangeEnd { + break + } + + // If the block doesn't fall into this group, but it started within this group then it + // means it spans across multiple ranges and we should skip it. + if blocks[i].MaxTime > group.rangeEnd { + continue + } + + group.blocks = append(group.blocks, blocks[i]) + } + + if len(group.blocks) > 1 { + ret = append(ret, group) + } + } + + return ret +} + +func (g *PartitionCompactionGrouper) sortBlockGroups(groups []blocksGroupWithPartition) { + // Ensure groups are sorted by smallest range, oldest min time first. The rationale + // is that we wanna favor smaller ranges first (ie. to deduplicate samples sooner + // than later) and older ones are more likely to be "complete" (no missing block still + // to be uploaded). + sort.SliceStable(groups, func(i, j int) bool { + iGroup := groups[i] + jGroup := groups[j] + iRangeStart := iGroup.rangeStart + iRangeEnd := iGroup.rangeEnd + jRangeStart := jGroup.rangeStart + jRangeEnd := jGroup.rangeEnd + iLength := iRangeEnd - iRangeStart + jLength := jRangeEnd - jRangeStart + + if iLength != jLength { + return iLength < jLength + } + if iRangeStart != jRangeStart { + return iRangeStart < jRangeStart + } + + iGroupHash := hashGroup(g.userID, iRangeStart, iRangeEnd) + iGroupKey := createGroupKeyWithPartition(iGroupHash, iGroup) + jGroupHash := hashGroup(g.userID, jRangeStart, jRangeEnd) + jGroupKey := createGroupKeyWithPartition(jGroupHash, jGroup) + // Guarantee stable sort for tests. 
+ return iGroupKey < jGroupKey + }) +} + +func (g *PartitionCompactionGrouper) generatePartitionedGroups(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRanges []int64) ([]*PartitionedGroupInfo, error) { + var allPartitionedGroup []*PartitionedGroupInfo + for _, partitionedGroup := range existingPartitionedGroups { + status := partitionedGroup.getPartitionedGroupStatus(g.ctx, g.bkt, g.partitionVisitMarkerTimeout, g.logger) + if !status.IsCompleted { + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + } + + timeRangeChecker := NewCompletenessChecker(blocks, groups, timeRanges) + for _, startTimeMap := range timeRangeChecker.TimeRangesStatus { + for _, status := range startTimeMap { + if !status.canTakeCompaction { + level.Info(g.logger).Log("msg", "incomplete time range", "rangeStart", status.rangeStartTime().String(), "rangeEnd", status.rangeEndTime().String(), + "timeRange", status.timeRangeDuration().String(), "previousTimeRange", status.previousTimeRangeDuration().String()) + } + } + } + + var blockIDs []string + for _, group := range groups { + groupHash := hashGroup(g.userID, group.rangeStart, group.rangeEnd) + logger := log.With(g.logger, "partitioned_group_id", groupHash, "rangeStart", group.rangeStartTime().String(), "rangeEnd", group.rangeEndTime().String()) + + blockIDs = group.getBlockIDs() + level.Info(logger).Log("msg", "block group", "blocks", strings.Join(blockIDs, ",")) + + level.Info(logger).Log("msg", "start generating partitioned group") + if g.shouldSkipGroup(logger, group, groupHash, existingPartitionedGroups, timeRangeChecker) { + level.Info(logger).Log("msg", "skip generating partitioned group") + continue + } + partitionedGroup, err := g.generatePartitionBlockGroup(group, groupHash) + if err != nil { + return nil, errors.Wrapf(err, "unable to generate partitioned group: %d", groupHash) + } + level.Info(logger).Log("msg", "generated partitioned group") + allPartitionedGroup = append(allPartitionedGroup, partitionedGroup) + } + return allPartitionedGroup, nil +} + +func (g *PartitionCompactionGrouper) shouldSkipGroup(logger log.Logger, group blocksGroupWithPartition, partitionedGroupID uint32, existingPartitionedGroups map[uint32]*PartitionedGroupInfo, timeRangeChecker TimeRangeChecker) bool { + if _, ok := existingPartitionedGroups[partitionedGroupID]; ok { + level.Info(logger).Log("msg", "skip group", "reason", "partitioned group already exists") + return true + } + tr := group.rangeEnd - group.rangeStart + if status, ok := timeRangeChecker.TimeRangesStatus[tr][group.rangeStart]; !ok { + level.Info(logger).Log("msg", "skip group", "reason", "unable to get time range status") + return true + } else if !status.canTakeCompaction { + level.Info(logger).Log("msg", "skip group", "reason", "time range cannot take compaction job") + return true + } + + // Check if all blocks in the group have the same partitioned group ID as the destination partitionedGroupID + for _, b := range group.blocks { + partitionInfo, err := tsdb.GetPartitionInfo(*b) + if err != nil || partitionInfo == nil || partitionInfo.PartitionedGroupID != partitionedGroupID { + return false + } + } + level.Info(logger).Log("msg", "skip group", "reason", "all blocks in the group have partitioned group id equal to new group partitioned_group_id") + return true +} + +func (g *PartitionCompactionGrouper) generatePartitionBlockGroup(group blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error)
{ + partitionedGroupInfo, err := g.partitionBlockGroup(group, groupHash) + if err != nil { + return nil, err + } + updatedPartitionedGroupInfo, err := UpdatePartitionedGroupInfo(g.ctx, g.bkt, g.logger, *partitionedGroupInfo) + if err != nil { + return nil, err + } + return updatedPartitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) partitionBlockGroup(group blocksGroupWithPartition, groupHash uint32) (*PartitionedGroupInfo, error) { + partitionCount := g.calculatePartitionCount(group, groupHash) + blocksByMinTime := g.groupBlocksByMinTime(group) + partitionedGroups, err := g.partitionBlocksGroup(partitionCount, blocksByMinTime, group.rangeStart, group.rangeEnd) + if err != nil { + return nil, err + } + + partitions := make([]Partition, partitionCount) + for partitionID := 0; partitionID < partitionCount; partitionID++ { + partitionedGroup := partitionedGroups[partitionID] + blockIDs := make([]ulid.ULID, len(partitionedGroup.blocks)) + for i, m := range partitionedGroup.blocks { + blockIDs[i] = m.ULID + } + partitions[partitionID] = Partition{ + PartitionID: partitionID, + Blocks: blockIDs, + } + } + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: groupHash, + PartitionCount: partitionCount, + Partitions: partitions, + RangeStart: group.rangeStart, + RangeEnd: group.rangeEnd, + Version: PartitionedGroupInfoVersion1, + } + return &partitionedGroupInfo, nil +} + +func (g *PartitionCompactionGrouper) calculatePartitionCount(group blocksGroupWithPartition, groupHash uint32) int { + indexSizeLimit := g.limits.CompactorPartitionIndexSizeBytes(g.userID) + seriesCountLimit := g.limits.CompactorPartitionSeriesCount(g.userID) + smallestRange := g.compactorCfg.BlockRanges.ToMilliseconds()[0] + groupRange := group.rangeLength() + if smallestRange >= groupRange { + level.Info(g.logger).Log("msg", "calculate level 1 block limits", "partitioned_group_id", groupHash, "smallest_range", smallestRange, "group_range", groupRange, "ingestion_replication_factor", g.ingestionReplicationFactor) + indexSizeLimit = indexSizeLimit * int64(g.ingestionReplicationFactor) + seriesCountLimit = seriesCountLimit * int64(g.ingestionReplicationFactor) + } + + totalIndexSizeInBytes := int64(0) + totalSeriesCount := int64(0) + for _, block := range group.blocks { + blockFiles := block.Thanos.Files + totalSeriesCount += int64(block.Stats.NumSeries) + var indexFile *metadata.File + for _, file := range blockFiles { + if file.RelPath == thanosblock.IndexFilename { + indexFile = &file + } + } + if indexFile == nil { + level.Debug(g.logger).Log("msg", "unable to find index file in metadata", "block", block.ULID) + break + } + indexSize := indexFile.SizeBytes + totalIndexSizeInBytes += indexSize + } + partitionNumberBasedOnIndex := 1 + if indexSizeLimit > 0 && totalIndexSizeInBytes > indexSizeLimit { + partitionNumberBasedOnIndex = g.findNearestPartitionNumber(float64(totalIndexSizeInBytes), float64(indexSizeLimit)) + } + partitionNumberBasedOnSeries := 1 + if seriesCountLimit > 0 && totalSeriesCount > seriesCountLimit { + partitionNumberBasedOnSeries = g.findNearestPartitionNumber(float64(totalSeriesCount), float64(seriesCountLimit)) + } + partitionNumber := partitionNumberBasedOnIndex + if partitionNumberBasedOnSeries > partitionNumberBasedOnIndex { + partitionNumber = partitionNumberBasedOnSeries + } + level.Info(g.logger).Log("msg", "calculated partition number for group", "partitioned_group_id", groupHash, "partition_number", partitionNumber, "total_index_size", totalIndexSizeInBytes, 
"index_size_limit", indexSizeLimit, "total_series_count", totalSeriesCount, "series_count_limit", seriesCountLimit, "group", group.String()) + return partitionNumber +} + +func (g *PartitionCompactionGrouper) findNearestPartitionNumber(size float64, limit float64) int { + return int(math.Pow(2, math.Ceil(math.Log2(size/limit)))) +} + +func (g *PartitionCompactionGrouper) groupBlocksByMinTime(group blocksGroupWithPartition) map[int64][]*metadata.Meta { + blocksByMinTime := make(map[int64][]*metadata.Meta) + for _, block := range group.blocks { + blockRange := block.MaxTime - block.MinTime + minTime := block.MinTime + for _, tr := range g.compactorCfg.BlockRanges.ToMilliseconds() { + if blockRange <= tr { + minTime = tr * (block.MinTime / tr) + break + } + } + blocksByMinTime[minTime] = append(blocksByMinTime[minTime], block) + } + return blocksByMinTime +} + +func (g *PartitionCompactionGrouper) partitionBlocksGroup(partitionCount int, blocksByMinTime map[int64][]*metadata.Meta, rangeStart int64, rangeEnd int64) (map[int]blocksGroupWithPartition, error) { + partitionedGroups := make(map[int]blocksGroupWithPartition) + addToPartitionedGroups := func(blocks []*metadata.Meta, partitionID int) { + if _, ok := partitionedGroups[partitionID]; !ok { + partitionedGroups[partitionID] = blocksGroupWithPartition{ + rangeStart: rangeStart, + rangeEnd: rangeEnd, + blocks: []*metadata.Meta{}, + } + } + partitionedGroup := partitionedGroups[partitionID] + partitionedGroup.blocks = append(partitionedGroup.blocks, blocks...) + partitionedGroups[partitionID] = partitionedGroup + } + + for _, blocksInSameTimeInterval := range blocksByMinTime { + for _, block := range blocksInSameTimeInterval { + partitionInfo, err := tsdb.GetPartitionInfo(*block) + if err != nil { + return nil, err + } + if partitionInfo == nil || partitionInfo.PartitionCount < 1 { + // For legacy blocks with level > 1, treat PartitionID is always 0. + // So it can be included in every partition. + defaultPartitionInfo := tsdb.DefaultPartitionInfo + partitionInfo = &defaultPartitionInfo + } + if partitionInfo.PartitionCount < partitionCount { + for partitionID := partitionInfo.PartitionID; partitionID < partitionCount; partitionID += partitionInfo.PartitionCount { + addToPartitionedGroups([]*metadata.Meta{block}, partitionID) + } + } else if partitionInfo.PartitionCount == partitionCount { + addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID) + } else { + addToPartitionedGroups([]*metadata.Meta{block}, partitionInfo.PartitionID%partitionCount) + } + } + } + return partitionedGroups, nil +} + +func (g *PartitionCompactionGrouper) sortPartitionedGroups(partitionedGroups []*PartitionedGroupInfo) { + // Ensure groups are sorted by smallest range, oldest min time first. The rationale + // is that we wanna favor smaller ranges first (ie. to deduplicate samples sooner + // than later) and older ones are more likely to be "complete" (no missing block still + // to be uploaded). + sort.SliceStable(partitionedGroups, func(i, j int) bool { + iGroup := partitionedGroups[i] + jGroup := partitionedGroups[j] + iRangeStart := iGroup.RangeStart + iRangeEnd := iGroup.RangeEnd + jRangeStart := jGroup.RangeStart + jRangeEnd := jGroup.RangeEnd + iLength := iRangeEnd - iRangeStart + jLength := jRangeEnd - jRangeStart + + if iLength != jLength { + return iLength < jLength + } + if iRangeStart != jRangeStart { + return iRangeStart < jRangeStart + } + // Guarantee stable sort for tests. 
+ return iGroup.PartitionedGroupID < jGroup.PartitionedGroupID + }) +} + +func (g *PartitionCompactionGrouper) generatePartitionCompactionJobs(blocks map[ulid.ULID]*metadata.Meta, partitionedGroups []*PartitionedGroupInfo, doRandomPick bool) []*blocksGroupWithPartition { + var partitionedBlockGroups []*blocksGroupWithPartition + for _, partitionedGroupInfo := range partitionedGroups { + partitionedGroupID := partitionedGroupInfo.PartitionedGroupID + partitionAdded := 0 + var partitionIDs []int + if doRandomPick { + // Randomly pick partitions from the partitioned group to avoid all compactors + // trying to get the same partition at the same time. + r := rand.New(rand.NewSource(time.Now().UnixMicro() + int64(hashString(g.ringLifecyclerID)))) + partitionIDs = r.Perm(len(partitionedGroupInfo.Partitions)) + } else { + for i := 0; i < partitionedGroupInfo.PartitionCount; i++ { + partitionIDs = append(partitionIDs, i) + } + } + for _, i := range partitionIDs { + partition := partitionedGroupInfo.Partitions[i] + if len(partition.Blocks) == 1 { + partition.Blocks = append(partition.Blocks, DUMMY_BLOCK_ID) + level.Info(g.logger).Log("msg", "handled single block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + } else if len(partition.Blocks) < 1 { + if err := g.handleEmptyPartition(partitionedGroupInfo, partition); err != nil { + level.Warn(g.logger).Log("msg", "failed to handle empty partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID, "err", err) + } + continue + } + partitionedGroup, err := createBlocksGroup(blocks, partition.Blocks, partitionedGroupInfo.RangeStart, partitionedGroupInfo.RangeEnd) + if err != nil { + continue + } + partitionedGroup.groupHash = partitionedGroupID + partitionedGroup.partitionedGroupInfo = partitionedGroupInfo + partitionedGroup.partition = partition + partitionedBlockGroups = append(partitionedBlockGroups, partitionedGroup) + partitionAdded++ + } + } + return partitionedBlockGroups +} + +// handleEmptyPartition uploads a completed partition visit marker for any partition that does not have any blocks assigned +func (g *PartitionCompactionGrouper) handleEmptyPartition(partitionedGroupInfo *PartitionedGroupInfo, partition Partition) error { + if len(partition.Blocks) > 0 { + return nil + } + + level.Info(g.logger).Log("msg", "handling empty block partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + visitMarker := &partitionVisitMarker{ + PartitionedGroupID: partitionedGroupInfo.PartitionedGroupID, + PartitionID: partition.PartitionID, + Version: PartitionVisitMarkerVersion1, + } + visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, visitMarker) + visitMarkerManager.MarkWithStatus(g.ctx, Completed) + + level.Info(g.logger).Log("msg", "handled empty block in partition", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID, "partition_count", partitionedGroupInfo.PartitionCount, "partition_id", partition.PartitionID) + return nil +} + +func (g *PartitionCompactionGrouper) pickPartitionCompactionJob(partitionCompactionJobs []*blocksGroupWithPartition) []*compact.Group { + var outGroups []*compact.Group + for _, partitionedGroup := range partitionCompactionJobs { + groupHash :=
partitionedGroup.groupHash + partitionedGroupID := partitionedGroup.partitionedGroupInfo.PartitionedGroupID + partitionCount := partitionedGroup.partitionedGroupInfo.PartitionCount + partitionID := partitionedGroup.partition.PartitionID + partitionedGroupLogger := log.With(g.logger, "rangeStart", partitionedGroup.rangeStartTime().String(), "rangeEnd", partitionedGroup.rangeEndTime().String(), "rangeDuration", partitionedGroup.rangeDuration().String(), "partitioned_group_id", partitionedGroupID, "partition_id", partitionID, "partition_count", partitionCount, "group_hash", groupHash) + visitMarker := newPartitionVisitMarker(g.ringLifecyclerID, partitionedGroupID, partitionID) + visitMarkerManager := NewVisitMarkerManager(g.bkt, g.logger, g.ringLifecyclerID, visitMarker) + if isVisited, err := g.isGroupVisited(partitionID, visitMarkerManager); err != nil { + level.Warn(partitionedGroupLogger).Log("msg", "unable to check if partition is visited", "err", err, "group", partitionedGroup.String()) + continue + } else if isVisited { + level.Info(partitionedGroupLogger).Log("msg", "skipping group because partition is visited") + continue + } + partitionedGroupKey := createGroupKeyWithPartitionID(groupHash, partitionID, *partitionedGroup) + + level.Info(partitionedGroupLogger).Log("msg", "found compactable group for user", "group", partitionedGroup.String()) + begin := time.Now() + + visitMarkerManager.MarkWithStatus(g.ctx, Pending) + level.Info(partitionedGroupLogger).Log("msg", "marked partition visited in group", "duration", time.Since(begin), "duration_ms", time.Since(begin).Milliseconds(), "group", partitionedGroup.String()) + + resolution := partitionedGroup.blocks[0].Thanos.Downsample.Resolution + externalLabels := labels.FromMap(partitionedGroup.blocks[0].Thanos.Labels) + timeRange := partitionedGroup.rangeEnd - partitionedGroup.rangeStart + metricLabelValues := []string{ + g.userID, + fmt.Sprintf("%d", timeRange), + } + g.compactorMetrics.initMetricWithCompactionLabelValues(metricLabelValues...) + g.compactorMetrics.partitionCount.WithLabelValues(metricLabelValues...).Set(float64(partitionCount)) + thanosGroup, err := compact.NewGroup( + log.With(partitionedGroupLogger, "groupKey", partitionedGroupKey, "externalLabels", externalLabels, "downsampleResolution", resolution), + g.bkt, + partitionedGroupKey, + externalLabels, + resolution, + g.acceptMalformedIndex, + true, // Enable vertical compaction. 
+ g.compactorMetrics.compactions.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsStarted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionRunsCompleted.WithLabelValues(metricLabelValues...), + g.compactorMetrics.compactionFailures.WithLabelValues(metricLabelValues...), + g.compactorMetrics.verticalCompactions.WithLabelValues(metricLabelValues...), + g.syncerMetrics.GarbageCollectedBlocks, + g.syncerMetrics.BlocksMarkedForDeletion, + g.blocksMarkedForNoCompact, + g.hashFunc, + g.blockFilesConcurrency, + g.blocksFetchConcurrency, + ) + if err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to create partitioned group", "blocks", partitionedGroup.partition.Blocks) + } + + for _, m := range partitionedGroup.blocks { + if err := thanosGroup.AppendMeta(m); err != nil { + level.Error(partitionedGroupLogger).Log("msg", "failed to add block to partitioned group", "block", m.ULID, "err", err) + } + } + thanosGroup.SetExtensions(&tsdb.CortexMetaExtensions{ + PartitionInfo: &tsdb.PartitionInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: partitionCount, + PartitionID: partitionID, + PartitionedGroupCreationTime: partitionedGroup.partitionedGroupInfo.CreationTime, + }, + TimeRange: timeRange, + }) + + outGroups = append(outGroups, thanosGroup) + level.Debug(partitionedGroupLogger).Log("msg", "added partition to compaction groups") + if len(outGroups) >= g.compactionConcurrency { + break + } + } + + level.Info(g.logger).Log("msg", fmt.Sprintf("total groups for compaction: %d", len(outGroups))) + + for _, p := range outGroups { + partitionInfo, err := tsdb.ConvertToPartitionInfo(p.Extensions()) + if err == nil && partitionInfo != nil { + level.Info(g.logger).Log("msg", "picked compaction job", "partitioned_group_id", partitionInfo.PartitionedGroupID, "partition_count", partitionInfo.PartitionCount) + } + } + return outGroups +} + +func (g *PartitionCompactionGrouper) isGroupVisited(partitionID int, visitMarkerManager *VisitMarkerManager) (bool, error) { + visitMarker := &partitionVisitMarker{} + err := visitMarkerManager.ReadVisitMarker(g.ctx, visitMarker) + if err != nil { + if errors.Is(err, errorVisitMarkerNotFound) { + level.Warn(g.logger).Log("msg", "no visit marker file for partition", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath()) + return false, nil + } + level.Error(g.logger).Log("msg", "unable to read partition visit marker file", "partition_visit_marker_file", visitMarkerManager.visitMarker.GetVisitMarkerFilePath(), "err", err) + return true, err + } + if visitMarker.GetStatus() == Completed { + level.Info(g.logger).Log("msg", "partition visit marker with partition ID is completed", "partition_visit_marker", visitMarker.String()) + return true, nil + } + if visitMarker.IsVisited(g.partitionVisitMarkerTimeout, partitionID) { + level.Info(g.logger).Log("msg", "visited partition with partition ID", "partition_visit_marker", visitMarker.String()) + return true, nil + } + return false, nil +} + +type TimeRangeChecker struct { + // This is a map of timeRange to a map of rangeStart to timeRangeStatus + TimeRangesStatus map[int64]map[int64]*timeRangeStatus +} + +func NewCompletenessChecker(blocks map[ulid.ULID]*metadata.Meta, groups []blocksGroupWithPartition, timeRanges []int64) TimeRangeChecker { + timeRangeToBlockMap := make(map[int64][]*metadata.Meta) + for _, b := range blocks { + timeRange := int64(0) + if b.Compaction.Level > 1 { + ext, err := 
tsdb.GetCortexMetaExtensionsFromMeta(*b) + if err == nil && ext != nil && ext.TimeRange > 0 { + timeRange = ext.TimeRange + } else { + // fallback logic to guess block time range based + // on MaxTime and MinTime + blockRange := b.MaxTime - b.MinTime + for _, tr := range timeRanges { + rangeStart := getRangeStart(b, tr) + rangeEnd := rangeStart + tr + if tr >= blockRange && rangeEnd >= b.MaxTime { + timeRange = tr + break + } + } + } + } + timeRangeToBlockMap[timeRange] = append(timeRangeToBlockMap[timeRange], b) + } + timeRangesStatus := make(map[int64]map[int64]*timeRangeStatus) + for _, g := range groups { + tr := g.rangeEnd - g.rangeStart + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + timeRangesStatus[tr][g.rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: g.rangeStart, + rangeEnd: g.rangeEnd, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + for tr, blks := range timeRangeToBlockMap { + if _, ok := timeRangesStatus[tr]; !ok { + timeRangesStatus[tr] = make(map[int64]*timeRangeStatus) + } + for _, b := range blks { + actualTr := tr + if tr == 0 { + actualTr = timeRanges[0] + } + rangeStart := getRangeStart(b, actualTr) + if _, ok := timeRangesStatus[tr][rangeStart]; !ok { + timeRangesStatus[tr][rangeStart] = &timeRangeStatus{ + timeRange: tr, + rangeStart: rangeStart, + rangeEnd: rangeStart + actualTr, + numActiveBlocks: 0, + canTakeCompaction: false, + } + } + timeRangesStatus[tr][rangeStart].addBlock(1) + } + } + previousTimeRanges := []int64{0} + for _, tr := range timeRanges { + timeRangeLoop: + for rangeStart, status := range timeRangesStatus[tr] { + previousTrBlocks := 0 + for _, previousTr := range previousTimeRanges { + allPreviousTimeRanges := getAllPreviousTimeRanges(tr, rangeStart, previousTr, timeRanges[0]) + for _, previousRangeStart := range allPreviousTimeRanges { + if previousTrStatus, ok := timeRangesStatus[previousTr][previousRangeStart]; ok { + if previousTrStatus.canTakeCompaction { + status.canTakeCompaction = false + continue timeRangeLoop + } + previousTrBlocks += previousTrStatus.numActiveBlocks + } + } + } + status.canTakeCompaction = !(previousTrBlocks == 0 || (previousTrBlocks == 1 && status.numActiveBlocks == 0)) + } + previousTimeRanges = append(previousTimeRanges, tr) + } + return TimeRangeChecker{TimeRangesStatus: timeRangesStatus} +} + +// getAllPreviousTimeRanges returns a list of rangeStart time for previous time range that +// falls within current time range and start time +func getAllPreviousTimeRanges(currentTr int64, rangeStart int64, previousTr int64, smallestTr int64) []int64 { + var result []int64 + if previousTr == 0 { + previousTr = smallestTr + } + previousRangeStart := rangeStart + for ; previousRangeStart+previousTr <= rangeStart+currentTr; previousRangeStart += previousTr { + result = append(result, previousRangeStart) + } + return result +} + +type timeRangeStatus struct { + timeRange int64 + rangeStart int64 + rangeEnd int64 + numActiveBlocks int + canTakeCompaction bool + previousTimeRange int64 +} + +func (t *timeRangeStatus) addBlock(num int) { + t.numActiveBlocks += num +} + +func (t *timeRangeStatus) rangeStartTime() time.Time { + return time.Unix(0, t.rangeStart*int64(time.Millisecond)).UTC() +} + +func (t *timeRangeStatus) rangeEndTime() time.Time { + return time.Unix(0, t.rangeEnd*int64(time.Millisecond)).UTC() +} + +func (t *timeRangeStatus) timeRangeDuration() time.Duration { + return time.Duration(t.timeRange) * time.Millisecond +} + +func (t 
*timeRangeStatus) previousTimeRangeDuration() time.Duration { + return time.Duration(t.previousTimeRange) * time.Millisecond +} + +type blocksGroupWithPartition struct { + blocksGroup + rangeStart int64 // Included. + rangeEnd int64 // Excluded. + blocks []*metadata.Meta + groupHash uint32 + partitionedGroupInfo *PartitionedGroupInfo + partition Partition +} + +func (g blocksGroupWithPartition) rangeDuration() time.Duration { + return g.rangeEndTime().Sub(g.rangeStartTime()) +} + +func (g blocksGroupWithPartition) getBlockIDs() []string { + blockIDs := make([]string, len(g.blocks)) + for i, block := range g.blocks { + blockIDs[i] = block.ULID.String() + } + return blockIDs +} + +func createGroupKeyWithPartition(groupHash uint32, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%s", groupHash, group.blocks[0].Thanos.GroupKey()) +} + +func createGroupKeyWithPartitionID(groupHash uint32, partitionID int, group blocksGroupWithPartition) string { + return fmt.Sprintf("%v%d%s", groupHash, partitionID, group.blocks[0].Thanos.GroupKey()) +} + +func createBlocksGroup(blocks map[ulid.ULID]*metadata.Meta, blockIDs []ulid.ULID, rangeStart int64, rangeEnd int64) (*blocksGroupWithPartition, error) { + var group blocksGroupWithPartition + group.rangeStart = rangeStart + group.rangeEnd = rangeEnd + var nonDummyBlock *metadata.Meta + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + continue + } + m, ok := blocks[blockID] + if !ok { + return nil, fmt.Errorf("block not found: %s", blockID) + } + nonDummyBlock = m + group.blocks = append(group.blocks, m) + } + for _, blockID := range blockIDs { + if blockID == DUMMY_BLOCK_ID { + dummyMeta := *nonDummyBlock + dummyMeta.ULID = DUMMY_BLOCK_ID + group.blocks = append(group.blocks, &dummyMeta) + } + } + return &group, nil } diff --git a/pkg/compactor/partition_compaction_grouper_test.go b/pkg/compactor/partition_compaction_grouper_test.go new file mode 100644 index 0000000000..2167a219ae --- /dev/null +++ b/pkg/compactor/partition_compaction_grouper_test.go @@ -0,0 +1,2139 @@ +package compactor + +import ( + "context" + "encoding/json" + "fmt" + "path" + "testing" + "time" + + "github.com/oklog/ulid" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/tsdb" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + thanosblock "github.com/thanos-io/thanos/pkg/block" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/ring" + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortextsdb "github.com/cortexproject/cortex/pkg/storage/tsdb" + "github.com/cortexproject/cortex/pkg/util/validation" +) + +var ( + M = time.Minute.Milliseconds() + H = time.Hour.Milliseconds() +) + +func TestPartitionCompactionGrouper_GenerateCompactionJobs(t *testing.T) { + block1 := ulid.MustNew(1, nil) + block2 := ulid.MustNew(2, nil) + block3 := ulid.MustNew(3, nil) + block4 := ulid.MustNew(4, nil) + block5 := ulid.MustNew(5, nil) + block6 := ulid.MustNew(6, nil) + block7 := ulid.MustNew(7, nil) + + testCompactorID := "test-compactor" + //otherCompactorID := "other-compactor" + + userID := "test-user" + partitionedGroupID_0_2 := hashGroup(userID, 0*H, 2*H) + partitionedGroupID_0_12 := hashGroup(userID, 0*H, 12*H) + partitionedGroupID_0_24 := hashGroup(userID, 0*H, 24*H) + + tests := map[string]generateCompactionJobsTestCase{ + "only level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * 
time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 1 blocks, there are existing partitioned group files for all blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2}}, + }}, + {rangeStart: 2 * H, rangeEnd: 4 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 2 * H, rangeEnd: 4 * H}, + }, + }, + "only level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, 
MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "only level 2 blocks from same time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * 
H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks, there is partitioned group file for level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + 
{rangeStart: 10 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 10 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "mix level 1 and level 2 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: 
metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3}}, + }}, + {rangeStart: 12 * H, rangeEnd: 14 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block4, block5}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks along with level 3 blocks from some of partitions, level 1 blocks in different time range, there are partitioned group files for all groups": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 22 * H, 
MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 4, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 1, Blocks: []ulid.ULID{block2, block3}}, + {PartitionID: 2, Blocks: []ulid.ULID{block1, block3}}, + {PartitionID: 3, Blocks: []ulid.ULID{block2, block3}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 2: {partitionID: 2, compactorID: testCompactorID, isExpired: false, status: Completed}, + }}, + {rangeStart: 22 * H, rangeEnd: 24 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block6, block7}}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 22 * H, rangeEnd: 24 * H}, + {blocks: []ulid.ULID{block1, block3}, partitionCount: 4, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, block3}, partitionCount: 4, partitionID: 2, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block3}, partitionCount: 4, partitionID: 3, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks in first 12h are all complete, level 2 blocks in second 12h have not started compaction, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 6 * H, MaxTime: 8 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 14 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 2 blocks are all complete, there is no partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 14 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + 
hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 14 * H, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4, block5}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block6, block7}, partitionCount: 1, partitionID: 0, rangeStart: 12 * H, rangeEnd: 24 * H}, + }, + }, + "level 2 blocks are complete only in second half of 12h, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + 
timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + {PartitionID: 1, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "level 3 blocks are complete, there are some level 2 blocks not deleted, there is existing partitioned group file": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 12 * H, partitionCount: 1, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{block1, block2, block3, block4}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{ + // nothing should be grouped. cleaner should mark all level 2 blocks for deletion + // and delete partitioned group file since level 2 to level 3 compaction is complete + }, + }, + "recompact one level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks in same and different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + 
hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks all in different time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 2 blocks and level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 0 * H, MaxTime: 12 * H, 
Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block with level 2 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact one level 1 block with one level 3 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 1 block in same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "recompact two level 1 block in different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + 
timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with one level 4 block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 1 blocks in different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 22 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: 
tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 2 block with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: 
&cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 3 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 2 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, 
partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 2 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact two level 2 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + 
}, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 3 block with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact two level 3 blocks from different time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 12 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, 
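+ // Both level 3 blocks sit inside the 24h range already covered by the level 4 blocks, so a single 0h-24h recompaction job containing all four blocks is expected.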
+ }, + }, + "recompact two level 3 blocks from same time range with level 4 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 12 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 24 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3, block4}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "blocks with partition info should be assigned to correct partition": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 2}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block4: { + meta: 
&metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block4, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 4, PartitionID: 3}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block5: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block5, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block6: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block6, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 1}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block7: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block7, MinTime: 4 * H, MaxTime: 6 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 1}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block3, block5, block7}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + {blocks: []ulid.ULID{block2, block4, block6, block7}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "one of the partitions got only one block": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}, Stats: tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}, Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}, Stats: 
tsdb.BlockStats{NumSeries: 2}}, + Thanos: metadata.Thanos{Files: []metadata.File{{RelPath: thanosblock.IndexFilename, SizeBytes: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 2, partitionID: 0, rangeStart: 0 * H, rangeEnd: 2 * H}, + {blocks: []ulid.ULID{block3, DUMMY_BLOCK_ID}, partitionCount: 2, partitionID: 1, rangeStart: 0 * H, rangeEnd: 2 * H}, + }, + }, + "not all level 2 blocks are in bucket index": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 2 * H, MaxTime: 4 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "not all level 2 blocks are in bucket index and there are late level 1 blocks": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block3, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{ + {rangeStart: 0 * H, rangeEnd: 2 * H, partitionCount: 2, partitions: []Partition{ + {PartitionID: 0, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + {PartitionID: 1, Blocks: []ulid.ULID{ulid.MustNew(99, nil), ulid.MustNew(98, nil)}}, + }, partitionVisitMarkers: map[int]mockPartitionVisitMarker{ + 0: {partitionID: 0, compactorID: testCompactorID, isExpired: true, status: Completed}, + 1: {partitionID: 1, compactorID: testCompactorID, 
isExpired: true, status: Completed}, + }}, + }, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 3 blocks all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 12 * H, Compaction: tsdb.BlockMetaCompaction{Level: 3}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_24, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{}, + }, + "level 2 blocks not all have same partitioned group id as destination group": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_12, PartitionCount: 2, PartitionID: 1}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block3: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 0 * H, MaxTime: 2 * H, Compaction: tsdb.BlockMetaCompaction{Level: 2}}, + 
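+ // Unlike block1 and block2, this block was produced under the 0h-2h partitioned group, so the 0h-12h destination group is not treated as already compacted and one job over all three blocks is expected.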
Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionedGroupID: partitionedGroupID_0_2, PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + }, + existingPartitionedGroups: []mockExistingPartitionedGroup{}, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2, block3}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 12 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with data only in part of time range across smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 10*H + 49*M, MaxTime: 16 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{TimeRange: 24 * H, PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, rangeEnd: 24 * H}, + }, + }, + "recompact one level 1 block with level 4 blocks with no time range in meta and data only in part of time range in same smaller time range": { + ranges: []time.Duration{2 * time.Hour, 12 * time.Hour, 24 * time.Hour}, + blocks: map[ulid.ULID]mockBlock{ + block1: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block1, MinTime: 10*H + 49*M, MaxTime: 11*H + 47*M, Compaction: tsdb.BlockMetaCompaction{Level: 1}}, + }, + timeRange: 2 * time.Hour, + hasNoCompactMark: false, + }, + block2: { + meta: &metadata.Meta{ + BlockMeta: tsdb.BlockMeta{ULID: block2, MinTime: 1 * H, MaxTime: 10 * H, Compaction: tsdb.BlockMetaCompaction{Level: 4}}, + Thanos: metadata.Thanos{Extensions: cortextsdb.CortexMetaExtensions{PartitionInfo: &cortextsdb.PartitionInfo{PartitionCount: 1, PartitionID: 0}}}, + }, + timeRange: 24 * time.Hour, + hasNoCompactMark: false, + }, + }, + expected: []expectedCompactionJob{ + {blocks: []ulid.ULID{block1, block2}, partitionCount: 1, partitionID: 0, rangeStart: 0 * H, 
rangeEnd: 12 * H}, + }, + }, + } + + for testName, testCase := range tests { + t.Run(testName, func(t *testing.T) { + compactorCfg := &Config{ + BlockRanges: testCase.ranges, + } + + limits := &validation.Limits{ + CompactorPartitionSeriesCount: 4, + } + overrides, err := validation.NewOverrides(*limits, nil) + require.NoError(t, err) + + // Setup mocking of the ring so that the grouper will own all the shards + rs := ring.ReplicationSet{ + Instances: []ring.InstanceDesc{ + {Addr: "test-addr"}, + }, + } + subring := &RingMock{} + subring.On("GetAllHealthy", mock.Anything).Return(rs, nil) + subring.On("Get", mock.Anything, mock.Anything, mock.Anything, mock.Anything, mock.Anything).Return(rs, nil) + + ring := &RingMock{} + ring.On("ShuffleShard", mock.Anything, mock.Anything).Return(subring, nil) + + registerer := prometheus.NewPedanticRegistry() + + metrics := newCompactorMetrics(registerer) + + noCompactFilter := testCase.getNoCompactFilter() + + bkt := &bucket.ClientMock{} + visitMarkerTimeout := 5 * time.Minute + testCase.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + bkt.MockUpload(mock.Anything, nil) + bkt.MockGet(mock.Anything, "", nil) + bkt.MockIter(mock.Anything, nil, nil) + + for _, b := range testCase.blocks { + b.fixPartitionInfo(t, userID) + } + + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + g := NewPartitionCompactionGrouper( + ctx, + nil, + objstore.WithNoopInstr(bkt), + false, // Do not accept malformed indexes + true, // Enable vertical compaction + nil, + metrics.getSyncerMetrics(userID), + metrics, + metadata.NoneFunc, + *compactorCfg, + ring, + "test-addr", + testCompactorID, + overrides, + userID, + 10, + 3, + 1, + false, + visitMarkerTimeout, + noCompactFilter, + 1, + ) + actual, err := g.generateCompactionJobs(testCase.getBlocks()) + require.NoError(t, err) + require.Len(t, actual, len(testCase.expected)) + + for idx, expectedGroup := range testCase.expected { + actualGroup := actual[idx] + actualBlocks := actualGroup.blocks + require.Equal(t, expectedGroup.rangeStart, actualGroup.partitionedGroupInfo.RangeStart) + require.Equal(t, expectedGroup.rangeEnd, actualGroup.partitionedGroupInfo.RangeEnd) + require.Equal(t, expectedGroup.partitionCount, actualGroup.partitionedGroupInfo.PartitionCount) + require.Equal(t, expectedGroup.partitionID, actualGroup.partition.PartitionID) + require.Len(t, actualBlocks, len(expectedGroup.blocks)) + for _, b := range actualBlocks { + require.Contains(t, expectedGroup.blocks, b.ULID) + } + } + }) + } +} + +type generateCompactionJobsTestCase struct { + ranges []time.Duration + blocks map[ulid.ULID]mockBlock + existingPartitionedGroups []mockExistingPartitionedGroup + expected []expectedCompactionJob +} + +func (g *generateCompactionJobsTestCase) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) { + var existingPartitionedGroupFiles []string + for _, existingPartitionedGroup := range g.existingPartitionedGroups { + partitionedGroupFilePath := existingPartitionedGroup.setupBucketStore(t, bkt, userID, visitMarkerTimeout) + existingPartitionedGroupFiles = append(existingPartitionedGroupFiles, partitionedGroupFilePath) + } + bkt.MockIter(PartitionedGroupDirectory, existingPartitionedGroupFiles, nil) +} + +func (g *generateCompactionJobsTestCase) getNoCompactFilter() func() map[ulid.ULID]*metadata.NoCompactMark { + noCompactBlocks := make(map[ulid.ULID]*metadata.NoCompactMark) + for id, b := range g.blocks { + if b.hasNoCompactMark { + 
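+ // Build a NoCompactMark (timestamped one hour in the past) for every mock block flagged hasNoCompactMark; the closure returned below is handed to the grouper as its no-compact filter.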
noCompactBlocks[id] = &metadata.NoCompactMark{ + ID: id, + NoCompactTime: time.Now().Add(-1 * time.Hour).Unix(), + } + } + } + return func() map[ulid.ULID]*metadata.NoCompactMark { + return noCompactBlocks + } +} + +func (g *generateCompactionJobsTestCase) getBlocks() map[ulid.ULID]*metadata.Meta { + blocks := make(map[ulid.ULID]*metadata.Meta) + for id, b := range g.blocks { + blocks[id] = b.meta + } + return blocks +} + +type mockExistingPartitionedGroup struct { + partitionedGroupID uint32 + rangeStart int64 + rangeEnd int64 + partitionCount int + partitions []Partition + partitionVisitMarkers map[int]mockPartitionVisitMarker +} + +func (p *mockExistingPartitionedGroup) updatePartitionedGroupID(userID string) { + p.partitionedGroupID = hashGroup(userID, p.rangeStart, p.rangeEnd) +} + +func (p *mockExistingPartitionedGroup) setupBucketStore(t *testing.T, bkt *bucket.ClientMock, userID string, visitMarkerTimeout time.Duration) string { + p.updatePartitionedGroupID(userID) + partitionedGroupFilePath := path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", p.partitionedGroupID)) + for _, partition := range p.partitions { + partitionID := partition.PartitionID + if _, ok := p.partitionVisitMarkers[partitionID]; !ok { + continue + } + visitMarker := p.partitionVisitMarkers[partitionID] + partitionVisitMarkerFilePath := path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, + fmt.Sprintf("%d/%s%d-%s", p.partitionedGroupID, PartitionVisitMarkerFilePrefix, partitionID, PartitionVisitMarkerFileSuffix)) + visitTime := time.Now() + if visitMarker.isExpired { + visitTime = time.Now().Add(-2 * visitMarkerTimeout) + } + actualVisitMarker := partitionVisitMarker{ + CompactorID: visitMarker.compactorID, + Status: visitMarker.status, + PartitionedGroupID: p.partitionedGroupID, + PartitionID: partitionID, + VisitTime: visitTime.UnixMilli(), + Version: PartitionVisitMarkerVersion1, + } + partitionVisitMarkerContent, err := json.Marshal(actualVisitMarker) + require.NoError(t, err) + bkt.MockGet(partitionVisitMarkerFilePath, string(partitionVisitMarkerContent), nil) + } + partitionedGroup := PartitionedGroupInfo{ + PartitionedGroupID: p.partitionedGroupID, + PartitionCount: p.partitionCount, + Partitions: p.partitions, + RangeStart: p.rangeStart, + RangeEnd: p.rangeEnd, + CreationTime: time.Now().Add(-1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + } + partitionedGroupContent, err := json.Marshal(partitionedGroup) + require.NoError(t, err) + bkt.MockGet(partitionedGroupFilePath, string(partitionedGroupContent), nil) + return partitionedGroupFilePath +} + +type mockBlock struct { + meta *metadata.Meta + timeRange time.Duration + hasNoCompactMark bool +} + +func (b *mockBlock) fixPartitionInfo(t *testing.T, userID string) { + extensions, err := cortextsdb.GetCortexMetaExtensionsFromMeta(*b.meta) + require.NoError(t, err) + if extensions != nil { + rangeStart := getRangeStart(b.meta, b.timeRange.Milliseconds()) + rangeEnd := rangeStart + b.timeRange.Milliseconds() + if extensions.PartitionInfo.PartitionedGroupID == 0 { + extensions.PartitionInfo.PartitionedGroupID = hashGroup(userID, rangeStart, rangeEnd) + } + b.meta.Thanos.Extensions = extensions + } +} + +type mockPartitionVisitMarker struct { + partitionID int + compactorID string + isExpired bool + status VisitStatus +} + +type expectedCompactionJob struct { + blocks []ulid.ULID + partitionCount int + partitionID int + rangeStart int64 + rangeEnd int64 +} diff --git a/pkg/compactor/partition_visit_marker.go 
b/pkg/compactor/partition_visit_marker.go new file mode 100644 index 0000000000..4a5d8fdc4a --- /dev/null +++ b/pkg/compactor/partition_visit_marker.go @@ -0,0 +1,96 @@ +package compactor + +import ( + "fmt" + "path" + "strings" + "time" + + "github.com/pkg/errors" +) + +const ( + // PartitionVisitMarkerDirectory is the name of directory where all visit markers are saved. + PartitionVisitMarkerDirectory = "visit-marks" + // PartitionVisitMarkerFileSuffix is the known suffix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFileSuffix = "visit-mark.json" + // PartitionVisitMarkerFilePrefix is the known prefix of json filename for representing the most recent compactor visit. + PartitionVisitMarkerFilePrefix = "partition-" + // PartitionVisitMarkerVersion1 is the current supported version of visit-mark file. + PartitionVisitMarkerVersion1 = 1 +) + +var ( + errorNotPartitionVisitMarker = errors.New("file is not partition visit marker") +) + +type partitionVisitMarker struct { + CompactorID string `json:"compactorID"` + Status VisitStatus `json:"status"` + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionID int `json:"partitionID"` + // VisitTime is a unix timestamp of when the partition was visited (mark updated). + VisitTime int64 `json:"visitTime"` + // Version of the file. + Version int `json:"version"` +} + +func newPartitionVisitMarker(compactorID string, partitionedGroupID uint32, partitionID int) *partitionVisitMarker { + return &partitionVisitMarker{ + CompactorID: compactorID, + PartitionedGroupID: partitionedGroupID, + PartitionID: partitionID, + } +} + +func (b *partitionVisitMarker) IsExpired(partitionVisitMarkerTimeout time.Duration) bool { + return !time.Now().Before(time.Unix(b.VisitTime, 0).Add(partitionVisitMarkerTimeout)) +} + +func (b *partitionVisitMarker) IsVisited(partitionVisitMarkerTimeout time.Duration, partitionID int) bool { + return b.GetStatus() == Completed || (partitionID == b.PartitionID && !b.IsExpired(partitionVisitMarkerTimeout)) +} + +func (b *partitionVisitMarker) IsPendingByCompactor(partitionVisitMarkerTimeout time.Duration, partitionID int, compactorID string) bool { + return b.CompactorID == compactorID && partitionID == b.PartitionID && b.GetStatus() == Pending && !b.IsExpired(partitionVisitMarkerTimeout) +} + +func (b *partitionVisitMarker) GetStatus() VisitStatus { + return b.Status +} + +func (b *partitionVisitMarker) GetVisitMarkerFilePath() string { + return GetPartitionVisitMarkerFilePath(b.PartitionedGroupID, b.PartitionID) +} + +func (b *partitionVisitMarker) UpdateStatus(ownerIdentifier string, status VisitStatus) { + b.CompactorID = ownerIdentifier + b.Status = status + b.VisitTime = time.Now().Unix() +} + +func (b *partitionVisitMarker) String() string { + return fmt.Sprintf("visit_marker_partitioned_group_id=%d visit_marker_partition_id=%d visit_marker_compactor_id=%s visit_marker_status=%s visit_marker_visit_time=%s", + b.PartitionedGroupID, + b.PartitionID, + b.CompactorID, + b.Status, + time.Unix(b.VisitTime, 0).String(), + ) +} + +func GetPartitionVisitMarkerFilePath(partitionedGroupID uint32, partitionID int) string { + return path.Join(GetPartitionVisitMarkerDirectoryPath(partitionedGroupID), fmt.Sprintf("%s%d-%s", PartitionVisitMarkerFilePrefix, partitionID, PartitionVisitMarkerFileSuffix)) +} + +func GetPartitionVisitMarkerDirectoryPath(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, PartitionVisitMarkerDirectory, fmt.Sprintf("%d", 
partitionedGroupID)) +} + +func IsPartitionVisitMarker(path string) bool { + return strings.HasSuffix(path, PartitionVisitMarkerFileSuffix) +} + +func IsNotPartitionVisitMarkerError(err error) bool { + return errors.Is(err, errorNotPartitionVisitMarker) +} diff --git a/pkg/compactor/partitioned_group_info.go b/pkg/compactor/partitioned_group_info.go new file mode 100644 index 0000000000..71d4c61639 --- /dev/null +++ b/pkg/compactor/partitioned_group_info.go @@ -0,0 +1,303 @@ +package compactor + +import ( + "bytes" + "context" + "encoding/json" + "fmt" + "io" + "path" + "strings" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/oklog/ulid" + "github.com/pkg/errors" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/util/runutil" +) + +const ( + PartitionedGroupDirectory = "partitioned-groups" + PartitionedGroupInfoVersion1 = 1 +) + +var ( + ErrorPartitionedGroupInfoNotFound = errors.New("partitioned group info not found") + ErrorUnmarshalPartitionedGroupInfo = errors.New("unmarshal partitioned group info JSON") +) + +type Partition struct { + PartitionID int `json:"partitionID"` + Blocks []ulid.ULID `json:"blocks"` +} + +type PartitionedGroupStatus struct { + PartitionedGroupID uint32 + CanDelete bool + IsCompleted bool + DeleteVisitMarker bool + PendingPartitions int + InProgressPartitions int + PendingOrFailedPartitions []Partition +} + +func (s PartitionedGroupStatus) String() string { + var partitions []string + for _, p := range s.PendingOrFailedPartitions { + partitions = append(partitions, fmt.Sprintf("%d", p.PartitionID)) + } + return fmt.Sprintf(`{"partitioned_group_id": %d, "can_delete": %t, "is_complete": %t, "delete_visit_marker": %t, "pending_partitions": %d, "in_progress_partitions": %d, "pending_or_failed_partitions": [%s]}`, + s.PartitionedGroupID, s.CanDelete, s.IsCompleted, s.DeleteVisitMarker, s.PendingPartitions, s.InProgressPartitions, strings.Join(partitions, ",")) +} + +type PartitionedGroupInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupID"` + PartitionCount int `json:"partitionCount"` + Partitions []Partition `json:"partitions"` + RangeStart int64 `json:"rangeStart"` + RangeEnd int64 `json:"rangeEnd"` + CreationTime int64 `json:"creationTime"` + // Version of the file. 
+ Version int `json:"version"` +} + +func (p *PartitionedGroupInfo) rangeStartTime() time.Time { + return time.Unix(0, p.RangeStart*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) rangeEndTime() time.Time { + return time.Unix(0, p.RangeEnd*int64(time.Millisecond)).UTC() +} + +func (p *PartitionedGroupInfo) getPartitionIDsByBlock(blockID ulid.ULID) []int { + var partitionIDs []int +partitionLoop: + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + if block == blockID { + partitionIDs = append(partitionIDs, partition.PartitionID) + continue partitionLoop + } + } + } + return partitionIDs +} + +func (p *PartitionedGroupInfo) getAllBlocks() []ulid.ULID { + uniqueBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range p.Partitions { + for _, block := range partition.Blocks { + uniqueBlocks[block] = struct{}{} + } + } + blocks := make([]ulid.ULID, len(uniqueBlocks)) + i := 0 + for block := range uniqueBlocks { + blocks[i] = block + i++ + } + return blocks +} + +func (p *PartitionedGroupInfo) getAllBlockIDs() []string { + blocks := p.getAllBlocks() + blockIDs := make([]string, len(blocks)) + for i, block := range blocks { + blockIDs[i] = block.String() + } + return blockIDs +} + +func (p *PartitionedGroupInfo) getPartitionedGroupStatus( + ctx context.Context, + userBucket objstore.InstrumentedBucket, + partitionVisitMarkerTimeout time.Duration, + userLogger log.Logger, +) PartitionedGroupStatus { + status := PartitionedGroupStatus{ + PartitionedGroupID: p.PartitionedGroupID, + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingPartitions: 0, + InProgressPartitions: 0, + PendingOrFailedPartitions: []Partition{}, + } + allPartitionCompleted := true + hasInProgressPartitions := false + for _, partition := range p.Partitions { + visitMarker := &partitionVisitMarker{ + PartitionedGroupID: p.PartitionedGroupID, + PartitionID: partition.PartitionID, + } + visitMarkerManager := NewVisitMarkerManager(userBucket, userLogger, "PartitionedGroupInfo.getPartitionedGroupStatus", visitMarker) + partitionVisitMarkerExists := true + if err := visitMarkerManager.ReadVisitMarker(ctx, visitMarker); err != nil { + if errors.Is(err, errorVisitMarkerNotFound) { + partitionVisitMarkerExists = false + } else { + level.Warn(userLogger).Log("msg", "unable to read partition visit marker", "path", visitMarker.GetVisitMarkerFilePath(), "err", err) + return status + } + } + + if !partitionVisitMarkerExists { + status.PendingPartitions++ + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + } else if visitMarker.VisitTime < p.CreationTime { + status.DeleteVisitMarker = true + allPartitionCompleted = false + } else if (visitMarker.GetStatus() == Pending || visitMarker.GetStatus() == InProgress) && !visitMarker.IsExpired(partitionVisitMarkerTimeout) { + status.InProgressPartitions++ + hasInProgressPartitions = true + allPartitionCompleted = false + } else if visitMarker.GetStatus() != Completed { + status.PendingPartitions++ + allPartitionCompleted = false + status.PendingOrFailedPartitions = append(status.PendingOrFailedPartitions, partition) + } + } + + if hasInProgressPartitions { + return status + } + + status.IsCompleted = allPartitionCompleted + + if allPartitionCompleted { + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + + checkedBlocks := make(map[ulid.ULID]struct{}) + for _, partition := range status.PendingOrFailedPartitions { + 
for _, blockID := range partition.Blocks { + if _, ok := checkedBlocks[blockID]; ok { + continue + } + if !p.doesBlockExist(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is physically deleted", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockDeleted(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for deletion", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + if p.isBlockNoCompact(ctx, userBucket, userLogger, blockID) { + level.Info(userLogger).Log("msg", "delete partitioned group", "reason", "block is marked for no compact", "block", blockID) + status.CanDelete = true + status.DeleteVisitMarker = true + return status + } + checkedBlocks[blockID] = struct{}{} + } + } + return status +} + +func (p *PartitionedGroupInfo) doesBlockExist(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + metaExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.MetaFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of meta.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return true + } + return metaExists +} + +func (p *PartitionedGroupInfo) isBlockDeleted(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + deletionMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.DeletionMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of deletion-mark.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return false + } + return deletionMarkerExists +} + +func (p *PartitionedGroupInfo) isBlockNoCompact(ctx context.Context, userBucket objstore.InstrumentedBucket, userLogger log.Logger, blockID ulid.ULID) bool { + noCompactMarkerExists, err := userBucket.Exists(ctx, path.Join(blockID.String(), metadata.NoCompactMarkFilename)) + if err != nil { + level.Warn(userLogger).Log("msg", "unable to get stats of no-compact-mark.json for block", "partitioned_group_id", p.PartitionedGroupID, "block", blockID.String()) + return false + } + return noCompactMarkerExists +} + +func (p *PartitionedGroupInfo) String() string { + var partitions []string + for _, partition := range p.Partitions { + partitions = append(partitions, fmt.Sprintf("(PartitionID: %d, Blocks: %s)", partition.PartitionID, partition.Blocks)) + } + return fmt.Sprintf("{PartitionedGroupID: %d, PartitionCount: %d, Partitions: %s}", p.PartitionedGroupID, p.PartitionCount, strings.Join(partitions, ", ")) +} + +func GetPartitionedGroupFile(partitionedGroupID uint32) string { + return path.Join(PartitionedGroupDirectory, fmt.Sprintf("%d.json", partitionedGroupID)) +} + +func ReadPartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupID uint32) (*PartitionedGroupInfo, error) { + return ReadPartitionedGroupInfoFile(ctx, bkt, logger, GetPartitionedGroupFile(partitionedGroupID)) +} + +func ReadPartitionedGroupInfoFile(ctx context.Context, bkt objstore.InstrumentedBucketReader, logger log.Logger, partitionedGroupFile string) (*PartitionedGroupInfo, error) { + partitionedGroupReader, err := 
bkt.ReaderWithExpectedErrs(bkt.IsObjNotFoundErr).Get(ctx, partitionedGroupFile) + if err != nil { + if bkt.IsObjNotFoundErr(err) { + return nil, errors.Wrapf(ErrorPartitionedGroupInfoNotFound, "partitioned group file: %s", partitionedGroupFile) + } + return nil, errors.Wrapf(err, "get partitioned group file: %s", partitionedGroupFile) + } + defer runutil.CloseWithLogOnErr(logger, partitionedGroupReader, "close partitioned group reader") + p, err := io.ReadAll(partitionedGroupReader) + if err != nil { + return nil, errors.Wrapf(err, "read partitioned group file: %s", partitionedGroupFile) + } + partitionedGroupInfo := PartitionedGroupInfo{} + if err = json.Unmarshal(p, &partitionedGroupInfo); err != nil { + return nil, errors.Wrapf(ErrorUnmarshalPartitionedGroupInfo, "partitioned group file: %s, error: %v", partitionedGroupFile, err.Error()) + } + if partitionedGroupInfo.Version != PartitionedGroupInfoVersion1 { + return nil, errors.Errorf("unexpected partitioned group file version %d, expected %d", partitionedGroupInfo.Version, PartitionedGroupInfoVersion1) + } + if partitionedGroupInfo.CreationTime <= 0 { + objAttr, err := bkt.Attributes(ctx, partitionedGroupFile) + if err != nil { + return nil, errors.Errorf("unable to get partitioned group file attributes: %s, error: %v", partitionedGroupFile, err.Error()) + } + partitionedGroupInfo.CreationTime = objAttr.LastModified.Unix() + } + return &partitionedGroupInfo, nil +} + +func UpdatePartitionedGroupInfo(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, partitionedGroupInfo PartitionedGroupInfo) (*PartitionedGroupInfo, error) { + // Ignore error in order to always update partitioned group info. There is no harm in putting the latest version of + // the partitioned group info, which is supposed to be the correct grouping based on the latest bucket store.
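A usage sketch of the helpers introduced by partition_visit_marker.go and partitioned_group_info.go above, before the function body continues below. This is illustrative only and not part of the change: ctx, the instrumented bucket, the logger, and the block ULIDs are assumed to exist, and the snippet assumes it sits in the compactor package so the file's existing imports apply.

// Illustrative only: persist a partitioned group, read it back, and derive the
// visit marker path for one of its partitions.
func examplePartitionedGroupRoundTrip(ctx context.Context, bkt objstore.InstrumentedBucket, logger log.Logger, block1, block2 ulid.ULID) error {
	rangeStart := int64(0)
	rangeEnd := (12 * time.Hour).Milliseconds()
	info := PartitionedGroupInfo{
		PartitionedGroupID: hashGroup("user-1", rangeStart, rangeEnd),
		PartitionCount:     2,
		Partitions: []Partition{
			{PartitionID: 0, Blocks: []ulid.ULID{block1}},
			{PartitionID: 1, Blocks: []ulid.ULID{block2}},
		},
		RangeStart: rangeStart,
		RangeEnd:   rangeEnd,
		Version:    PartitionedGroupInfoVersion1,
	}
	// If the file already exists, the stored copy wins and is returned unchanged.
	stored, err := UpdatePartitionedGroupInfo(ctx, bkt, logger, info)
	if err != nil {
		return err
	}
	// The group is stored at partitioned-groups/<groupID>.json ...
	readBack, err := ReadPartitionedGroupInfo(ctx, bkt, logger, stored.PartitionedGroupID)
	if err != nil {
		return err
	}
	// ... and each partition's visit marker lives at
	// partitioned-groups/visit-marks/<groupID>/partition-<partitionID>-visit-mark.json.
	_ = GetPartitionVisitMarkerFilePath(readBack.PartitionedGroupID, 0)
	return nil
}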
+ existingPartitionedGroup, _ := ReadPartitionedGroupInfo(ctx, bkt, logger, partitionedGroupInfo.PartitionedGroupID) + if existingPartitionedGroup != nil { + level.Warn(logger).Log("msg", "partitioned group info already exists", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return existingPartitionedGroup, nil + } + if partitionedGroupInfo.CreationTime <= 0 { + partitionedGroupInfo.CreationTime = time.Now().Unix() + } + partitionedGroupFile := GetPartitionedGroupFile(partitionedGroupInfo.PartitionedGroupID) + partitionedGroupInfoContent, err := json.Marshal(partitionedGroupInfo) + if err != nil { + return nil, err + } + reader := bytes.NewReader(partitionedGroupInfoContent) + if err := bkt.Upload(ctx, partitionedGroupFile, reader); err != nil { + return nil, err + } + level.Info(logger).Log("msg", "created new partitioned group info", "partitioned_group_id", partitionedGroupInfo.PartitionedGroupID) + return &partitionedGroupInfo, nil +} diff --git a/pkg/compactor/partitioned_group_info_test.go b/pkg/compactor/partitioned_group_info_test.go new file mode 100644 index 0000000000..aa4f27253b --- /dev/null +++ b/pkg/compactor/partitioned_group_info_test.go @@ -0,0 +1,882 @@ +package compactor + +import ( + "context" + "encoding/json" + "path" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/oklog/ulid" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + "github.com/thanos-io/objstore" + "github.com/thanos-io/thanos/pkg/block/metadata" + + "github.com/cortexproject/cortex/pkg/storage/bucket" + cortex_testutil "github.com/cortexproject/cortex/pkg/storage/tsdb/testutil" +) + +func TestPartitionedGroupInfo(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + rangeStart := (1 * time.Hour).Milliseconds() + rangeEnd := (2 * time.Hour).Milliseconds() + partitionedGroupID := uint32(12345) + for _, tcase := range []struct { + name string + partitionedGroupInfo PartitionedGroupInfo + }{ + { + name: "write partitioned group info 1", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + { + name: "write partitioned group info 2", + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 3, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid1, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid2, + }, + }, + }, + RangeStart: rangeStart, + RangeEnd: rangeEnd, + Version: PartitionedGroupInfoVersion1, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + ctx := context.Background() + testBkt, _ := cortex_testutil.PrepareFilesystemBucket(t) + bkt := objstore.WithNoopInstr(testBkt) + logger := log.NewNopLogger() + writeRes, err := UpdatePartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo) + tcase.partitionedGroupInfo.CreationTime = writeRes.CreationTime + require.NoError(t, err) + require.Equal(t, tcase.partitionedGroupInfo, *writeRes) + readRes, err := ReadPartitionedGroupInfo(ctx, bkt, logger, tcase.partitionedGroupInfo.PartitionedGroupID) + require.NoError(t, err) + require.Equal(t, 
tcase.partitionedGroupInfo, *readRes) + }) + } +} + +func TestGetPartitionIDsByBlock(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + ulid3 := ulid.MustNew(3, nil) + partitionedGroupInfo := PartitionedGroupInfo{ + PartitionedGroupID: uint32(12345), + PartitionCount: 3, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + { + PartitionID: 2, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + ulid2, + ulid3, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + Version: PartitionedGroupInfoVersion1, + } + + res0 := partitionedGroupInfo.getPartitionIDsByBlock(ulid0) + require.Equal(t, 3, len(res0)) + require.Contains(t, res0, 0) + require.Contains(t, res0, 1) + require.Contains(t, res0, 2) + + res1 := partitionedGroupInfo.getPartitionIDsByBlock(ulid1) + require.Equal(t, 2, len(res1)) + require.Contains(t, res1, 0) + require.Contains(t, res1, 2) + + res2 := partitionedGroupInfo.getPartitionIDsByBlock(ulid2) + require.Equal(t, 2, len(res2)) + require.Contains(t, res2, 1) + require.Contains(t, res2, 2) + + res3 := partitionedGroupInfo.getPartitionIDsByBlock(ulid3) + require.Equal(t, 1, len(res3)) + require.Contains(t, res3, 2) +} + +func TestGetPartitionedGroupStatus(t *testing.T) { + ulid0 := ulid.MustNew(0, nil) + ulid1 := ulid.MustNew(1, nil) + ulid2 := ulid.MustNew(2, nil) + partitionedGroupID := uint32(1234) + for _, tcase := range []struct { + name string + expectedResult PartitionedGroupStatus + partitionedGroupInfo PartitionedGroupInfo + partitionVisitMarkers []partitionVisitMarker + deletedBlock map[ulid.ULID]bool + noCompactBlock map[ulid.ULID]struct{} + }{ + { + name: "test one partition is not visited and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is pending and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * 
time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is completed and one partition is under visiting", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: false, + }, + }, + { + name: "test one partition is pending expired", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Pending, + VisitTime: time.Now().Add(-5 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete with one block deleted and one partition is not visited with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + 
}, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid1: true, + }, + }, + { + name: "test one partition is complete and one partition is failed with no blocks deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is complete and one partition is failed one block deleted", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Failed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test all partitions are complete", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: true, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, 
+ }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid2: true, + }, + }, + { + name: "test partitioned group created after visit marker", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(1 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{}, + }, + { + name: "test one partition is in progress not expired and contains block marked for deletion", + expectedResult: PartitionedGroupStatus{ + CanDelete: false, + IsCompleted: false, + DeleteVisitMarker: false, + PendingOrFailedPartitions: []Partition{}, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(time.Second).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + deletedBlock: map[ulid.ULID]bool{ + ulid0: true, + }, + }, + { + name: "test one partition is not visited and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: 
partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + { + name: "test one partition is expired and contains block with no compact mark", + expectedResult: PartitionedGroupStatus{ + CanDelete: true, + IsCompleted: false, + DeleteVisitMarker: true, + PendingOrFailedPartitions: []Partition{ + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + }, + partitionedGroupInfo: PartitionedGroupInfo{ + PartitionedGroupID: partitionedGroupID, + PartitionCount: 2, + Partitions: []Partition{ + { + PartitionID: 0, + Blocks: []ulid.ULID{ + ulid0, + ulid1, + }, + }, + { + PartitionID: 1, + Blocks: []ulid.ULID{ + ulid0, + ulid2, + }, + }, + }, + RangeStart: (1 * time.Hour).Milliseconds(), + RangeEnd: (2 * time.Hour).Milliseconds(), + CreationTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionedGroupInfoVersion1, + }, + partitionVisitMarkers: []partitionVisitMarker{ + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 0, + Status: Completed, + VisitTime: time.Now().Add(-2 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + { + PartitionedGroupID: partitionedGroupID, + PartitionID: 1, + Status: InProgress, + VisitTime: time.Now().Add(-10 * time.Minute).Unix(), + Version: PartitionVisitMarkerVersion1, + }, + }, + noCompactBlock: map[ulid.ULID]struct{}{ + ulid0: {}, + }, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + bucketClient := &bucket.ClientMock{} + for _, partitionVisitMarker := range tcase.partitionVisitMarkers { + content, _ := json.Marshal(partitionVisitMarker) + bucketClient.MockGet(partitionVisitMarker.GetVisitMarkerFilePath(), string(content), nil) + } + + for _, partition := range tcase.partitionedGroupInfo.Partitions { + for _, blockID := range partition.Blocks { + metaPath := path.Join(blockID.String(), metadata.MetaFilename) + noCompactPath := path.Join(blockID.String(), metadata.NoCompactMarkFilename) + deletionMarkerPath := path.Join(blockID.String(), metadata.DeletionMarkFilename) + if hasDeletionMarker, ok := tcase.deletedBlock[blockID]; ok { + if hasDeletionMarker { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, true, nil) + } else { + bucketClient.MockExists(metaPath, false, nil) + } + } else { + bucketClient.MockExists(metaPath, true, nil) + bucketClient.MockExists(deletionMarkerPath, false, nil) + } + if _, ok := tcase.noCompactBlock[blockID]; ok { + bucketClient.MockExists(noCompactPath, true, nil) + } else { + bucketClient.MockExists(noCompactPath, false, nil) + } + } + } + bucketClient.MockGet(mock.Anything, "", nil) + + ctx := context.Background() + logger := log.NewNopLogger() + result := tcase.partitionedGroupInfo.getPartitionedGroupStatus(ctx, bucketClient, 60*time.Second, logger) + require.Equal(t, tcase.expectedResult.CanDelete, result.CanDelete) + require.Equal(t, tcase.expectedResult.IsCompleted, 
result.IsCompleted) + require.Equal(t, len(tcase.expectedResult.PendingOrFailedPartitions), len(result.PendingOrFailedPartitions)) + for _, partition := range result.PendingOrFailedPartitions { + require.Contains(t, tcase.expectedResult.PendingOrFailedPartitions, partition) + } + }) + } +} diff --git a/pkg/compactor/shuffle_sharding_grouper.go b/pkg/compactor/shuffle_sharding_grouper.go index a041f55b6b..f6328b8fb5 100644 --- a/pkg/compactor/shuffle_sharding_grouper.go +++ b/pkg/compactor/shuffle_sharding_grouper.go @@ -289,15 +289,20 @@ func (g *ShuffleShardingGrouper) checkSubringForCompactor() (bool, error) { return rs.Includes(g.ringLifecyclerAddr), nil } -// Get the hash of a group based on the UserID, and the starting and ending time of the group's range. +// hashGroup Get the hash of a group based on the UserID, and the starting and ending time of the group's range. func hashGroup(userID string, rangeStart int64, rangeEnd int64) uint32 { groupString := fmt.Sprintf("%v%v%v", userID, rangeStart, rangeEnd) - groupHasher := fnv.New32a() + + return hashString(groupString) +} + +func hashString(s string) uint32 { + hasher := fnv.New32a() // Hasher never returns err. - _, _ = groupHasher.Write([]byte(groupString)) - groupHash := groupHasher.Sum32() + _, _ = hasher.Write([]byte(s)) + result := hasher.Sum32() - return groupHash + return result } func createGroupKey(groupHash uint32, group blocksGroup) string { diff --git a/pkg/cortex/modules.go b/pkg/cortex/modules.go index 8823922a3c..5b8a3640b0 100644 --- a/pkg/cortex/modules.go +++ b/pkg/cortex/modules.go @@ -273,7 +273,7 @@ func (t *Cortex) initTenantFederation() (serv services.Service, err error) { // single tenant. This allows for a less impactful enabling of tenant // federation. byPassForSingleQuerier := true - t.QuerierQueryable = querier.NewSampleAndChunkQueryable(tenantfederation.NewQueryable(t.QuerierQueryable, byPassForSingleQuerier)) + t.QuerierQueryable = querier.NewSampleAndChunkQueryable(tenantfederation.NewQueryable(t.QuerierQueryable, t.Cfg.TenantFederation.MaxConcurrent, byPassForSingleQuerier, prometheus.DefaultRegisterer)) } return nil, nil } @@ -684,8 +684,9 @@ func (t *Cortex) initAlertManager() (serv services.Service, err error) { func (t *Cortex) initCompactor() (serv services.Service, err error) { t.Cfg.Compactor.ShardingRing.ListenPort = t.Cfg.Server.GRPCListenPort + ingestionReplicationFactor := t.Cfg.Ingester.LifecyclerConfig.RingConfig.ReplicationFactor - t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util_log.Logger, prometheus.DefaultRegisterer, t.Overrides) + t.Compactor, err = compactor.NewCompactor(t.Cfg.Compactor, t.Cfg.BlocksStorage, util_log.Logger, prometheus.DefaultRegisterer, t.Overrides, ingestionReplicationFactor) if err != nil { return } diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index 1d3fc2c5fc..1d29b8b95d 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -33,6 +33,7 @@ import ( "github.com/prometheus/prometheus/tsdb/chunkenc" "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/wlog" + "github.com/prometheus/prometheus/util/zeropool" "github.com/thanos-io/objstore" "github.com/thanos-io/thanos/pkg/block/metadata" "github.com/thanos-io/thanos/pkg/shipper" @@ -95,6 +96,8 @@ const ( var ( errExemplarRef = errors.New("exemplars not ingested because series not already present") errIngesterStopping = errors.New("ingester stopping") + + tsChunksPool zeropool.Pool[[]client.TimeSeriesChunk] ) // 
Config for an Ingester. @@ -1169,18 +1172,20 @@ func (i *Ingester) Push(ctx context.Context, req *cortexpb.WriteRequest) (*corte case errors.Is(cause, errMaxSeriesPerUserLimitExceeded): perUserSeriesLimitCount++ - updateFirstPartial(func() error { return makeLimitError(perUserSeriesLimit, i.limiter.FormatError(userID, cause)) }) + updateFirstPartial(func() error { + return makeLimitError(perUserSeriesLimit, i.limiter.FormatError(userID, cause, copiedLabels)) + }) case errors.Is(cause, errMaxSeriesPerMetricLimitExceeded): perMetricSeriesLimitCount++ updateFirstPartial(func() error { - return makeMetricLimitError(perMetricSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause)) + return makeMetricLimitError(perMetricSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause, copiedLabels)) }) case errors.As(cause, &errMaxSeriesPerLabelSetLimitExceeded{}): perLabelSetSeriesLimitCount++ updateFirstPartial(func() error { - return makeMetricLimitError(perLabelsetSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause)) + return makeMetricLimitError(perLabelsetSeriesLimit, copiedLabels, i.limiter.FormatError(userID, cause, copiedLabels)) }) case errors.Is(cause, histogram.ErrHistogramSpanNegativeOffset): @@ -1699,27 +1704,42 @@ func (i *Ingester) labelNamesCommon(ctx context.Context, req *client.LabelNamesR // MetricsForLabelMatchers returns all the metrics which match a set of matchers. func (i *Ingester) MetricsForLabelMatchers(ctx context.Context, req *client.MetricsForLabelMatchersRequest) (*client.MetricsForLabelMatchersResponse, error) { - result, cleanup, err := i.metricsForLabelMatchersCommon(ctx, req) + result := &client.MetricsForLabelMatchersResponse{} + cleanup, err := i.metricsForLabelMatchersCommon(ctx, req, func(l labels.Labels) error { + result.Metric = append(result.Metric, &cortexpb.Metric{ + Labels: cortexpb.FromLabelsToLabelAdapters(l), + }) + return nil + }) defer cleanup() return result, err } func (i *Ingester) MetricsForLabelMatchersStream(req *client.MetricsForLabelMatchersRequest, stream client.Ingester_MetricsForLabelMatchersStreamServer) error { - result, cleanup, err := i.metricsForLabelMatchersCommon(stream.Context(), req) + result := &client.MetricsForLabelMatchersStreamResponse{} + + cleanup, err := i.metricsForLabelMatchersCommon(stream.Context(), req, func(l labels.Labels) error { + result.Metric = append(result.Metric, &cortexpb.Metric{ + Labels: cortexpb.FromLabelsToLabelAdapters(l), + }) + + if len(result.Metric) >= metadataStreamBatchSize { + err := client.SendMetricsForLabelMatchersStream(stream, result) + if err != nil { + return err + } + result.Metric = result.Metric[:0] + } + return nil + }) defer cleanup() if err != nil { return err } - for i := 0; i < len(result.Metric); i += metadataStreamBatchSize { - j := i + metadataStreamBatchSize - if j > len(result.Metric) { - j = len(result.Metric) - } - resp := &client.MetricsForLabelMatchersStreamResponse{ - Metric: result.Metric[i:j], - } - err := client.SendMetricsForLabelMatchersStream(stream, resp) + // Send last batch + if len(result.Metric) > 0 { + err := client.SendMetricsForLabelMatchersStream(stream, result) if err != nil { return err } @@ -1731,36 +1751,36 @@ func (i *Ingester) MetricsForLabelMatchersStream(req *client.MetricsForLabelMatc // metricsForLabelMatchersCommon returns all the metrics which match a set of matchers. // this should be used by MetricsForLabelMatchers and MetricsForLabelMatchersStream. 
// the cleanup function should be called in order to close the querier -func (i *Ingester) metricsForLabelMatchersCommon(ctx context.Context, req *client.MetricsForLabelMatchersRequest) (*client.MetricsForLabelMatchersResponse, func(), error) { +func (i *Ingester) metricsForLabelMatchersCommon(ctx context.Context, req *client.MetricsForLabelMatchersRequest, acc func(labels.Labels) error) (func(), error) { cleanup := func() {} if err := i.checkRunning(); err != nil { - return nil, cleanup, err + return cleanup, err } userID, err := tenant.TenantID(ctx) if err != nil { - return nil, cleanup, err + return cleanup, err } db := i.getTSDB(userID) if db == nil { - return &client.MetricsForLabelMatchersResponse{}, cleanup, nil + return cleanup, nil } // Parse the request _, _, limit, matchersSet, err := client.FromMetricsForLabelMatchersRequest(req) if err != nil { - return nil, cleanup, err + return cleanup, err } mint, maxt, err := metadataQueryRange(req.StartTimestampMs, req.EndTimestampMs, db, i.cfg.QueryIngestersWithin) if err != nil { - return nil, cleanup, err + return cleanup, err } q, err := db.Querier(mint, maxt) if err != nil { - return nil, cleanup, err + return cleanup, err } cleanup = func() { @@ -1783,7 +1803,7 @@ func (i *Ingester) metricsForLabelMatchersCommon(ctx context.Context, req *clien for _, matchers := range matchersSet { // Interrupt if the context has been canceled. if ctx.Err() != nil { - return nil, cleanup, ctx.Err() + return cleanup, ctx.Err() } seriesSet := q.Select(ctx, true, hints, matchers...) @@ -1794,28 +1814,23 @@ func (i *Ingester) metricsForLabelMatchersCommon(ctx context.Context, req *clien mergedSet = q.Select(ctx, false, hints, matchersSet[0]...) } - // Generate the response merging all series sets. - result := &client.MetricsForLabelMatchersResponse{ - Metric: make([]*cortexpb.Metric, 0), - } - cnt := 0 for mergedSet.Next() { cnt++ // Interrupt if the context has been canceled. if cnt%util.CheckContextEveryNIterations == 0 && ctx.Err() != nil { - return nil, cleanup, ctx.Err() + return cleanup, ctx.Err() + } + if err := acc(mergedSet.At().Labels()); err != nil { + return cleanup, err } - result.Metric = append(result.Metric, &cortexpb.Metric{ - Labels: cortexpb.FromLabelsToLabelAdapters(mergedSet.At().Labels()), - }) - if limit > 0 && len(result.Metric) >= limit { + if limit > 0 && cnt >= limit { break } } - return result, cleanup, nil + return cleanup, nil } // MetricsMetadata returns all the metric metadata of a user. @@ -2045,7 +2060,8 @@ func (i *Ingester) queryStreamChunks(ctx context.Context, db *userTSDB, from, th return 0, 0, 0, 0, ss.Err() } - chunkSeries := make([]client.TimeSeriesChunk, 0, queryStreamBatchSize) + chunkSeries := getTimeSeriesChunksSlice() + defer putTimeSeriesChunksSlice(chunkSeries) batchSizeBytes := 0 var it chunks.Iterator for ss.Next() { @@ -3062,6 +3078,31 @@ func (i *Ingester) ModeHandler(w http.ResponseWriter, r *http.Request) { _, _ = w.Write([]byte(respMsg)) } +func (i *Ingester) getInstanceLimits() *InstanceLimits { + // Don't apply any limits while starting. We especially don't want to apply series in memory limit while replaying WAL. + if i.State() == services.Starting { + return nil + } + + if i.cfg.InstanceLimitsFn == nil { + return defaultInstanceLimits + } + + l := i.cfg.InstanceLimitsFn() + if l == nil { + return defaultInstanceLimits + } + + return l +} + +// stopIncomingRequests is called during the shutdown process. 
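The MetricsForLabelMatchers refactor above swaps "materialize the full response, then slice it into batches" for a per-series callback that flushes as it goes, keeping memory proportional to a single batch. A generic sketch of that pattern with illustrative names, not the ingester's actual code (the real implementation uses metadataStreamBatchSize and the gRPC stream's Send, and reuses the batch slice only because the message is serialized before Send returns):

// Illustrative batching-by-callback helper, not part of the change.
func newBatchingAccumulator[T any](batchSize int, send func([]T) error) (add func(T) error, flush func() error) {
	batch := make([]T, 0, batchSize)
	add = func(item T) error {
		batch = append(batch, item)
		if len(batch) >= batchSize {
			if err := send(batch); err != nil {
				return err
			}
			// Safe only if send does not retain the slice past its return.
			batch = batch[:0]
		}
		return nil
	}
	flush = func() error {
		if len(batch) == 0 {
			return nil
		}
		return send(batch)
	}
	return add, flush
}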
+func (i *Ingester) stopIncomingRequests() { + i.stoppedMtx.Lock() + defer i.stoppedMtx.Unlock() + i.stopped = true +} + // metadataQueryRange returns the best range to query for metadata queries based on the timerange in the ingester. func metadataQueryRange(queryStart, queryEnd int64, db *userTSDB, queryIngestersWithin time.Duration) (mint, maxt int64, err error) { if queryIngestersWithin > 0 { @@ -3119,27 +3160,16 @@ func wrappedTSDBIngestExemplarErr(ingestErr error, timestamp model.Time, seriesL ) } -func (i *Ingester) getInstanceLimits() *InstanceLimits { - // Don't apply any limits while starting. We especially don't want to apply series in memory limit while replaying WAL. - if i.State() == services.Starting { - return nil - } - - if i.cfg.InstanceLimitsFn == nil { - return defaultInstanceLimits - } - - l := i.cfg.InstanceLimitsFn() - if l == nil { - return defaultInstanceLimits +func getTimeSeriesChunksSlice() []client.TimeSeriesChunk { + if p := tsChunksPool.Get(); p != nil { + return p } - return l + return make([]client.TimeSeriesChunk, 0, queryStreamBatchSize) } -// stopIncomingRequests is called during the shutdown process. -func (i *Ingester) stopIncomingRequests() { - i.stoppedMtx.Lock() - defer i.stoppedMtx.Unlock() - i.stopped = true +func putTimeSeriesChunksSlice(p []client.TimeSeriesChunk) { + if p != nil { + tsChunksPool.Put(p[:0]) + } } diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go index d4d05fef9a..b4852d4076 100644 --- a/pkg/ingester/ingester_test.go +++ b/pkg/ingester/ingester_test.go @@ -656,7 +656,7 @@ func TestIngesterUserLimitExceeded(t *testing.T) { httpResp, ok := httpgrpc.HTTPResponseFromError(err) require.True(t, ok, "returned error is not an httpgrpc response") assert.Equal(t, http.StatusBadRequest, int(httpResp.Code)) - assert.Equal(t, wrapWithUser(makeLimitError(perUserSeriesLimit, ing.limiter.FormatError(userID, errMaxSeriesPerUserLimitExceeded)), userID).Error(), string(httpResp.Body)) + assert.Equal(t, wrapWithUser(makeLimitError(perUserSeriesLimit, ing.limiter.FormatError(userID, errMaxSeriesPerUserLimitExceeded, labels1)), userID).Error(), string(httpResp.Body)) // Append two metadata, expect no error since metadata is a best effort approach. _, err = ing.Push(ctx, cortexpb.ToWriteRequest(nil, nil, []*cortexpb.MetricMetadata{metadata1, metadata2}, nil, cortexpb.API)) @@ -778,7 +778,7 @@ func TestIngesterMetricLimitExceeded(t *testing.T) { httpResp, ok := httpgrpc.HTTPResponseFromError(err) require.True(t, ok, "returned error is not an httpgrpc response") assert.Equal(t, http.StatusBadRequest, int(httpResp.Code)) - assert.Equal(t, wrapWithUser(makeMetricLimitError(perMetricSeriesLimit, labels3, ing.limiter.FormatError(userID, errMaxSeriesPerMetricLimitExceeded)), userID).Error(), string(httpResp.Body)) + assert.Equal(t, wrapWithUser(makeMetricLimitError(perMetricSeriesLimit, labels3, ing.limiter.FormatError(userID, errMaxSeriesPerMetricLimitExceeded, labels1)), userID).Error(), string(httpResp.Body)) // Append two metadata for the same metric. Drop the second one, and expect no error since metadata is a best effort approach. 
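getTimeSeriesChunksSlice and putTimeSeriesChunksSlice above lean on prometheus/util/zeropool, a generics-based wrapper around sync.Pool: the diff relies on a zero-value Pool returning the type's zero value (a nil slice) when it is empty, so the getter falls back to make(). A standalone sketch of the same pattern with a hypothetical byte-buffer pool; names and the default capacity are illustrative:

package example // illustrative only

import "github.com/prometheus/prometheus/util/zeropool"

// Zero-value pool: Get returns nil when nothing has been Put yet.
var bufPool zeropool.Pool[[]byte]

func getBuf() []byte {
	if b := bufPool.Get(); b != nil {
		return b
	}
	return make([]byte, 0, 4096) // hypothetical default capacity
}

func putBuf(b []byte) {
	if b != nil {
		bufPool.Put(b[:0]) // reset length, keep capacity for reuse
	}
}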
_, err = ing.Push(ctx, cortexpb.ToWriteRequest(nil, nil, []*cortexpb.MetricMetadata{metadata1, metadata2}, nil, cortexpb.API)) @@ -3073,6 +3073,12 @@ func Test_Ingester_MetricsForLabelMatchers(t *testing.T) { res, err := i.MetricsForLabelMatchers(ctx, req) require.NoError(t, err) assert.ElementsMatch(t, testData.expected, res.Metric) + + // Stream + ss := mockMetricsForLabelMatchersStreamServer{ctx: ctx} + err = i.MetricsForLabelMatchersStream(req, &ss) + require.NoError(t, err) + assert.ElementsMatch(t, testData.expected, ss.res.Metric) }) } } @@ -3407,6 +3413,21 @@ func writeRequestSingleSeries(lbls labels.Labels, samples []cortexpb.Sample) *co return req } +type mockMetricsForLabelMatchersStreamServer struct { + grpc.ServerStream + ctx context.Context + res client.MetricsForLabelMatchersStreamResponse +} + +func (m *mockMetricsForLabelMatchersStreamServer) Send(response *client.MetricsForLabelMatchersStreamResponse) error { + m.res.Metric = append(m.res.Metric, response.Metric...) + return nil +} + +func (m *mockMetricsForLabelMatchersStreamServer) Context() context.Context { + return m.ctx +} + type mockQueryStreamServer struct { grpc.ServerStream ctx context.Context @@ -3424,10 +3445,25 @@ func (m *mockQueryStreamServer) Context() context.Context { } func BenchmarkIngester_QueryStream_Chunks(b *testing.B) { - benchmarkQueryStream(b) + tc := []struct { + samplesCount, seriesCount int + }{ + {samplesCount: 10, seriesCount: 10}, + {samplesCount: 10, seriesCount: 50}, + {samplesCount: 10, seriesCount: 100}, + {samplesCount: 50, seriesCount: 10}, + {samplesCount: 50, seriesCount: 50}, + {samplesCount: 50, seriesCount: 100}, + } + + for _, c := range tc { + b.Run(fmt.Sprintf("samplesCount=%v; seriesCount=%v", c.samplesCount, c.seriesCount), func(b *testing.B) { + benchmarkQueryStream(b, c.samplesCount, c.seriesCount) + }) + } } -func benchmarkQueryStream(b *testing.B) { +func benchmarkQueryStream(b *testing.B, samplesCount, seriesCount int) { cfg := defaultIngesterTestConfig(b) // Create ingester. @@ -3444,7 +3480,6 @@ func benchmarkQueryStream(b *testing.B) { // Push series. ctx := user.InjectOrgID(context.Background(), userID) - const samplesCount = 1000 samples := make([]cortexpb.Sample, 0, samplesCount) for i := 0; i < samplesCount; i++ { @@ -3454,7 +3489,6 @@ func benchmarkQueryStream(b *testing.B) { }) } - const seriesCount = 100 for s := 0; s < seriesCount; s++ { _, err = i.Push(ctx, writeRequestSingleSeries(labels.Labels{{Name: labels.MetricName, Value: "foo"}, {Name: "l", Value: strconv.Itoa(s)}}, samples)) require.NoError(b, err) @@ -3462,7 +3496,7 @@ func benchmarkQueryStream(b *testing.B) { req := &client.QueryRequest{ StartTimestampMs: 0, - EndTimestampMs: samplesCount + 1, + EndTimestampMs: int64(samplesCount + 1), Matchers: []*client.LabelMatcher{{ Type: client.EQUAL, @@ -3474,6 +3508,7 @@ func benchmarkQueryStream(b *testing.B) { mockStream := &mockQueryStreamServer{ctx: ctx} b.ResetTimer() + b.ReportAllocs() for ix := 0; ix < b.N; ix++ { err := i.QueryStream(req, mockStream) @@ -5553,22 +5588,22 @@ func TestExpendedPostingsCache(t *testing.T) { if c.expectedHeadPostingCall > 0 || c.expectedBlockPostingCall > 0 { metric := ` - # HELP cortex_ingester_expanded_postings_cache_requests Total number of requests to the cache. - # TYPE cortex_ingester_expanded_postings_cache_requests counter + # HELP cortex_ingester_expanded_postings_cache_requests_total Total number of requests to the cache. 
+ # TYPE cortex_ingester_expanded_postings_cache_requests_total counter ` if c.expectedBlockPostingCall > 0 { metric += ` - cortex_ingester_expanded_postings_cache_requests{cache="block"} 4 + cortex_ingester_expanded_postings_cache_requests_total{cache="block"} 4 ` } if c.expectedHeadPostingCall > 0 { metric += ` - cortex_ingester_expanded_postings_cache_requests{cache="head"} 4 + cortex_ingester_expanded_postings_cache_requests_total{cache="head"} 4 ` } - err = testutil.GatherAndCompare(r, bytes.NewBufferString(metric), "cortex_ingester_expanded_postings_cache_requests") + err = testutil.GatherAndCompare(r, bytes.NewBufferString(metric), "cortex_ingester_expanded_postings_cache_requests_total") require.NoError(t, err) } @@ -5583,22 +5618,22 @@ func TestExpendedPostingsCache(t *testing.T) { if c.expectedHeadPostingCall > 0 || c.expectedBlockPostingCall > 0 { metric := ` - # HELP cortex_ingester_expanded_postings_cache_hits Total number of hit requests to the cache. - # TYPE cortex_ingester_expanded_postings_cache_hits counter + # HELP cortex_ingester_expanded_postings_cache_hits_total Total number of hit requests to the cache. + # TYPE cortex_ingester_expanded_postings_cache_hits_total counter ` if c.expectedBlockPostingCall > 0 { metric += ` - cortex_ingester_expanded_postings_cache_hits{cache="block"} 4 + cortex_ingester_expanded_postings_cache_hits_total{cache="block"} 4 ` } if c.expectedHeadPostingCall > 0 { metric += ` - cortex_ingester_expanded_postings_cache_hits{cache="head"} 4 + cortex_ingester_expanded_postings_cache_hits_total{cache="head"} 4 ` } - err = testutil.GatherAndCompare(r, bytes.NewBufferString(metric), "cortex_ingester_expanded_postings_cache_hits") + err = testutil.GatherAndCompare(r, bytes.NewBufferString(metric), "cortex_ingester_expanded_postings_cache_hits_total") require.NoError(t, err) } @@ -5644,10 +5679,10 @@ func TestExpendedPostingsCache(t *testing.T) { require.Equal(t, postingsForMatchersCalls.Load(), int64(c.expectedBlockPostingCall)) if c.cacheConfig.Head.Enabled { err = testutil.GatherAndCompare(r, bytes.NewBufferString(` - # HELP cortex_ingester_expanded_postings_non_cacheable_queries Total number of non cacheable queries. - # TYPE cortex_ingester_expanded_postings_non_cacheable_queries counter - cortex_ingester_expanded_postings_non_cacheable_queries{cache="head"} 1 -`), "cortex_ingester_expanded_postings_non_cacheable_queries") + # HELP cortex_ingester_expanded_postings_non_cacheable_queries_total Total number of non cacheable queries. + # TYPE cortex_ingester_expanded_postings_non_cacheable_queries_total counter + cortex_ingester_expanded_postings_non_cacheable_queries_total{cache="head"} 1 +`), "cortex_ingester_expanded_postings_non_cacheable_queries_total") require.NoError(t, err) } diff --git a/pkg/ingester/limiter.go b/pkg/ingester/limiter.go index 0aaf6b312a..94dd409b3b 100644 --- a/pkg/ingester/limiter.go +++ b/pkg/ingester/limiter.go @@ -130,16 +130,16 @@ func (l *Limiter) AssertMaxSeriesPerLabelSet(userID string, metric labels.Labels // FormatError returns the input error enriched with the actual limits for the given user. // It acts as pass-through if the input error is unknown. 
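With the signature change below, callers hand FormatError the labels of the rejected series so the per-metric messages can name the offending metric. A hedged sketch of the new call shape; limiter, userID, and the metric name are placeholders, not taken from the diff:

// Illustrative call site, assuming limiter (*Limiter) and userID are in scope.
lbls := labels.FromStrings(labels.MetricName, "http_requests_total") // hypothetical metric
err := limiter.FormatError(userID, errMaxSeriesPerMetricLimitExceeded, lbls)
// err now reads "per-metric series limit of <N> exceeded for metric http_requests_total, ..."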
-func (l *Limiter) FormatError(userID string, err error) error { +func (l *Limiter) FormatError(userID string, err error, lbls labels.Labels) error { switch { case errors.Is(err, errMaxSeriesPerUserLimitExceeded): return l.formatMaxSeriesPerUserError(userID) case errors.Is(err, errMaxSeriesPerMetricLimitExceeded): - return l.formatMaxSeriesPerMetricError(userID) + return l.formatMaxSeriesPerMetricError(userID, lbls.Get(labels.MetricName)) case errors.Is(err, errMaxMetadataPerUserLimitExceeded): return l.formatMaxMetadataPerUserError(userID) case errors.Is(err, errMaxMetadataPerMetricLimitExceeded): - return l.formatMaxMetadataPerMetricError(userID) + return l.formatMaxMetadataPerMetricError(userID, lbls.Get(labels.MetricName)) case errors.As(err, &errMaxSeriesPerLabelSetLimitExceeded{}): e := errMaxSeriesPerLabelSetLimitExceeded{} errors.As(err, &e) @@ -158,13 +158,13 @@ func (l *Limiter) formatMaxSeriesPerUserError(userID string) error { minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } -func (l *Limiter) formatMaxSeriesPerMetricError(userID string) error { +func (l *Limiter) formatMaxSeriesPerMetricError(userID string, metric string) error { actualLimit := l.maxSeriesPerMetric(userID) localLimit := l.limits.MaxLocalSeriesPerMetric(userID) globalLimit := l.limits.MaxGlobalSeriesPerMetric(userID) - return fmt.Errorf("per-metric series limit of %d exceeded, %s (local limit: %d global limit: %d actual local limit: %d)", - minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) + return fmt.Errorf("per-metric series limit of %d exceeded for metric %s, %s (local limit: %d global limit: %d actual local limit: %d)", + minNonZero(localLimit, globalLimit), metric, l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } func (l *Limiter) formatMaxMetadataPerUserError(userID string) error { @@ -176,13 +176,13 @@ func (l *Limiter) formatMaxMetadataPerUserError(userID string) error { minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } -func (l *Limiter) formatMaxMetadataPerMetricError(userID string) error { +func (l *Limiter) formatMaxMetadataPerMetricError(userID string, metric string) error { actualLimit := l.maxMetadataPerMetric(userID) localLimit := l.limits.MaxLocalMetadataPerMetric(userID) globalLimit := l.limits.MaxGlobalMetadataPerMetric(userID) - return fmt.Errorf("per-metric metadata limit of %d exceeded, %s (local limit: %d global limit: %d actual local limit: %d)", - minNonZero(localLimit, globalLimit), l.AdminLimitMessage, localLimit, globalLimit, actualLimit) + return fmt.Errorf("per-metric metadata limit of %d exceeded for metric %s, %s (local limit: %d global limit: %d actual local limit: %d)", + minNonZero(localLimit, globalLimit), metric, l.AdminLimitMessage, localLimit, globalLimit, actualLimit) } func (l *Limiter) formatMaxSeriesPerLabelSetError(err errMaxSeriesPerLabelSetLimitExceeded) error { diff --git a/pkg/ingester/limiter_test.go b/pkg/ingester/limiter_test.go index 944af00293..a1043b053e 100644 --- a/pkg/ingester/limiter_test.go +++ b/pkg/ingester/limiter_test.go @@ -588,21 +588,22 @@ func TestLimiter_FormatError(t *testing.T) { require.NoError(t, err) limiter := NewLimiter(limits, ring, util.ShardingStrategyDefault, true, 3, false, "please contact administrator to raise it") + lbls := labels.FromStrings(labels.MetricName, "testMetric") - actual := limiter.FormatError("user-1", errMaxSeriesPerUserLimitExceeded) + actual := 
limiter.FormatError("user-1", errMaxSeriesPerUserLimitExceeded, lbls) assert.EqualError(t, actual, "per-user series limit of 100 exceeded, please contact administrator to raise it (local limit: 0 global limit: 100 actual local limit: 100)") - actual = limiter.FormatError("user-1", errMaxSeriesPerMetricLimitExceeded) - assert.EqualError(t, actual, "per-metric series limit of 20 exceeded, please contact administrator to raise it (local limit: 0 global limit: 20 actual local limit: 20)") + actual = limiter.FormatError("user-1", errMaxSeriesPerMetricLimitExceeded, lbls) + assert.EqualError(t, actual, "per-metric series limit of 20 exceeded for metric testMetric, please contact administrator to raise it (local limit: 0 global limit: 20 actual local limit: 20)") - actual = limiter.FormatError("user-1", errMaxMetadataPerUserLimitExceeded) + actual = limiter.FormatError("user-1", errMaxMetadataPerUserLimitExceeded, lbls) assert.EqualError(t, actual, "per-user metric metadata limit of 10 exceeded, please contact administrator to raise it (local limit: 0 global limit: 10 actual local limit: 10)") - actual = limiter.FormatError("user-1", errMaxMetadataPerMetricLimitExceeded) - assert.EqualError(t, actual, "per-metric metadata limit of 3 exceeded, please contact administrator to raise it (local limit: 0 global limit: 3 actual local limit: 3)") + actual = limiter.FormatError("user-1", errMaxMetadataPerMetricLimitExceeded, lbls) + assert.EqualError(t, actual, "per-metric metadata limit of 3 exceeded for metric testMetric, please contact administrator to raise it (local limit: 0 global limit: 3 actual local limit: 3)") input := errors.New("unknown error") - actual = limiter.FormatError("user-1", input) + actual = limiter.FormatError("user-1", input, lbls) assert.Equal(t, input, actual) } diff --git a/pkg/ingester/user_metrics_metadata.go b/pkg/ingester/user_metrics_metadata.go index 96adfaa9ba..dcb5fd6bbf 100644 --- a/pkg/ingester/user_metrics_metadata.go +++ b/pkg/ingester/user_metrics_metadata.go @@ -45,7 +45,7 @@ func (mm *userMetricsMetadata) add(metric string, metadata *cortexpb.MetricMetad // Verify that the user can create more metric metadata given we don't have a set for that metric name. if err := mm.limiter.AssertMaxMetricsWithMetadataPerUser(mm.userID, len(mm.metricToMetadata)); err != nil { mm.validateMetrics.DiscardedMetadata.WithLabelValues(mm.userID, perUserMetadataLimit).Inc() - return makeLimitError(perUserMetadataLimit, mm.limiter.FormatError(mm.userID, err)) + return makeLimitError(perUserMetadataLimit, mm.limiter.FormatError(mm.userID, err, labels.FromStrings(labels.MetricName, metric))) } set = metricMetadataSet{} mm.metricToMetadata[metric] = set @@ -53,7 +53,7 @@ func (mm *userMetricsMetadata) add(metric string, metadata *cortexpb.MetricMetad if err := mm.limiter.AssertMaxMetadataPerMetric(mm.userID, len(set)); err != nil { mm.validateMetrics.DiscardedMetadata.WithLabelValues(mm.userID, perMetricMetadataLimit).Inc() - return makeMetricLimitError(perMetricMetadataLimit, labels.FromStrings(labels.MetricName, metric), mm.limiter.FormatError(mm.userID, err)) + return makeMetricLimitError(perMetricMetadataLimit, labels.FromStrings(labels.MetricName, metric), mm.limiter.FormatError(mm.userID, err, labels.FromStrings(labels.MetricName, metric))) } // if we have seen this metadata before, it is a no-op and we don't need to change our metrics. 
diff --git a/pkg/querier/codec/protobuf_codec.go b/pkg/querier/codec/protobuf_codec.go index 2544499b9c..783adcd183 100644 --- a/pkg/querier/codec/protobuf_codec.go +++ b/pkg/querier/codec/protobuf_codec.go @@ -25,8 +25,8 @@ func (p ProtobufCodec) ContentType() v1.MIMEType { if !p.CortexInternal { return v1.MIMEType{Type: "application", SubType: "x-protobuf"} } - // TODO: switch to use constants. - return v1.MIMEType{Type: "application", SubType: "x-cortex-query+proto"} + + return v1.MIMEType{Type: "application", SubType: tripperware.QueryResponseCortexMIMESubType} } func (p ProtobufCodec) CanEncode(resp *v1.Response) bool { diff --git a/pkg/querier/codec/protobuf_codec_test.go b/pkg/querier/codec/protobuf_codec_test.go index 3b32fc1208..cef4e5649d 100644 --- a/pkg/querier/codec/protobuf_codec_test.go +++ b/pkg/querier/codec/protobuf_codec_test.go @@ -46,6 +46,7 @@ func TestProtobufCodec_Encode(t *testing.T) { expected *tripperware.PrometheusResponse }{ { + name: "vector", data: &v1.QueryData{ ResultType: parser.ValueTypeVector, Result: promql.Vector{ @@ -89,6 +90,7 @@ func TestProtobufCodec_Encode(t *testing.T) { }, }, { + name: "scalar", data: &v1.QueryData{ ResultType: parser.ValueTypeScalar, Result: promql.Scalar{T: 1000, V: 1}, @@ -106,6 +108,7 @@ func TestProtobufCodec_Encode(t *testing.T) { }, }, { + name: "matrix", data: &v1.QueryData{ ResultType: parser.ValueTypeMatrix, Result: promql.Matrix{ @@ -151,39 +154,7 @@ func TestProtobufCodec_Encode(t *testing.T) { }, }, { - data: &v1.QueryData{ - ResultType: parser.ValueTypeMatrix, - Result: promql.Matrix{ - promql.Series{ - Floats: []promql.FPoint{{F: 1, T: 1000}}, - Metric: labels.FromStrings("__name__", "foo"), - }, - }, - }, - expected: &tripperware.PrometheusResponse{ - Status: tripperware.StatusSuccess, - Data: tripperware.PrometheusData{ - ResultType: model.ValMatrix.String(), - Result: tripperware.PrometheusQueryResult{ - Result: &tripperware.PrometheusQueryResult_Matrix{ - Matrix: &tripperware.Matrix{ - SampleStreams: []tripperware.SampleStream{ - { - Labels: []cortexpb.LabelAdapter{ - {Name: "__name__", Value: "foo"}, - }, - Samples: []cortexpb.Sample{ - {Value: 1, TimestampMs: 1000}, - }, - }, - }, - }, - }, - }, - }, - }, - }, - { + name: "matrix with multiple float samples", data: &v1.QueryData{ ResultType: parser.ValueTypeMatrix, Result: promql.Matrix{ @@ -227,6 +198,7 @@ func TestProtobufCodec_Encode(t *testing.T) { }, }, { + name: "matrix with histogram and not cortex internal", data: &v1.QueryData{ ResultType: parser.ValueTypeMatrix, Result: promql.Matrix{ @@ -316,6 +288,7 @@ func TestProtobufCodec_Encode(t *testing.T) { }, }, { + name: "vector with histogram and not cortex internal", data: &v1.QueryData{ ResultType: parser.ValueTypeVector, Result: promql.Vector{ @@ -404,7 +377,7 @@ func TestProtobufCodec_Encode(t *testing.T) { }, }, { - name: "cortex internal with native histogram", + name: "vector with histogram and cortex internal", cortexInternal: true, data: &v1.QueryData{ ResultType: parser.ValueTypeVector, diff --git a/pkg/querier/tenantfederation/merge_queryable.go b/pkg/querier/tenantfederation/merge_queryable.go index 92f4f47b6f..9364e01510 100644 --- a/pkg/querier/tenantfederation/merge_queryable.go +++ b/pkg/querier/tenantfederation/merge_queryable.go @@ -7,6 +7,8 @@ import ( "strings" "github.com/pkg/errors" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" 
"github.com/prometheus/prometheus/tsdb/chunkenc" @@ -19,9 +21,9 @@ import ( ) const ( - defaultTenantLabel = "__tenant_id__" - retainExistingPrefix = "original_" - maxConcurrency = 16 + defaultTenantLabel = "__tenant_id__" + retainExistingPrefix = "original_" + defaultMaxConcurrency = 16 ) // NewQueryable returns a queryable that iterates through all the tenant IDs @@ -36,8 +38,8 @@ const ( // If the label "__tenant_id__" is already existing, its value is overwritten // by the tenant ID and the previous value is exposed through a new label // prefixed with "original_". This behaviour is not implemented recursively. -func NewQueryable(upstream storage.Queryable, byPassWithSingleQuerier bool) storage.Queryable { - return NewMergeQueryable(defaultTenantLabel, tenantQuerierCallback(upstream), byPassWithSingleQuerier) +func NewQueryable(upstream storage.Queryable, maxConcurrent int, byPassWithSingleQuerier bool, reg prometheus.Registerer) storage.Queryable { + return NewMergeQueryable(defaultTenantLabel, maxConcurrent, tenantQuerierCallback(upstream), byPassWithSingleQuerier, reg) } func tenantQuerierCallback(queryable storage.Queryable) MergeQuerierCallback { @@ -79,18 +81,28 @@ type MergeQuerierCallback func(ctx context.Context, mint int64, maxt int64) (ids // If the label `idLabelName` is already existing, its value is overwritten and // the previous value is exposed through a new label prefixed with "original_". // This behaviour is not implemented recursively. -func NewMergeQueryable(idLabelName string, callback MergeQuerierCallback, byPassWithSingleQuerier bool) storage.Queryable { +func NewMergeQueryable(idLabelName string, maxConcurrent int, callback MergeQuerierCallback, byPassWithSingleQuerier bool, reg prometheus.Registerer) storage.Queryable { return &mergeQueryable{ idLabelName: idLabelName, + maxConcurrent: maxConcurrent, callback: callback, byPassWithSingleQuerier: byPassWithSingleQuerier, + + tenantsPerQuery: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Namespace: "cortex", + Name: "querier_federated_tenants_per_query", + Help: "Number of tenants per query.", + Buckets: []float64{1, 2, 4, 8, 16, 32, 64}, + }), } } type mergeQueryable struct { idLabelName string + maxConcurrent int byPassWithSingleQuerier bool callback MergeQuerierCallback + tenantsPerQuery prometheus.Histogram } // Querier returns a new mergeQuerier, which aggregates results from multiple @@ -98,10 +110,12 @@ type mergeQueryable struct { func (m *mergeQueryable) Querier(mint int64, maxt int64) (storage.Querier, error) { return &mergeQuerier{ idLabelName: m.idLabelName, + maxConcurrent: m.maxConcurrent, mint: mint, maxt: maxt, byPassWithSingleQuerier: m.byPassWithSingleQuerier, callback: m.callback, + tenantsPerQuery: m.tenantsPerQuery, }, nil } @@ -112,11 +126,13 @@ func (m *mergeQueryable) Querier(mint int64, maxt int64) (storage.Querier, error // the previous value is exposed through a new label prefixed with "original_". // This behaviour is not implemented recursively type mergeQuerier struct { - idLabelName string - mint, maxt int64 - callback MergeQuerierCallback + idLabelName string + mint, maxt int64 + callback MergeQuerierCallback + maxConcurrent int byPassWithSingleQuerier bool + tenantsPerQuery prometheus.Histogram } // LabelValues returns all potential values for a label name. 
It is not safe @@ -130,6 +146,8 @@ func (m *mergeQuerier) LabelValues(ctx context.Context, name string, hints *stor return nil, nil, err } + m.tenantsPerQuery.Observe(float64(len(ids))) + // by pass when only single querier is returned if m.byPassWithSingleQuerier && len(queriers) == 1 { return queriers[0].LabelValues(ctx, name, hints, matchers...) @@ -169,6 +187,8 @@ func (m *mergeQuerier) LabelNames(ctx context.Context, hints *storage.LabelHints return nil, nil, err } + m.tenantsPerQuery.Observe(float64(len(ids))) + // by pass when only single querier is returned if m.byPassWithSingleQuerier && len(queriers) == 1 { return queriers[0].LabelNames(ctx, hints, matchers...) @@ -257,7 +277,7 @@ func (m *mergeQuerier) mergeDistinctStringSliceWithTenants(ctx context.Context, return nil } - err := concurrency.ForEach(ctx, jobs, maxConcurrency, run) + err := concurrency.ForEach(ctx, jobs, m.maxConcurrent, run) if err != nil { return nil, nil, err } @@ -309,6 +329,8 @@ func (m *mergeQuerier) Select(ctx context.Context, sortSeries bool, hints *stora return storage.ErrSeriesSet(err) } + m.tenantsPerQuery.Observe(float64(len(ids))) + // by pass when only single querier is returned if m.byPassWithSingleQuerier && len(queriers) == 1 { return queriers[0].Select(ctx, sortSeries, hints, matchers...) @@ -352,7 +374,7 @@ func (m *mergeQuerier) Select(ctx context.Context, sortSeries bool, hints *stora return nil } - if err := concurrency.ForEach(ctx, jobs, maxConcurrency, run); err != nil { + if err := concurrency.ForEach(ctx, jobs, m.maxConcurrent, run); err != nil { return storage.ErrSeriesSet(err) } diff --git a/pkg/querier/tenantfederation/merge_queryable_test.go b/pkg/querier/tenantfederation/merge_queryable_test.go index 2c296673a9..595ef572fa 100644 --- a/pkg/querier/tenantfederation/merge_queryable_test.go +++ b/pkg/querier/tenantfederation/merge_queryable_test.go @@ -12,6 +12,8 @@ import ( "github.com/opentracing/opentracing-go" "github.com/opentracing/opentracing-go/mocktracer" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" @@ -297,12 +299,15 @@ type mergeQueryableScenario struct { doNotByPassSingleQuerier bool } -func (s *mergeQueryableScenario) init() (storage.Querier, error) { +func (s *mergeQueryableScenario) init() (storage.Querier, prometheus.Gatherer, error) { // initialize with default tenant label - q := NewQueryable(&s.queryable, !s.doNotByPassSingleQuerier) + reg := prometheus.NewPedanticRegistry() + q := NewQueryable(&s.queryable, defaultMaxConcurrency, !s.doNotByPassSingleQuerier, reg) // retrieve querier - return q.Querier(mint, maxt) + querier, err := q.Querier(mint, maxt) + + return querier, reg, err } // selectTestCase is the inputs and expected outputs of a call to Select. @@ -319,6 +324,8 @@ type selectTestCase struct { expectedWarnings []string // expectedQueryErr is the error expected when querying. expectedQueryErr error + // expectedMetrics is the expected metrics. + expectedMetrics string } // selectScenario tests a call to Select over a range of test cases in a specific scenario. @@ -339,6 +346,8 @@ type labelNamesTestCase struct { expectedWarnings []string // expectedQueryErr is the error expected when querying. expectedQueryErr error + // expectedMetrics is the expected metrics. + expectedMetrics string } // labelNamesScenario tests a call to LabelNames in a specific scenario. 
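The merge queryable above now records how many tenants each federated query fans out to (cortex_querier_federated_tenants_per_query) and bounds that fan-out with a configurable worker count exposed as -tenant-federation.max-concurrent (default 16). The following is a rough sketch of the same idea under stated assumptions: it uses errgroup for the bounded fan-out instead of Cortex's internal concurrency helper, and the tenant list is made up.

package main

import (
    "context"
    "fmt"

    "github.com/prometheus/client_golang/prometheus"
    "github.com/prometheus/client_golang/prometheus/promauto"
    "golang.org/x/sync/errgroup"
)

func main() {
    reg := prometheus.NewRegistry()
    tenantsPerQuery := promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
        Namespace: "cortex",
        Name:      "querier_federated_tenants_per_query",
        Help:      "Number of tenants per query.",
        Buckets:   []float64{1, 2, 4, 8, 16, 32, 64},
    })

    tenants := []string{"team-a", "team-b", "team-c"}
    tenantsPerQuery.Observe(float64(len(tenants))) // one observation per federated query

    maxConcurrent := 2 // plays the role of -tenant-federation.max-concurrent
    g, ctx := errgroup.WithContext(context.Background())
    g.SetLimit(maxConcurrent)
    for _, id := range tenants {
        id := id
        g.Go(func() error {
            _ = ctx // a real implementation would run the per-tenant query here
            fmt.Println("querying tenant", id)
            return nil
        })
    }
    if err := g.Wait(); err != nil {
        fmt.Println("federated query failed:", err)
    }
}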
@@ -361,6 +370,8 @@ type labelValuesTestCase struct { expectedWarnings []string // expectedQueryErr is the error expected when querying. expectedQueryErr error + // expectedMetrics is the expected metrics. + expectedMetrics string } // labelValuesScenario tests a call to LabelValues over a range of test cases in a specific scenario. @@ -373,7 +384,7 @@ func TestMergeQueryable_Querier(t *testing.T) { t.Run("querying without a tenant specified should error", func(t *testing.T) { t.Parallel() queryable := &mockTenantQueryableWithFilter{} - q := NewQueryable(queryable, false /* byPassWithSingleQuerier */) + q := NewQueryable(queryable, defaultMaxConcurrency, false /* byPassWithSingleQuerier */, nil) querier, err := q.Querier(mint, maxt) require.NoError(t, err) @@ -428,6 +439,36 @@ var ( }, }, } + + expectedSingleTenantsMetrics = ` +# HELP cortex_querier_federated_tenants_per_query Number of tenants per query. +# TYPE cortex_querier_federated_tenants_per_query histogram +cortex_querier_federated_tenants_per_query_bucket{le="1"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="2"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="4"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="8"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="16"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="32"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="64"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="+Inf"} 1 +cortex_querier_federated_tenants_per_query_sum 1 +cortex_querier_federated_tenants_per_query_count 1 +` + + expectedThreeTenantsMetrics = ` +# HELP cortex_querier_federated_tenants_per_query Number of tenants per query. +# TYPE cortex_querier_federated_tenants_per_query histogram +cortex_querier_federated_tenants_per_query_bucket{le="1"} 0 +cortex_querier_federated_tenants_per_query_bucket{le="2"} 0 +cortex_querier_federated_tenants_per_query_bucket{le="4"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="8"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="16"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="32"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="64"} 1 +cortex_querier_federated_tenants_per_query_bucket{le="+Inf"} 1 +cortex_querier_federated_tenants_per_query_sum 3 +cortex_querier_federated_tenants_per_query_count 1 +` ) func TestMergeQueryable_Select(t *testing.T) { @@ -441,6 +482,7 @@ func TestMergeQueryable_Select(t *testing.T) { { name: "should return all series when no matchers are provided", expectedSeriesCount: 6, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return only series for team-a and team-c tenants when there is a not-equals matcher for the team-b tenant", @@ -466,6 +508,7 @@ func TestMergeQueryable_Select(t *testing.T) { {Name: "instance", Value: "host2.team-c"}, }, }, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return only series for team-b when there is an equals matcher for the team-b tenant", @@ -482,11 +525,13 @@ func TestMergeQueryable_Select(t *testing.T) { {Name: "instance", Value: "host2.team-b"}, }, }, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return one series for each tenant when there is an equals matcher for the host1 instance", matchers: []*labels.Matcher{{Name: "instance", Value: "host1", Type: labels.MatchEqual}}, expectedSeriesCount: 3, + expectedMetrics: expectedThreeTenantsMetrics, }, }, }, @@ -531,17 +576,20 @@ func TestMergeQueryable_Select(t *testing.T) { {Name: "original___tenant_id__", 
Value: "original-value"}, }, }, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return only series for team-a and team-c tenants when there is with not-equals matcher for the team-b tenant", matchers: []*labels.Matcher{{Name: defaultTenantLabel, Value: "team-b", Type: labels.MatchNotEqual}}, expectedSeriesCount: 4, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return no series where there are conflicting tenant matchers", matchers: []*labels.Matcher{ {Name: defaultTenantLabel, Value: "team-a", Type: labels.MatchEqual}, {Name: defaultTenantLabel, Value: "team-c", Type: labels.MatchEqual}}, expectedSeriesCount: 0, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return only series for team-b when there is an equals matcher for team-b tenant", @@ -560,21 +608,25 @@ func TestMergeQueryable_Select(t *testing.T) { {Name: "original___tenant_id__", Value: "original-value"}, }, }, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return all series when there is an equals matcher for the original value of __tenant_id__ using the revised tenant label", matchers: []*labels.Matcher{{Name: originalDefaultTenantLabel, Value: "original-value", Type: labels.MatchEqual}}, expectedSeriesCount: 6, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return all series when there is a regexp matcher for the original value of __tenant_id__ using the revised tenant label", matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, originalDefaultTenantLabel, "original-value")}, expectedSeriesCount: 6, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return no series when there is a not-equals matcher for the original value of __tenant_id__ using the revised tenant label", matchers: []*labels.Matcher{{Name: originalDefaultTenantLabel, Value: "original-value", Type: labels.MatchNotEqual}}, expectedSeriesCount: 0, + expectedMetrics: expectedThreeTenantsMetrics, }, }, }, @@ -587,6 +639,7 @@ func TestMergeQueryable_Select(t *testing.T) { `warning querying tenant_id team-c: out of office`, }, expectedSeriesCount: 6, + expectedMetrics: expectedThreeTenantsMetrics, }, }}, { @@ -594,31 +647,32 @@ func TestMergeQueryable_Select(t *testing.T) { selectTestCases: []selectTestCase{{ name: "should return any error encountered with any tenant", expectedQueryErr: errors.New("error querying tenant_id team-b: failure xyz"), + expectedMetrics: expectedThreeTenantsMetrics, }}, }, } { scenario := scenario t.Run(scenario.name, func(t *testing.T) { - t.Parallel() - querier, err := scenario.init() - require.NoError(t, err) - - // inject tenants into context - ctx := context.Background() - if len(scenario.tenants) > 0 { - ctx = user.InjectOrgID(ctx, strings.Join(scenario.tenants, "|")) - } - for _, tc := range scenario.selectTestCases { tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() + querier, reg, err := scenario.init() + require.NoError(t, err) + + // inject tenants into context + ctx := context.Background() + if len(scenario.tenants) > 0 { + ctx = user.InjectOrgID(ctx, strings.Join(scenario.tenants, "|")) + } + seriesSet := querier.Select(ctx, true, &storage.SelectHints{Start: mint, End: maxt}, tc.matchers...) 
if tc.expectedQueryErr != nil { require.EqualError(t, seriesSet.Err(), tc.expectedQueryErr.Error()) } else { require.NoError(t, seriesSet.Err()) + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(tc.expectedMetrics), "cortex_querier_federated_tenants_per_query")) assertEqualWarnings(t, tc.expectedWarnings, seriesSet.Warnings()) } @@ -650,6 +704,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { labelNamesTestCase: labelNamesTestCase{ name: "should not return the __tenant_id__ label as the MergeQueryable has been bypassed", expectedLabelNames: []string{"instance", "tenant-team-a"}, + expectedMetrics: expectedSingleTenantsMetrics, }, }, { @@ -658,6 +713,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { name: "should not return the __tenant_id__ label as the MergeQueryable has been bypassed with matchers", matchers: []*labels.Matcher{labels.MustNewMatcher(labels.MatchRegexp, seriesWithLabelNames, "bar|foo")}, expectedLabelNames: []string{"bar", "foo", "instance", "tenant-team-a"}, + expectedMetrics: expectedSingleTenantsMetrics, }, }, { @@ -665,6 +721,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { labelNamesTestCase: labelNamesTestCase{ name: "should return the __tenant_id__ label as the MergeQueryable has not been bypassed", expectedLabelNames: []string{defaultTenantLabel, "instance", "tenant-team-a"}, + expectedMetrics: expectedSingleTenantsMetrics, }, }, { @@ -672,6 +729,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { labelNamesTestCase: labelNamesTestCase{ name: "should return the __tenant_id__ label and all tenant team labels", expectedLabelNames: []string{defaultTenantLabel, "instance", "tenant-team-a", "tenant-team-b", "tenant-team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }, }, { @@ -679,6 +737,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { labelNamesTestCase: labelNamesTestCase{ name: "should return the __tenant_id__ label and all tenant team labels, and the __original_tenant_id__ label", expectedLabelNames: []string{defaultTenantLabel, "instance", originalDefaultTenantLabel, "tenant-team-a", "tenant-team-b", "tenant-team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }, }, { @@ -690,6 +749,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { `warning querying tenant_id team-b: don't like them`, `warning querying tenant_id team-c: out of office`, }, + expectedMetrics: expectedThreeTenantsMetrics, }, }, { @@ -697,6 +757,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { labelNamesTestCase: labelNamesTestCase{ name: "should return any error encountered with any tenant", expectedQueryErr: errors.New("error querying tenant_id team-b: failure xyz"), + expectedMetrics: expectedThreeTenantsMetrics, }, }, { @@ -709,6 +770,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { `warning querying tenant_id team-b: don't like them`, `warning querying tenant_id team-c: out of office`, }, + expectedMetrics: expectedThreeTenantsMetrics, }, }, { @@ -724,6 +786,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { expectedWarnings: []string{ `warning querying tenant_id team-b: don't like them`, }, + expectedMetrics: expectedThreeTenantsMetrics, }, }, { @@ -739,6 +802,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { `warning querying tenant_id team-b: don't like them`, `warning querying tenant_id team-c: out of office`, }, + expectedMetrics: expectedThreeTenantsMetrics, }, }, { @@ -753,13 +817,14 @@ func TestMergeQueryable_LabelNames(t *testing.T) { expectedWarnings: []string{ `warning querying tenant_id team-b: 
don't like them`, }, + expectedMetrics: expectedThreeTenantsMetrics, }, }, } { scenario := scenario t.Run(scenario.mergeQueryableScenario.name, func(t *testing.T) { t.Parallel() - querier, err := scenario.init() + querier, reg, err := scenario.init() require.NoError(t, err) // inject tenants into context @@ -775,6 +840,7 @@ func TestMergeQueryable_LabelNames(t *testing.T) { require.EqualError(t, err, scenario.labelNamesTestCase.expectedQueryErr.Error()) } else { require.NoError(t, err) + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(scenario.expectedMetrics), "cortex_querier_federated_tenants_per_query")) assert.Equal(t, scenario.labelNamesTestCase.expectedLabelNames, labelNames) assertEqualWarnings(t, scenario.labelNamesTestCase.expectedWarnings, warnings) } @@ -796,11 +862,13 @@ func TestMergeQueryable_LabelValues(t *testing.T) { name: "should return all label values for instance when no matchers are provided", labelName: "instance", expectedLabelValues: []string{"host1", "host2.team-a"}, + expectedMetrics: expectedSingleTenantsMetrics, }, { name: "should return no tenant values for the __tenant_id__ label as the MergeQueryable has been bypassed", labelName: defaultTenantLabel, expectedLabelValues: nil, + expectedMetrics: expectedSingleTenantsMetrics, }, }, }, @@ -811,11 +879,13 @@ func TestMergeQueryable_LabelValues(t *testing.T) { name: "should return all label values for instance when no matchers are provided", labelName: "instance", expectedLabelValues: []string{"host1", "host2.team-a"}, + expectedMetrics: expectedSingleTenantsMetrics, }, { name: "should return a tenant team value for the __tenant_id__ label as the MergeQueryable has not been bypassed", labelName: defaultTenantLabel, expectedLabelValues: []string{"team-a"}, + expectedMetrics: expectedSingleTenantsMetrics, }, }, }, @@ -826,6 +896,7 @@ func TestMergeQueryable_LabelValues(t *testing.T) { name: "should return all label values for instance when no matchers are provided", labelName: "instance", expectedLabelValues: []string{"host1", "host2.team-a", "host2.team-b", "host2.team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should propagate non-tenant matchers to downstream queriers", @@ -838,6 +909,7 @@ func TestMergeQueryable_LabelValues(t *testing.T) { "warning querying tenant_id team-b: " + mockMatchersNotImplemented, "warning querying tenant_id team-c: " + mockMatchersNotImplemented, }, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return no values for the instance label when there are conflicting tenant matchers", @@ -847,29 +919,34 @@ func TestMergeQueryable_LabelValues(t *testing.T) { }, labelName: "instance", expectedLabelValues: []string{}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should only query tenant-b when there is an equals matcher for team-b tenant", matchers: []*labels.Matcher{{Name: defaultTenantLabel, Value: "team-b", Type: labels.MatchEqual}}, labelName: "instance", expectedLabelValues: []string{"host1", "host2.team-b"}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return all tenant team values for the __tenant_id__ label when no matchers are provided", labelName: defaultTenantLabel, expectedLabelValues: []string{"team-a", "team-b", "team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return only label values for team-a and team-c tenants when there is a not-equals matcher for team-b tenant", labelName: defaultTenantLabel, matchers: []*labels.Matcher{{Name: defaultTenantLabel, 
Value: "team-b", Type: labels.MatchNotEqual}}, expectedLabelValues: []string{"team-a", "team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return only label values for team-b tenant when there is an equals matcher for team-b tenant", labelName: defaultTenantLabel, matchers: []*labels.Matcher{{Name: defaultTenantLabel, Value: "team-b", Type: labels.MatchEqual}}, expectedLabelValues: []string{"team-b"}, + expectedMetrics: expectedThreeTenantsMetrics, }, }, }, @@ -880,16 +957,19 @@ func TestMergeQueryable_LabelValues(t *testing.T) { name: "should return all label values for instance when no matchers are provided", labelName: "instance", expectedLabelValues: []string{"host1", "host2.team-a", "host2.team-b", "host2.team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return all tenant values for __tenant_id__ label name", labelName: defaultTenantLabel, expectedLabelValues: []string{"team-a", "team-b", "team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return the original value for the revised tenant label name when no matchers are provided", labelName: originalDefaultTenantLabel, expectedLabelValues: []string{"original-value"}, + expectedMetrics: expectedThreeTenantsMetrics, }, { name: "should return the original value for the revised tenant label name with matchers", @@ -901,6 +981,7 @@ func TestMergeQueryable_LabelValues(t *testing.T) { "warning querying tenant_id team-b: " + mockMatchersNotImplemented, "warning querying tenant_id team-c: " + mockMatchersNotImplemented, }, + expectedMetrics: expectedThreeTenantsMetrics, }, }, }, @@ -914,6 +995,7 @@ func TestMergeQueryable_LabelValues(t *testing.T) { `warning querying tenant_id team-b: don't like them`, `warning querying tenant_id team-c: out of office`, }, + expectedMetrics: expectedThreeTenantsMetrics, }}, }, { @@ -922,6 +1004,7 @@ func TestMergeQueryable_LabelValues(t *testing.T) { name: "should not return warnings as the underlying queryables are not queried in requests for the __tenant_id__ label", labelName: defaultTenantLabel, expectedLabelValues: []string{"team-a", "team-b", "team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }}, }, { @@ -930,6 +1013,7 @@ func TestMergeQueryable_LabelValues(t *testing.T) { name: "should return any error encountered with any tenant", labelName: "instance", expectedQueryErr: errors.New("error querying tenant_id team-b: failure xyz"), + expectedMetrics: expectedThreeTenantsMetrics, }}, }, { @@ -938,30 +1022,31 @@ func TestMergeQueryable_LabelValues(t *testing.T) { name: "should not return errors as the underlying queryables are not queried in requests for the __tenant_id__ label", labelName: defaultTenantLabel, expectedLabelValues: []string{"team-a", "team-b", "team-c"}, + expectedMetrics: expectedThreeTenantsMetrics, }}, }, } { scenario := scenario t.Run(scenario.name, func(t *testing.T) { - t.Parallel() - querier, err := scenario.init() - require.NoError(t, err) - - // inject tenants into context - ctx := context.Background() - if len(scenario.tenants) > 0 { - ctx = user.InjectOrgID(ctx, strings.Join(scenario.tenants, "|")) - } - for _, tc := range scenario.labelValuesTestCases { tc := tc t.Run(tc.name, func(t *testing.T) { t.Parallel() + querier, reg, err := scenario.init() + require.NoError(t, err) + + // inject tenants into context + ctx := context.Background() + if len(scenario.tenants) > 0 { + ctx = user.InjectOrgID(ctx, strings.Join(scenario.tenants, "|")) + } + actLabelValues, warnings, err := 
querier.LabelValues(ctx, tc.labelName, nil, tc.matchers...) if tc.expectedQueryErr != nil { require.EqualError(t, err, tc.expectedQueryErr.Error()) } else { require.NoError(t, err) + assert.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(tc.expectedMetrics), "cortex_querier_federated_tenants_per_query")) assert.Equal(t, tc.expectedLabelValues, actLabelValues, fmt.Sprintf("unexpected values for label '%s'", tc.labelName)) assertEqualWarnings(t, tc.expectedWarnings, warnings) } @@ -1030,7 +1115,7 @@ func TestTracingMergeQueryable(t *testing.T) { // set a multi tenant resolver tenant.WithDefaultResolver(tenant.NewMultiResolver()) filter := mockTenantQueryableWithFilter{} - q := NewQueryable(&filter, false) + q := NewQueryable(&filter, defaultMaxConcurrency, false, nil) // retrieve querier if set querier, err := q.Querier(mint, maxt) require.NoError(t, err) diff --git a/pkg/querier/tenantfederation/tenant_federation.go b/pkg/querier/tenantfederation/tenant_federation.go index af5bd7b929..56e5fb59db 100644 --- a/pkg/querier/tenantfederation/tenant_federation.go +++ b/pkg/querier/tenantfederation/tenant_federation.go @@ -7,8 +7,11 @@ import ( type Config struct { // Enabled switches on support for multi tenant query federation Enabled bool `yaml:"enabled"` + // MaxConcurrent The number of workers used for processing federated query. + MaxConcurrent int `yaml:"max_concurrent"` } func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&cfg.Enabled, "tenant-federation.enabled", false, "If enabled on all Cortex services, queries can be federated across multiple tenants. The tenant IDs involved need to be specified separated by a `|` character in the `X-Scope-OrgID` header (experimental).") + f.IntVar(&cfg.MaxConcurrent, "tenant-federation.max-concurrent", defaultMaxConcurrency, "The number of workers used to process each federated query.") } diff --git a/pkg/querier/tripperware/instantquery/instant_query.go b/pkg/querier/tripperware/instantquery/instant_query.go index 75c9715d39..54016c7746 100644 --- a/pkg/querier/tripperware/instantquery/instant_query.go +++ b/pkg/querier/tripperware/instantquery/instant_query.go @@ -11,9 +11,11 @@ import ( "time" jsoniter "github.com/json-iterator/go" + "github.com/munnerz/goautoneg" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" "github.com/prometheus/common/model" + v1 "github.com/prometheus/prometheus/web/api/v1" "github.com/weaveworks/common/httpgrpc" "google.golang.org/grpc/status" @@ -29,6 +31,9 @@ var ( SortMapKeys: true, ValidateJsonRawMessage: false, }.Froze() + + rulerMIMEType = v1.MIMEType{Type: "application", SubType: tripperware.QueryResponseCortexMIMESubType} + jsonMIMEType = v1.MIMEType{Type: "application", SubType: "json"} ) type instantQueryCodec struct { @@ -68,12 +73,18 @@ func (c instantQueryCodec) DecodeRequest(_ context.Context, r *http.Request, for result.Stats = r.FormValue("stats") result.Path = r.URL.Path - // Include the specified headers from http request in prometheusRequest. - for _, header := range forwardHeaders { - for h, hv := range r.Header { - if strings.EqualFold(h, header) { - result.Headers[h] = hv - break + isSourceRuler := strings.Contains(r.Header.Get("User-Agent"), tripperware.RulerUserAgent) + if isSourceRuler { + // When the source is the Ruler, then forward whole headers + result.Headers = r.Header + } else { + // Include the specified headers from http request in prometheusRequest. 
+ for _, header := range forwardHeaders { + for h, hv := range r.Header { + if strings.EqualFold(h, header) { + result.Headers[h] = hv + break + } } } } @@ -155,7 +166,11 @@ func (c instantQueryCodec) EncodeRequest(ctx context.Context, r tripperware.Requ } } - tripperware.SetRequestHeaders(h, c.defaultCodecType, c.compression) + isSourceRuler := strings.Contains(h.Get("User-Agent"), tripperware.RulerUserAgent) + if !isSourceRuler { + // When the source is the Ruler, skip set header + tripperware.SetRequestHeaders(h, c.defaultCodecType, c.compression) + } req := &http.Request{ Method: "GET", @@ -168,7 +183,7 @@ func (c instantQueryCodec) EncodeRequest(ctx context.Context, r tripperware.Requ return req.WithContext(ctx), nil } -func (instantQueryCodec) EncodeResponse(ctx context.Context, res tripperware.Response) (*http.Response, error) { +func (c instantQueryCodec) EncodeResponse(ctx context.Context, req *http.Request, res tripperware.Response) (*http.Response, error) { sp, _ := opentracing.StartSpanFromContext(ctx, "APIResponse.ToHTTPResponse") defer sp.Finish() @@ -180,7 +195,7 @@ func (instantQueryCodec) EncodeResponse(ctx context.Context, res tripperware.Res queryStats := stats.FromContext(ctx) tripperware.SetQueryResponseStats(a, queryStats) - b, err := json.Marshal(a) + contentType, b, err := marshalResponse(a, req.Header.Get("Accept")) if err != nil { return nil, httpgrpc.Errorf(http.StatusInternalServerError, "error encoding response: %v", err) } @@ -189,7 +204,7 @@ func (instantQueryCodec) EncodeResponse(ctx context.Context, res tripperware.Res resp := http.Response{ Header: http.Header{ - "Content-Type": []string{tripperware.ApplicationJson}, + "Content-Type": []string{contentType}, }, Body: io.NopCloser(bytes.NewBuffer(b)), StatusCode: http.StatusOK, @@ -217,3 +232,18 @@ func decorateWithParamName(err error, field string) error { } return fmt.Errorf(errTmpl, field, err) } + +func marshalResponse(resp *tripperware.PrometheusResponse, acceptHeader string) (string, []byte, error) { + for _, clause := range goautoneg.ParseAccept(acceptHeader) { + if jsonMIMEType.Satisfies(clause) { + b, err := json.Marshal(resp) + return tripperware.ApplicationJson, b, err + } else if rulerMIMEType.Satisfies(clause) { + b, err := resp.Marshal() + return tripperware.QueryResponseCortexMIMEType, b, err + } + } + + b, err := json.Marshal(resp) + return tripperware.ApplicationJson, b, err +} diff --git a/pkg/querier/tripperware/instantquery/instant_query_test.go b/pkg/querier/tripperware/instantquery/instant_query_test.go index 8d8ed8fee7..43ba282489 100644 --- a/pkg/querier/tripperware/instantquery/instant_query_test.go +++ b/pkg/querier/tripperware/instantquery/instant_query_test.go @@ -27,6 +27,12 @@ import ( var testInstantQueryCodec = NewInstantQueryCodec(string(tripperware.NonCompression), string(tripperware.ProtobufCodecType)) +var jsonHttpReq = &http.Request{ + Header: map[string][]string{ + "Accept": {"application/json"}, + }, +} + const testHistogramResponse = 
`{"status":"success","data":{"resultType":"vector","result":[{"metric":{"__name__":"prometheus_http_request_duration_seconds","handler":"/metrics","instance":"localhost:9090","job":"prometheus"},"histogram":[1719528871.898,{"count":"6342","sum":"43.31319875499995","buckets":[[0,"0.0013810679320049755","0.0015060652591874421","1"],[0,"0.0015060652591874421","0.001642375811042411","7"],[0,"0.001642375811042411","0.0017910235218841233","5"],[0,"0.0017910235218841233","0.001953125","13"],[0,"0.001953125","0.0021298979153618314","19"],[0,"0.0021298979153618314","0.0023226701464896895","13"],[0,"0.0023226701464896895","0.002532889755177753","13"],[0,"0.002532889755177753","0.002762135864009951","15"],[0,"0.002762135864009951","0.0030121305183748843","12"],[0,"0.0030121305183748843","0.003284751622084822","34"],[0,"0.003284751622084822","0.0035820470437682465","188"],[0,"0.0035820470437682465","0.00390625","372"],[0,"0.00390625","0.004259795830723663","400"],[0,"0.004259795830723663","0.004645340292979379","411"],[0,"0.004645340292979379","0.005065779510355506","425"],[0,"0.005065779510355506","0.005524271728019902","425"],[0,"0.005524271728019902","0.0060242610367497685","521"],[0,"0.0060242610367497685","0.006569503244169644","621"],[0,"0.006569503244169644","0.007164094087536493","593"],[0,"0.007164094087536493","0.0078125","506"],[0,"0.0078125","0.008519591661447326","458"],[0,"0.008519591661447326","0.009290680585958758","346"],[0,"0.009290680585958758","0.010131559020711013","285"],[0,"0.010131559020711013","0.011048543456039804","196"],[0,"0.011048543456039804","0.012048522073499537","129"],[0,"0.012048522073499537","0.013139006488339287","85"],[0,"0.013139006488339287","0.014328188175072986","65"],[0,"0.014328188175072986","0.015625","54"],[0,"0.015625","0.01703918332289465","53"],[0,"0.01703918332289465","0.018581361171917516","20"],[0,"0.018581361171917516","0.020263118041422026","21"],[0,"0.020263118041422026","0.022097086912079608","15"],[0,"0.022097086912079608","0.024097044146999074","11"],[0,"0.024097044146999074","0.026278012976678575","2"],[0,"0.026278012976678575","0.028656376350145972","3"],[0,"0.028656376350145972","0.03125","3"],[0,"0.04052623608284405","0.044194173824159216","2"]]}]}]}}` func sortPrometheusResponseHeader(headers []*tripperware.PrometheusResponseHeader) { @@ -394,7 +400,7 @@ func TestResponse(t *testing.T) { promBody: &tripperware.PrometheusResponse{ Status: "success", Data: tripperware.PrometheusData{ - ResultType: model.ValString.String(), + ResultType: model.ValScalar.String(), Result: tripperware.PrometheusQueryResult{ Result: &tripperware.PrometheusQueryResult_RawBytes{ RawBytes: []byte(`{"resultType":"scalar","result":[1,"13"]}`), @@ -458,7 +464,7 @@ func TestResponse(t *testing.T) { Body: io.NopCloser(bytes.NewBuffer([]byte(tc.jsonBody))), ContentLength: int64(len(tc.jsonBody)), } - resp2, err := testInstantQueryCodec.EncodeResponse(context.Background(), resp) + resp2, err := testInstantQueryCodec.EncodeResponse(context.Background(), jsonHttpReq, resp) require.NoError(t, err) assert.Equal(t, response, resp2) }) @@ -735,7 +741,7 @@ func TestMergeResponse(t *testing.T) { cancelCtx() return } - dr, err := testInstantQueryCodec.EncodeResponse(ctx, resp) + dr, err := testInstantQueryCodec.EncodeResponse(ctx, jsonHttpReq, resp) assert.Equal(t, tc.expectedErr, err) contents, err := io.ReadAll(dr.Body) assert.Equal(t, tc.expectedErr, err) @@ -1750,7 +1756,7 @@ func TestMergeResponseProtobuf(t *testing.T) { cancelCtx() return } - dr, err := 
testInstantQueryCodec.EncodeResponse(ctx, resp) + dr, err := testInstantQueryCodec.EncodeResponse(ctx, jsonHttpReq, resp) assert.Equal(t, tc.expectedErr, err) contents, err := io.ReadAll(dr.Body) assert.Equal(t, tc.expectedErr, err) @@ -1760,6 +1766,55 @@ func TestMergeResponseProtobuf(t *testing.T) { } } +func Test_marshalResponseContentType(t *testing.T) { + mockResp := &tripperware.PrometheusResponse{} + + tests := []struct { + name string + acceptHeader string + expectedContentType string + }{ + { + name: "empty accept header", + acceptHeader: "", + expectedContentType: tripperware.ApplicationJson, + }, + { + name: "type and subtype are *", + acceptHeader: "*/*", + expectedContentType: tripperware.ApplicationJson, + }, + { + name: "sub type is *", + acceptHeader: "application/*", + expectedContentType: tripperware.ApplicationJson, + }, + { + name: "json type", + acceptHeader: tripperware.ApplicationJson, + expectedContentType: tripperware.ApplicationJson, + }, + { + name: "proto type", + acceptHeader: tripperware.ApplicationProtobuf, + expectedContentType: tripperware.ApplicationJson, + }, + { + name: "cortex type, json type", + acceptHeader: "application/x-cortex-query+proto, application/json", + expectedContentType: "application/x-cortex-query+proto", + }, + } + + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + contentType, _, err := marshalResponse(mockResp, test.acceptHeader) + require.NoError(t, err) + require.Equal(t, test.expectedContentType, contentType) + }) + } +} + func Benchmark_Decode(b *testing.B) { maxSamplesCount := 1000000 samples := make([]tripperware.SampleStream, maxSamplesCount) diff --git a/pkg/querier/tripperware/query.go b/pkg/querier/tripperware/query.go index 28e7c2430b..21ac0691e2 100644 --- a/pkg/querier/tripperware/query.go +++ b/pkg/querier/tripperware/query.go @@ -12,10 +12,9 @@ import ( "time" "unsafe" - "github.com/golang/snappy" - "github.com/go-kit/log" "github.com/gogo/protobuf/proto" + "github.com/golang/snappy" jsoniter "github.com/json-iterator/go" "github.com/opentracing/opentracing-go" otlog "github.com/opentracing/opentracing-go/log" @@ -48,6 +47,13 @@ const ( ProtobufCodecType CodecType = "protobuf" ApplicationProtobuf string = "application/x-protobuf" ApplicationJson string = "application/json" + + QueryResponseCortexMIMEType = "application/" + QueryResponseCortexMIMESubType + QueryResponseCortexMIMESubType = "x-cortex-query+proto" + RulerUserAgent = "CortexRuler" + + SourceRuler = "ruler" + SourceAPI = "api" ) // Codec is used to encode/decode query range requests and responses so they can be passed down to middlewares. @@ -62,7 +68,7 @@ type Codec interface { // EncodeRequest encodes a Request into an http request. EncodeRequest(context.Context, Request) (*http.Request, error) // EncodeResponse encodes a Response into an http response. - EncodeResponse(context.Context, Response) (*http.Response, error) + EncodeResponse(context.Context, *http.Request, Response) (*http.Response, error) } // Merger is used by middlewares making multiple requests to merge back all responses into a single one. 
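With the constants above, the instant-query codec negotiates the response encoding from the request's Accept header: the ruler asks for application/x-cortex-query+proto with a JSON fallback, and marshalResponse returns the first media type it recognizes, defaulting to JSON. A stripped-down sketch of that negotiation follows, with the content-type constants inlined for illustration rather than taken from the tripperware package.

package main

import (
    "fmt"

    "github.com/munnerz/goautoneg"
)

const (
    applicationJSON  = "application/json"
    cortexQueryProto = "application/x-cortex-query+proto"
)

// negotiateContentType walks the parsed Accept clauses in preference order and
// returns the first encoding it can produce, defaulting to JSON.
func negotiateContentType(acceptHeader string) string {
    for _, clause := range goautoneg.ParseAccept(acceptHeader) {
        if clause.Type == "application" && clause.SubType == "json" {
            return applicationJSON
        }
        if clause.Type == "application" && clause.SubType == "x-cortex-query+proto" {
            return cortexQueryProto
        }
    }
    return applicationJSON
}

func main() {
    fmt.Println(negotiateContentType(""))                                 // application/json
    fmt.Println(negotiateContentType("application/json"))                 // application/json
    fmt.Println(negotiateContentType("application/x-cortex-query+proto")) // application/x-cortex-query+proto
}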
@@ -763,7 +769,12 @@ func SetRequestHeaders(h http.Header, defaultCodecType CodecType, compression Co } func UnmarshalResponse(r *http.Response, buf []byte, resp *PrometheusResponse) error { - if r.Header != nil && r.Header.Get("Content-Type") == ApplicationProtobuf { + if r.Header == nil { + return json.Unmarshal(buf, resp) + } + + contentType := r.Header.Get("Content-Type") + if contentType == ApplicationProtobuf || contentType == QueryResponseCortexMIMEType { return proto.Unmarshal(buf, resp) } else { return json.Unmarshal(buf, resp) diff --git a/pkg/querier/tripperware/queryrange/marshaling_test.go b/pkg/querier/tripperware/queryrange/marshaling_test.go index 4652a0e10b..d0efd0e8d4 100644 --- a/pkg/querier/tripperware/queryrange/marshaling_test.go +++ b/pkg/querier/tripperware/queryrange/marshaling_test.go @@ -80,7 +80,7 @@ func BenchmarkPrometheusCodec_EncodeResponse(b *testing.B) { b.ReportAllocs() for n := 0; n < b.N; n++ { - _, err := PrometheusCodec.EncodeResponse(context.Background(), res) + _, err := PrometheusCodec.EncodeResponse(context.Background(), nil, res) require.NoError(b, err) } } diff --git a/pkg/querier/tripperware/queryrange/query_range.go b/pkg/querier/tripperware/queryrange/query_range.go index 875519cd97..1693408ea9 100644 --- a/pkg/querier/tripperware/queryrange/query_range.go +++ b/pkg/querier/tripperware/queryrange/query_range.go @@ -230,7 +230,7 @@ func (c prometheusCodec) DecodeResponse(ctx context.Context, r *http.Response, _ return &resp, nil } -func (prometheusCodec) EncodeResponse(ctx context.Context, res tripperware.Response) (*http.Response, error) { +func (prometheusCodec) EncodeResponse(ctx context.Context, _ *http.Request, res tripperware.Response) (*http.Response, error) { sp, _ := opentracing.StartSpanFromContext(ctx, "APIResponse.ToHTTPResponse") defer sp.Finish() diff --git a/pkg/querier/tripperware/queryrange/query_range_test.go b/pkg/querier/tripperware/queryrange/query_range_test.go index 256d2800ff..780077274d 100644 --- a/pkg/querier/tripperware/queryrange/query_range_test.go +++ b/pkg/querier/tripperware/queryrange/query_range_test.go @@ -307,7 +307,7 @@ func TestResponse(t *testing.T) { Body: io.NopCloser(bytes.NewBuffer([]byte(tc.jsonBody))), ContentLength: int64(len(tc.jsonBody)), } - resp2, err := PrometheusCodec.EncodeResponse(context.Background(), resp) + resp2, err := PrometheusCodec.EncodeResponse(context.Background(), nil, resp) require.NoError(t, err) assert.Equal(t, response, resp2) cancelCtx() @@ -431,7 +431,7 @@ func TestResponseWithStats(t *testing.T) { Body: io.NopCloser(bytes.NewBuffer([]byte(tc.jsonBody))), ContentLength: int64(len(tc.jsonBody)), } - resp2, err := PrometheusCodec.EncodeResponse(context.Background(), resp) + resp2, err := PrometheusCodec.EncodeResponse(context.Background(), nil, resp) require.NoError(t, err) assert.Equal(t, response, resp2) }) diff --git a/pkg/querier/tripperware/queryrange/split_by_interval.go b/pkg/querier/tripperware/queryrange/split_by_interval.go index 7ff38f4ec3..064b53d760 100644 --- a/pkg/querier/tripperware/queryrange/split_by_interval.go +++ b/pkg/querier/tripperware/queryrange/split_by_interval.go @@ -110,6 +110,15 @@ func evaluateAtModifierFunction(query string, start, end int64) (string, error) } selector.StartOrEnd = 0 } + if selector, ok := n.(*parser.SubqueryExpr); ok { + switch selector.StartOrEnd { + case parser.START: + selector.Timestamp = &start + case parser.END: + selector.Timestamp = &end + } + selector.StartOrEnd = 0 + } return nil }) return expr.String(), err diff 
--git a/pkg/querier/tripperware/queryrange/split_by_interval_test.go b/pkg/querier/tripperware/queryrange/split_by_interval_test.go index e9cfd8526f..a38bee5034 100644 --- a/pkg/querier/tripperware/queryrange/split_by_interval_test.go +++ b/pkg/querier/tripperware/queryrange/split_by_interval_test.go @@ -276,7 +276,7 @@ func TestSplitByDay(t *testing.T) { mergedResponse, err := PrometheusCodec.MergeResponse(context.Background(), nil, parsedResponse, parsedResponse) require.NoError(t, err) - mergedHTTPResponse, err := PrometheusCodec.EncodeResponse(context.Background(), mergedResponse) + mergedHTTPResponse, err := PrometheusCodec.EncodeResponse(context.Background(), nil, mergedResponse) require.NoError(t, err) mergedHTTPResponseBody, err := io.ReadAll(mergedHTTPResponse.Body) @@ -377,6 +377,14 @@ func Test_evaluateAtModifier(t *testing.T) { [2m:]) [10m:])`, }, + { + in: `irate(kube_pod_info{namespace="test"}[1h:1m] @ start())`, + expected: `irate(kube_pod_info{namespace="test"}[1h:1m] @ 1546300.800)`, + }, + { + in: `irate(kube_pod_info{namespace="test"} @ end()[1h:1m] @ start())`, + expected: `irate(kube_pod_info{namespace="test"} @ 1646300.800 [1h:1m] @ 1546300.800)`, + }, { // parse error: @ modifier must be preceded by an instant vector selector or range vector selector or a subquery in: "sum(http_requests_total[5m]) @ 10.001", diff --git a/pkg/querier/tripperware/roundtrip.go b/pkg/querier/tripperware/roundtrip.go index 69b46dd66b..fe28c9244e 100644 --- a/pkg/querier/tripperware/roundtrip.go +++ b/pkg/querier/tripperware/roundtrip.go @@ -115,7 +115,7 @@ func NewQueryTripperware( queriesPerTenant := promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_query_frontend_queries_total", Help: "Total queries sent per tenant.", - }, []string{"op", "user"}) + }, []string{"op", "source", "user"}) rejectedQueriesPerTenant := promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_query_frontend_rejected_queries_total", @@ -156,7 +156,8 @@ func NewQueryTripperware( now := time.Now() userStr := tenant.JoinTenantIDs(tenantIDs) activeUsers.UpdateUserTimestamp(userStr, now) - queriesPerTenant.WithLabelValues(op, userStr).Inc() + source := getSource(r.Header.Get("User-Agent")) + queriesPerTenant.WithLabelValues(op, source, userStr).Inc() if maxSubQuerySteps > 0 && (isQuery || isQueryRange) { query := r.FormValue("query") @@ -211,7 +212,7 @@ func (q roundTripper) RoundTrip(r *http.Request) (*http.Response, error) { return nil, err } - return q.codec.EncodeResponse(r.Context(), response) + return q.codec.EncodeResponse(r.Context(), r, response) } // Do implements Handler.
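Query-frontend traffic is now attributed to a source derived from the caller's User-Agent: the ruler identifies itself as CortexRuler/<version> (see frontend_client.go below), and everything else is counted as API traffic. The following is a self-contained version of the getSource helper added in the next hunk, with the package constants inlined for the sketch.

package main

import (
    "fmt"
    "strings"
)

// Values mirror the RulerUserAgent / SourceRuler / SourceAPI constants above.
const (
    rulerUserAgent = "CortexRuler"
    sourceRuler    = "ruler"
    sourceAPI      = "api"
)

// getSource labels a query by its caller: ruler-issued requests carry a
// "CortexRuler/<version>" User-Agent, everything else counts as API traffic.
func getSource(userAgent string) string {
    if strings.Contains(userAgent, rulerUserAgent) {
        return sourceRuler
    }
    return sourceAPI
}

func main() {
    fmt.Println(getSource("CortexRuler/1.18.0")) // ruler
    fmt.Println(getSource("Grafana/10.4.0"))     // api
}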
@@ -240,3 +241,12 @@ func (q roundTripper) Do(ctx context.Context, r Request) (Response, error) { return q.codec.DecodeResponse(ctx, response, r) } + +func getSource(userAgent string) string { + if strings.Contains(userAgent, RulerUserAgent) { + // caller is ruler + return SourceRuler + } + + return SourceAPI +} diff --git a/pkg/querier/tripperware/roundtrip_test.go b/pkg/querier/tripperware/roundtrip_test.go index 263c9a9b0e..12aa5f1240 100644 --- a/pkg/querier/tripperware/roundtrip_test.go +++ b/pkg/querier/tripperware/roundtrip_test.go @@ -3,6 +3,7 @@ package tripperware import ( "bytes" "context" + "fmt" "io" "net/http" "net/http/httptest" @@ -12,7 +13,10 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/prometheus/common/model" + "github.com/prometheus/common/version" "github.com/stretchr/testify/require" "github.com/thanos-io/thanos/pkg/querysharding" "github.com/weaveworks/common/httpgrpc" @@ -59,7 +63,7 @@ func (c mockCodec) DecodeRequest(_ context.Context, r *http.Request, _ []string) return mockRequest{}, nil } -func (c mockCodec) EncodeResponse(_ context.Context, resp Response) (*http.Response, error) { +func (c mockCodec) EncodeResponse(_ context.Context, _ *http.Request, resp Response) (*http.Response, error) { r := resp.(*mockResponse) return &http.Response{ Header: http.Header{ @@ -125,48 +129,92 @@ func TestRoundTrip(t *testing.T) { expectedErr error limits Limits maxSubQuerySteps int64 + userAgent string + expectedMetric string }{ { path: "/foo", expectedBody: "bar", limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/1.0", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, { path: queryExemplar, expectedBody: "bar", limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/1.1", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, { path: seriesQuery, expectedBody: "bar", limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/1.2", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="series", source="api", user="1"} 1 +`, }, { path: queryRange, expectedBody: responseBody, limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/1.0", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query_range", source="api", user="1"} 1 +`, }, { path: query, expectedBody: instantResponseBody, limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/1.1", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant.
+# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, { path: queryNonShardable, expectedBody: instantResponseBody, limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/1.2", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, { path: query, expectedBody: instantResponseBody, limits: shardingOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/1.0", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, // Shouldn't hit subquery step limit because max steps is set to 0 so this check is disabled. { @@ -174,6 +222,12 @@ func TestRoundTrip(t *testing.T) { expectedBody: instantResponseBody, limits: defaultOverrides, maxSubQuerySteps: 0, + userAgent: "dummyUserAgent/1.0", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, // Shouldn't hit subquery step limit because max steps is higher, which is 100K. { @@ -181,12 +235,24 @@ func TestRoundTrip(t *testing.T) { expectedBody: instantResponseBody, limits: defaultOverrides, maxSubQuerySteps: 100000, + userAgent: "dummyUserAgent/1.0", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, { path: querySubqueryStepSizeTooSmall, expectedErr: httpgrpc.Errorf(http.StatusBadRequest, ErrSubQueryStepTooSmall, 11000), limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: fmt.Sprintf("%s/%s", RulerUserAgent, version.Version), + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. +# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="ruler", user="1"} 1 +`, }, { // The query should go to instant query middlewares rather than forwarding to next. @@ -194,6 +260,12 @@ func TestRoundTrip(t *testing.T) { expectedBody: instantResponseBody, limits: defaultOverrides, maxSubQuerySteps: 11000, + userAgent: "dummyUserAgent/2.0", + expectedMetric: ` +# HELP cortex_query_frontend_queries_total Total queries sent per tenant. 
+# TYPE cortex_query_frontend_queries_total counter +cortex_query_frontend_queries_total{op="query", source="api", user="1"} 1 +`, }, } { t.Run(tc.path, func(t *testing.T) { @@ -209,8 +281,11 @@ func TestRoundTrip(t *testing.T) { err = user.InjectOrgIDIntoHTTPRequest(ctx, req) require.NoError(t, err) + req.Header.Set("User-Agent", tc.userAgent) + + reg := prometheus.NewPedanticRegistry() tw := NewQueryTripperware(log.NewNopLogger(), - nil, + reg, nil, rangeMiddlewares, instantMiddlewares, @@ -231,6 +306,7 @@ func TestRoundTrip(t *testing.T) { bs, err := io.ReadAll(resp.Body) require.NoError(t, err) require.Equal(t, tc.expectedBody, string(bs)) + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(tc.expectedMetric), "cortex_query_frontend_queries_total")) } else { require.Equal(t, tc.expectedErr, err) } diff --git a/pkg/ruler/frontend_client.go b/pkg/ruler/frontend_client.go index e75997ce1a..dff0fca11c 100644 --- a/pkg/ruler/frontend_client.go +++ b/pkg/ruler/frontend_client.go @@ -15,6 +15,7 @@ import ( "github.com/weaveworks/common/httpgrpc" "github.com/weaveworks/common/user" + "github.com/cortexproject/cortex/pkg/querier/tripperware" "github.com/cortexproject/cortex/pkg/util/spanlogger" ) @@ -22,22 +23,29 @@ const ( orgIDHeader = "X-Scope-OrgID" instantQueryPath = "/api/v1/query" mimeTypeForm = "application/x-www-form-urlencoded" - contentTypeJSON = "application/json" ) +var jsonDecoder JsonDecoder +var protobufDecoder ProtobufDecoder + type FrontendClient struct { client httpgrpc.HTTPClient timeout time.Duration prometheusHTTPPrefix string - jsonDecoder JsonDecoder + queryResponseFormat string + decoders map[string]Decoder } -func NewFrontendClient(client httpgrpc.HTTPClient, timeout time.Duration, prometheusHTTPPrefix string) *FrontendClient { +func NewFrontendClient(client httpgrpc.HTTPClient, timeout time.Duration, prometheusHTTPPrefix, queryResponseFormat string) *FrontendClient { return &FrontendClient{ client: client, timeout: timeout, prometheusHTTPPrefix: prometheusHTTPPrefix, - jsonDecoder: JsonDecoder{}, + queryResponseFormat: queryResponseFormat, + decoders: map[string]Decoder{ + jsonDecoder.ContentType(): jsonDecoder, + protobufDecoder.ContentType(): protobufDecoder, + }, } } @@ -55,15 +63,23 @@ func (p *FrontendClient) makeRequest(ctx context.Context, qs string, ts time.Tim return nil, err } + acceptHeader := "" + switch p.queryResponseFormat { + case queryResponseFormatJson: + acceptHeader = jsonDecoder.ContentType() + case queryResponseFormatProtobuf: + acceptHeader = fmt.Sprintf("%s,%s", protobufDecoder.ContentType(), jsonDecoder.ContentType()) + } + req := &httpgrpc.HTTPRequest{ Method: http.MethodPost, Url: p.prometheusHTTPPrefix + instantQueryPath, Body: body, Headers: []*httpgrpc.Header{ - {Key: textproto.CanonicalMIMEHeaderKey("User-Agent"), Values: []string{fmt.Sprintf("Cortex/%s", version.Version)}}, + {Key: textproto.CanonicalMIMEHeaderKey("User-Agent"), Values: []string{fmt.Sprintf("%s/%s", tripperware.RulerUserAgent, version.Version)}}, {Key: textproto.CanonicalMIMEHeaderKey("Content-Type"), Values: []string{mimeTypeForm}}, {Key: textproto.CanonicalMIMEHeaderKey("Content-Length"), Values: []string{strconv.Itoa(len(body))}}, - {Key: textproto.CanonicalMIMEHeaderKey("Accept"), Values: []string{contentTypeJSON}}, + {Key: textproto.CanonicalMIMEHeaderKey("Accept"), Values: []string{acceptHeader}}, {Key: textproto.CanonicalMIMEHeaderKey(orgIDHeader), Values: []string{orgID}}, }, } @@ -91,7 +107,15 @@ func (p *FrontendClient) InstantQuery(ctx 
context.Context, qs string, t time.Tim return nil, err } - vector, warning, err := p.jsonDecoder.Decode(resp.Body) + contentType := extractHeader(resp.Headers, "Content-Type") + decoder, ok := p.decoders[contentType] + if !ok { + err = fmt.Errorf("unknown content type: %s", contentType) + level.Error(log).Log("err", err, "query", qs) + return nil, err + } + + vector, warning, err := decoder.Decode(resp.Body) if err != nil { level.Error(log).Log("err", err, "query", qs) return nil, err @@ -103,3 +127,13 @@ func (p *FrontendClient) InstantQuery(ctx context.Context, qs string, t time.Tim return vector, nil } + +func extractHeader(headers []*httpgrpc.Header, target string) string { + for _, h := range headers { + if h.Key == target && len(h.Values) > 0 { + return h.Values[0] + } + } + + return "" +} diff --git a/pkg/ruler/frontend_client_pool.go b/pkg/ruler/frontend_client_pool.go index b9b512cfab..7b131621aa 100644 --- a/pkg/ruler/frontend_client_pool.go +++ b/pkg/ruler/frontend_client_pool.go @@ -20,6 +20,7 @@ import ( type frontendPool struct { timeout time.Duration + queryResponseFormat string prometheusHTTPPrefix string grpcConfig grpcclient.Config @@ -29,6 +30,7 @@ type frontendPool struct { func newFrontendPool(cfg Config, log log.Logger, reg prometheus.Registerer) *client.Pool { p := &frontendPool{ timeout: cfg.FrontendTimeout, + queryResponseFormat: cfg.QueryResponseFormat, prometheusHTTPPrefix: cfg.PrometheusHTTPPrefix, grpcConfig: cfg.GRPCClientConfig, frontendClientRequestDuration: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ @@ -68,7 +70,7 @@ func (f *frontendPool) createFrontendClient(addr string) (client.PoolClient, err } return &frontendClient{ - FrontendClient: NewFrontendClient(httpgrpc.NewHTTPClient(conn), f.timeout, f.prometheusHTTPPrefix), + FrontendClient: NewFrontendClient(httpgrpc.NewHTTPClient(conn), f.timeout, f.prometheusHTTPPrefix, f.queryResponseFormat), HealthClient: grpc_health_v1.NewHealthClient(conn), }, nil } diff --git a/pkg/ruler/frontend_client_test.go b/pkg/ruler/frontend_client_test.go index 104780f58c..d46df39738 100644 --- a/pkg/ruler/frontend_client_test.go +++ b/pkg/ruler/frontend_client_test.go @@ -13,6 +13,9 @@ import ( "github.com/weaveworks/common/httpgrpc" "github.com/weaveworks/common/user" "google.golang.org/grpc" + + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/cortexproject/cortex/pkg/querier/tripperware" ) type mockHTTPGRPCClient func(ctx context.Context, req *httpgrpc.HTTPRequest, _ ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) @@ -28,7 +31,7 @@ func TestTimeout(t *testing.T) { } ctx := context.Background() ctx = user.InjectOrgID(ctx, "userID") - frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus") + frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json") _, err := frontendClient.InstantQuery(ctx, "query", time.Now()) require.Equal(t, context.DeadlineExceeded, err) } @@ -37,12 +40,12 @@ func TestNoOrgId(t *testing.T) { mockClientFn := func(ctx context.Context, _ *httpgrpc.HTTPRequest, _ ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) { return nil, nil } - frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus") + frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json") _, err := frontendClient.InstantQuery(context.Background(), "query", time.Now()) require.Equal(t, user.ErrNoOrgID, err) } -func 
TestInstantQuery(t *testing.T) { +func TestInstantQueryJsonCodec(t *testing.T) { tests := []struct { description string responseBody string @@ -148,10 +151,195 @@ func TestInstantQuery(t *testing.T) { } ctx := context.Background() ctx = user.InjectOrgID(ctx, "userID") - frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus") + frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "json") + vector, err := frontendClient.InstantQuery(ctx, "query", time.Now()) + require.Equal(t, test.expected, vector) + require.Equal(t, test.expectedErr, err) + }) + } +} + +func TestInstantQueryProtoCodec(t *testing.T) { + var tests = []struct { + description string + responseBody *tripperware.PrometheusResponse + expected promql.Vector + expectedErr error + }{ + { + description: "empty vector", + responseBody: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "vector", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_Vector{ + Vector: &tripperware.Vector{ + Samples: []tripperware.Sample{}, + }, + }, + }, + }, + }, + expected: promql.Vector{}, + expectedErr: nil, + }, + { + description: "vector with series", + responseBody: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "vector", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_Vector{ + Vector: &tripperware.Vector{ + Samples: []tripperware.Sample{ + { + Labels: cortexpb.FromLabelsToLabelAdapters(labels.FromStrings("foo", "bar")), + Sample: &cortexpb.Sample{ + Value: 1.234, + TimestampMs: 1724146338123, + }, + }, + { + Labels: cortexpb.FromLabelsToLabelAdapters(labels.FromStrings("bar", "baz")), + Sample: &cortexpb.Sample{ + Value: 5.678, + TimestampMs: 1724146338456, + }, + }, + }, + }, + }, + }, + }, + }, + expected: promql.Vector{ + { + Metric: labels.FromStrings("foo", "bar"), + T: 1724146338123, + F: 1.234, + }, + { + Metric: labels.FromStrings("bar", "baz"), + T: 1724146338456, + F: 5.678, + }, + }, + expectedErr: nil, + }, + { + description: "get scalar", + responseBody: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "scalar", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_RawBytes{ + RawBytes: []byte(`{"resultType":"scalar","result":[1724146338.123,"1.234"]}`), + }, + }, + }, + }, + expected: promql.Vector{ + { + Metric: labels.EmptyLabels(), + T: 1724146338123, + F: 1.234, + }, + }, + expectedErr: nil, + }, + { + description: "get matrix", + responseBody: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "matrix", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_Matrix{ + Matrix: &tripperware.Matrix{ + SampleStreams: []tripperware.SampleStream{}, + }, + }, + }, + }, + }, + expectedErr: errors.New("rule result is not a vector or scalar"), + }, + { + description: "get string", + responseBody: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "string", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_RawBytes{ + RawBytes: []byte(`{"resultType":"string","result":[1724146338.123,"string"]}`), + }, + }, + }, + }, + expectedErr: errors.New("rule result is not a vector or scalar"), + }, + } + for 
_, test := range tests { + t.Run(test.description, func(t *testing.T) { + mockClientFn := func(ctx context.Context, _ *httpgrpc.HTTPRequest, _ ...grpc.CallOption) (*httpgrpc.HTTPResponse, error) { + d, err := test.responseBody.Marshal() + if err != nil { + return nil, err + } + return &httpgrpc.HTTPResponse{ + Code: http.StatusOK, + Headers: []*httpgrpc.Header{ + {Key: "Content-Type", Values: []string{"application/x-cortex-query+proto"}}, + }, + Body: d, + }, nil + } + ctx := context.Background() + ctx = user.InjectOrgID(ctx, "userID") + frontendClient := NewFrontendClient(mockHTTPGRPCClient(mockClientFn), time.Second*5, "/prometheus", "protobuf") vector, err := frontendClient.InstantQuery(ctx, "query", time.Now()) require.Equal(t, test.expected, vector) require.Equal(t, test.expectedErr, err) }) } } + +func Test_extractHeader(t *testing.T) { + tests := []struct { + description string + headers []*httpgrpc.Header + expectedOutput string + }{ + { + description: "cortex query proto", + headers: []*httpgrpc.Header{ + { + Key: "Content-Type", + Values: []string{"application/x-cortex-query+proto"}, + }, + }, + expectedOutput: "application/x-cortex-query+proto", + }, + { + description: "json", + headers: []*httpgrpc.Header{ + { + Key: "Content-Type", + Values: []string{"application/json"}, + }, + }, + expectedOutput: "application/json", + }, + } + + target := "Content-Type" + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + require.Equal(t, test.expectedOutput, extractHeader(test.headers, target)) + }) + } +} diff --git a/pkg/ruler/frontend_decoder.go b/pkg/ruler/frontend_decoder.go index c8e653a05f..3ea23875b2 100644 --- a/pkg/ruler/frontend_decoder.go +++ b/pkg/ruler/frontend_decoder.go @@ -10,6 +10,8 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/cortexproject/cortex/pkg/querier/tripperware" "github.com/cortexproject/cortex/pkg/util/api" ) @@ -18,9 +20,19 @@ const ( ) type JsonDecoder struct{} -type Warning []string +type ProtobufDecoder struct{} +type Warnings []string -func (j JsonDecoder) Decode(body []byte) (promql.Vector, Warning, error) { +type Decoder interface { + Decode(body []byte) (promql.Vector, Warnings, error) + ContentType() string +} + +func (j JsonDecoder) ContentType() string { + return "application/json" +} + +func (j JsonDecoder) Decode(body []byte) (promql.Vector, Warnings, error) { var response api.Response if err := json.NewDecoder(bytes.NewReader(body)).Decode(&response); err != nil { @@ -48,7 +60,7 @@ func (j JsonDecoder) Decode(body []byte) (promql.Vector, Warning, error) { if err := json.Unmarshal(data.Result, &scalar); err != nil { return nil, nil, err } - return j.scalarToPromQLVector(scalar), response.Warnings, nil + return scalarToPromQLVector(scalar), response.Warnings, nil case model.ValVector: var vector model.Vector if err := json.Unmarshal(data.Result, &vector); err != nil { @@ -79,10 +91,72 @@ func (j JsonDecoder) vectorToPromQLVector(vector model.Vector) promql.Vector { return v } -func (j JsonDecoder) scalarToPromQLVector(scalar model.Scalar) promql.Vector { +func (p ProtobufDecoder) ContentType() string { + return tripperware.QueryResponseCortexMIMEType +} + +func (p ProtobufDecoder) Decode(body []byte) (promql.Vector, Warnings, error) { + resp := tripperware.PrometheusResponse{} + if err := resp.Unmarshal(body); err != nil { + return nil, nil, err + } + + if resp.Status == statusError { + return nil, 
resp.Warnings, fmt.Errorf("failed to execute query with error: %s", resp.Error) + } + + switch resp.Data.ResultType { + case "scalar": + data := struct { + Type model.ValueType `json:"resultType"` + Result json.RawMessage `json:"result"` + }{} + + if err := json.Unmarshal(resp.Data.Result.GetRawBytes(), &data); err != nil { + return nil, nil, err + } + + var s model.Scalar + if err := json.Unmarshal(data.Result, &s); err != nil { + return nil, nil, err + } + return scalarToPromQLVector(s), resp.Warnings, nil + case "vector": + return p.vectorToPromQLVector(resp.Data.Result.GetVector()), resp.Warnings, nil + default: + return nil, resp.Warnings, errors.New("rule result is not a vector or scalar") + } +} + +func (p ProtobufDecoder) vectorToPromQLVector(vector *tripperware.Vector) promql.Vector { + v := make([]promql.Sample, 0, len(vector.Samples)) + for _, sample := range vector.Samples { + metric := cortexpb.FromLabelAdaptersToLabels(sample.Labels) + + if sample.Sample != nil { + v = append(v, promql.Sample{ + T: sample.Sample.TimestampMs, + F: sample.Sample.Value, + Metric: metric, + }) + } + + if sample.RawHistogram != nil { + v = append(v, promql.Sample{ + T: sample.RawHistogram.TimestampMs, + H: cortexpb.FloatHistogramProtoToFloatHistogram(*sample.RawHistogram), + Metric: metric, + }) + } + } + + return v +} + +func scalarToPromQLVector(s model.Scalar) promql.Vector { return promql.Vector{promql.Sample{ - T: int64(scalar.Timestamp), - F: float64(scalar.Value), + T: int64(s.Timestamp), + F: float64(s.Value), Metric: labels.Labels{}, }} } diff --git a/pkg/ruler/frontend_decoder_test.go b/pkg/ruler/frontend_decoder_test.go index cc24fe732b..4cabe47859 100644 --- a/pkg/ruler/frontend_decoder_test.go +++ b/pkg/ruler/frontend_decoder_test.go @@ -4,19 +4,186 @@ import ( "errors" "testing" + "github.com/prometheus/prometheus/model/histogram" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/stretchr/testify/require" + + "github.com/cortexproject/cortex/pkg/cortexpb" + "github.com/cortexproject/cortex/pkg/querier/tripperware" ) -func TestDecode(t *testing.T) { - jsonDecoder := JsonDecoder{} +func TestProtoDecode(t *testing.T) { + tests := []struct { + description string + resp *tripperware.PrometheusResponse + expectedVector promql.Vector + expectedWarning []string + expectedErr error + }{ + { + description: "vector", + resp: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "vector", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_Vector{ + Vector: &tripperware.Vector{ + Samples: []tripperware.Sample{ + { + Labels: cortexpb.FromLabelsToLabelAdapters(labels.FromStrings("foo", "bar")), + Sample: &cortexpb.Sample{ + Value: 1.234, + TimestampMs: 1724146338123, + }, + }, + }, + }, + }, + }, + }, + Warnings: []string{"a", "b", "c"}, + }, + expectedVector: promql.Vector{ + { + Metric: labels.FromStrings("foo", "bar"), + T: 1724146338123, + F: 1.234, + }, + }, + expectedWarning: []string{"a", "b", "c"}, + }, + { + description: "vector with raw histogram", + resp: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "vector", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_Vector{ + Vector: &tripperware.Vector{ + Samples: []tripperware.Sample{ + { + Labels: cortexpb.FromLabelsToLabelAdapters(labels.FromStrings("foo", "bar")), + RawHistogram: &cortexpb.Histogram{ + 
Count: &cortexpb.Histogram_CountFloat{CountFloat: 10}, + Sum: 20, + Schema: 2, + ZeroThreshold: 0.001, + ZeroCount: &cortexpb.Histogram_ZeroCountFloat{ZeroCountFloat: 12}, + NegativeSpans: []cortexpb.BucketSpan{{Offset: 2, Length: 2}}, + NegativeCounts: []float64{2, 1}, + PositiveSpans: []cortexpb.BucketSpan{{Offset: 3, Length: 2}, {Offset: 1, Length: 3}}, + PositiveCounts: []float64{1, 2, 2, 1, 1}, + TimestampMs: 1724146338123, + }, + }, + }, + }, + }, + }, + }, + Warnings: []string{"a", "b", "c"}, + }, + expectedVector: promql.Vector{ + { + Metric: labels.FromStrings("foo", "bar"), + T: 1724146338123, + H: &histogram.FloatHistogram{ + Schema: 2, + ZeroThreshold: 0.001, + ZeroCount: 12, + Count: 10, + Sum: 20, + PositiveSpans: []histogram.Span{{Offset: 3, Length: 2}, {Offset: 1, Length: 3}}, + NegativeSpans: []histogram.Span{{Offset: 2, Length: 2}}, + PositiveBuckets: []float64{1, 2, 2, 1, 1}, + NegativeBuckets: []float64{2, 1}, + CustomValues: nil, + }, + }, + }, + expectedWarning: []string{"a", "b", "c"}, + }, + { + description: "matrix", + resp: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "matrix", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_Matrix{}, + }, + }, + Warnings: []string{"a", "b", "c"}, + }, + expectedVector: nil, + expectedWarning: []string{"a", "b", "c"}, + expectedErr: errors.New("rule result is not a vector or scalar"), + }, + { + description: "scalar", + resp: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "scalar", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_RawBytes{ + RawBytes: []byte(`{"resultType":"scalar","result":[1724146338.123,"1.234"]}`), + }, + }, + }, + Warnings: []string{"a", "b", "c"}, + }, + expectedVector: promql.Vector{ + { + Metric: labels.EmptyLabels(), + T: 1724146338123, + F: 1.234, + }, + }, + expectedWarning: []string{"a", "b", "c"}, + }, + { + description: "string", + resp: &tripperware.PrometheusResponse{ + Status: "success", + Data: tripperware.PrometheusData{ + ResultType: "string", + Result: tripperware.PrometheusQueryResult{ + Result: &tripperware.PrometheusQueryResult_RawBytes{ + RawBytes: []byte(`{"resultType":"string","result":[1724146338.123,"1.234"]}`), + }, + }, + }, + Warnings: []string{"a", "b", "c"}, + }, + expectedVector: nil, + expectedWarning: []string{"a", "b", "c"}, + expectedErr: errors.New("rule result is not a vector or scalar"), + }, + } + + for _, test := range tests { + t.Run(test.description, func(t *testing.T) { + b, err := test.resp.Marshal() + require.NoError(t, err) + + vector, _, err := protobufDecoder.Decode(b) + require.Equal(t, test.expectedErr, err) + require.Equal(t, test.expectedVector, vector) + require.Equal(t, test.expectedWarning, test.resp.Warnings) + }) + } +} +func TestJsonDecode(t *testing.T) { tests := []struct { description string body string expectedVector promql.Vector - expectedWarning Warning + expectedWarning Warnings expectedErr error }{ { diff --git a/pkg/ruler/ruler.go b/pkg/ruler/ruler.go index d77b4d0a41..812fd4f15c 100644 --- a/pkg/ruler/ruler.go +++ b/pkg/ruler/ruler.go @@ -47,10 +47,13 @@ import ( var ( supportedShardingStrategies = []string{util.ShardingStrategyDefault, util.ShardingStrategyShuffle} + supportedQueryResponseFormats = []string{queryResponseFormatJson, queryResponseFormatProtobuf} + // Validation errors. 
- errInvalidShardingStrategy = errors.New("invalid sharding strategy") - errInvalidTenantShardSize = errors.New("invalid tenant shard size, the value must be greater than 0") - errInvalidMaxConcurrentEvals = errors.New("invalid max concurrent evals, the value must be greater than 0") + errInvalidShardingStrategy = errors.New("invalid sharding strategy") + errInvalidTenantShardSize = errors.New("invalid tenant shard size, the value must be greater than 0") + errInvalidMaxConcurrentEvals = errors.New("invalid max concurrent evals, the value must be greater than 0") + errInvalidQueryResponseFormat = errors.New("invalid query response format") ) const ( @@ -82,6 +85,10 @@ const ( unknownHealthFilter string = "unknown" okHealthFilter string = "ok" errHealthFilter string = "err" + + // query response formats + queryResponseFormatJson = "json" + queryResponseFormatProtobuf = "protobuf" ) type DisabledRuleGroupErr struct { @@ -96,6 +103,9 @@ func (e *DisabledRuleGroupErr) Error() string { type Config struct { // This is used for query to query frontend to evaluate rules FrontendAddress string `yaml:"frontend_address"` + // Query response format of query frontend for evaluating rules + // It will only take effect FrontendAddress is configured. + QueryResponseFormat string `yaml:"query_response_format"` // HTTP timeout duration when querying to query frontend to evaluate rules FrontendTimeout time.Duration `yaml:"-"` // Query frontend GRPC Client configuration. @@ -185,6 +195,10 @@ func (cfg *Config) Validate(limits validation.Limits, log log.Logger) error { if cfg.ConcurrentEvalsEnabled && cfg.MaxConcurrentEvals <= 0 { return errInvalidMaxConcurrentEvals } + + if !util.StringsContain(supportedQueryResponseFormats, cfg.QueryResponseFormat) { + return errInvalidQueryResponseFormat + } return nil } @@ -207,6 +221,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { flagext.DeprecatedFlag(f, "ruler.alertmanager-use-v2", "This flag is no longer functional. V1 API is deprecated and removed", util_log.Logger) f.StringVar(&cfg.FrontendAddress, "ruler.frontend-address", "", "[Experimental] GRPC listen address of the Query Frontend, in host:port format. If set, Ruler queries to Query Frontends via gRPC. If not set, ruler queries to Ingesters directly.") + f.StringVar(&cfg.QueryResponseFormat, "ruler.query-response-format", queryResponseFormatProtobuf, fmt.Sprintf("[Experimental] Query response format to get query results from Query Frontend when the rule evaluation. It will only take effect when `-ruler.frontend-address` is configured. 
Supported values: %s", strings.Join(supportedQueryResponseFormats, ","))) cfg.ExternalURL.URL, _ = url.Parse("") // Must be non-nil f.Var(&cfg.ExternalURL, "ruler.external.url", "URL of alerts return path.") f.DurationVar(&cfg.EvaluationInterval, "ruler.evaluation-interval", 1*time.Minute, "How frequently to evaluate rules") diff --git a/pkg/storage/tsdb/expanded_postings_cache.go b/pkg/storage/tsdb/expanded_postings_cache.go index 921881b5c5..3ea8da709e 100644 --- a/pkg/storage/tsdb/expanded_postings_cache.go +++ b/pkg/storage/tsdb/expanded_postings_cache.go @@ -40,25 +40,30 @@ type ExpandedPostingsCacheMetrics struct { CacheRequests *prometheus.CounterVec CacheHits *prometheus.CounterVec CacheEvicts *prometheus.CounterVec + CacheMiss *prometheus.CounterVec NonCacheableQueries *prometheus.CounterVec } func NewPostingCacheMetrics(r prometheus.Registerer) *ExpandedPostingsCacheMetrics { return &ExpandedPostingsCacheMetrics{ CacheRequests: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ingester_expanded_postings_cache_requests", + Name: "cortex_ingester_expanded_postings_cache_requests_total", Help: "Total number of requests to the cache.", }, []string{"cache"}), CacheHits: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ingester_expanded_postings_cache_hits", + Name: "cortex_ingester_expanded_postings_cache_hits_total", Help: "Total number of hit requests to the cache.", }, []string{"cache"}), + CacheMiss: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + Name: "cortex_ingester_expanded_postings_cache_miss_total", + Help: "Total number of miss requests to the cache.", + }, []string{"cache", "reason"}), CacheEvicts: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ingester_expanded_postings_cache_evicts", + Name: "cortex_ingester_expanded_postings_cache_evicts_total", Help: "Total number of evictions in the cache, excluding items that got evicted due to TTL.", }, []string{"cache", "reason"}), NonCacheableQueries: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ - Name: "cortex_ingester_expanded_postings_non_cacheable_queries", + Name: "cortex_ingester_expanded_postings_non_cacheable_queries_total", Help: "Total number of non cacheable queries.", }, []string{"cache"}), } @@ -208,7 +213,7 @@ func (c *blocksPostingsForMatchersCache) fetchPostings(blockID ulid.ULID, ix tsd return nil, 0, err } - key := c.cacheKey(seed, blockID, ms...) + key := cacheKey(seed, blockID, ms...) 
promise, loaded := cache.getPromiseForKey(key, fetch) if loaded { c.metrics.CacheHits.WithLabelValues(cache.name).Inc() @@ -235,7 +240,7 @@ func (c *blocksPostingsForMatchersCache) getSeedForMetricName(metricName string) return c.seedByHash.getSeed(c.userId, metricName) } -func (c *blocksPostingsForMatchersCache) cacheKey(seed string, blockID ulid.ULID, ms ...*labels.Matcher) string { +func cacheKey(seed string, blockID ulid.ULID, ms ...*labels.Matcher) string { slices.SortFunc(ms, func(i, j *labels.Matcher) int { if i.Type != j.Type { return int(i.Type - j.Type) @@ -254,15 +259,16 @@ func (c *blocksPostingsForMatchersCache) cacheKey(seed string, blockID ulid.ULID sepLen = 1 ) - var size int + size := len(seed) + len(blockID.String()) + 2*sepLen for _, m := range ms { - size += len(seed) + len(blockID.String()) + len(m.Name) + len(m.Value) + typeLen + 2*sepLen + size += len(m.Name) + len(m.Value) + typeLen + sepLen } sb := strings.Builder{} sb.Grow(size) sb.WriteString(seed) sb.WriteByte('|') sb.WriteString(blockID.String()) + sb.WriteByte('|') for _, m := range ms { sb.WriteString(m.Name) sb.WriteString(m.Type.String()) @@ -373,6 +379,7 @@ func (c *fifoCache[V]) getPromiseForKey(k string, fetch func() (V, int64, error) loaded, ok := c.cachedValues.LoadOrStore(k, r) if !ok { + c.metrics.CacheMiss.WithLabelValues(c.name, "miss").Inc() r.v, r.sizeBytes, r.err = fetch() r.sizeBytes += int64(len(k)) r.ts = c.timeNow() @@ -386,7 +393,7 @@ func (c *fifoCache[V]) getPromiseForKey(k string, fetch func() (V, int64, error) // If is cached but is expired, lets try to replace the cache value. if loaded.(*cacheEntryPromise[V]).isExpired(c.cfg.Ttl, c.timeNow()) && c.cachedValues.CompareAndSwap(k, loaded, r) { - c.metrics.CacheEvicts.WithLabelValues(c.name, "expired").Inc() + c.metrics.CacheMiss.WithLabelValues(c.name, "expired").Inc() r.v, r.sizeBytes, r.err = fetch() r.sizeBytes += int64(len(k)) c.updateSize(loaded.(*cacheEntryPromise[V]).sizeBytes, r.sizeBytes) diff --git a/pkg/storage/tsdb/expanded_postings_cache_test.go b/pkg/storage/tsdb/expanded_postings_cache_test.go index 6a9476072a..19272c00e7 100644 --- a/pkg/storage/tsdb/expanded_postings_cache_test.go +++ b/pkg/storage/tsdb/expanded_postings_cache_test.go @@ -8,12 +8,43 @@ import ( "testing" "time" + "github.com/oklog/ulid" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/testutil" + "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" "go.uber.org/atomic" ) +func TestCacheKey(t *testing.T) { + blockID := ulid.MustNew(1, nil) + seed := "seed123" + matchers := []*labels.Matcher{ + { + Type: labels.MatchEqual, + Name: "name_1", + Value: "value_1", + }, + { + Type: labels.MatchNotEqual, + Name: "name_2", + Value: "value_2", + }, + { + Type: labels.MatchRegexp, + Name: "name_3", + Value: "value_4", + }, + { + Type: labels.MatchNotRegexp, + Name: "name_5", + Value: "value_4", + }, + } + r := cacheKey(seed, blockID, matchers...) + require.Equal(t, "seed123|00000000010000000000000000|name_1=value_1|name_2!=value_2|name_3=~value_4|name_5!~value_4|", r) +} + func Test_ShouldFetchPromiseOnlyOnce(t *testing.T) { cfg := PostingsCacheConfig{ Enabled: true, @@ -123,10 +154,10 @@ func TestFifoCacheExpire(t *testing.T) { if c.expectedFinalItems != numberOfKeys { err := testutil.GatherAndCompare(r, bytes.NewBufferString(fmt.Sprintf(` - # HELP cortex_ingester_expanded_postings_cache_evicts Total number of evictions in the cache, excluding items that got evicted due to TTL. 
- # TYPE cortex_ingester_expanded_postings_cache_evicts counter - cortex_ingester_expanded_postings_cache_evicts{cache="test",reason="full"} %v -`, numberOfKeys-c.expectedFinalItems)), "cortex_ingester_expanded_postings_cache_evicts") + # HELP cortex_ingester_expanded_postings_cache_evicts_total Total number of evictions in the cache, excluding items that got evicted due to TTL. + # TYPE cortex_ingester_expanded_postings_cache_evicts_total counter + cortex_ingester_expanded_postings_cache_evicts_total{cache="test",reason="full"} %v +`, numberOfKeys-c.expectedFinalItems)), "cortex_ingester_expanded_postings_cache_evicts_total") require.NoError(t, err) } @@ -150,10 +181,11 @@ func TestFifoCacheExpire(t *testing.T) { } err := testutil.GatherAndCompare(r, bytes.NewBufferString(fmt.Sprintf(` - # HELP cortex_ingester_expanded_postings_cache_evicts Total number of evictions in the cache, excluding items that got evicted due to TTL. - # TYPE cortex_ingester_expanded_postings_cache_evicts counter - cortex_ingester_expanded_postings_cache_evicts{cache="test",reason="expired"} %v -`, numberOfKeys)), "cortex_ingester_expanded_postings_cache_evicts") + # HELP cortex_ingester_expanded_postings_cache_miss_total Total number of miss requests to the cache. + # TYPE cortex_ingester_expanded_postings_cache_miss_total counter + cortex_ingester_expanded_postings_cache_miss_total{cache="test",reason="expired"} %v + cortex_ingester_expanded_postings_cache_miss_total{cache="test",reason="miss"} %v +`, numberOfKeys, numberOfKeys)), "cortex_ingester_expanded_postings_cache_miss_total") require.NoError(t, err) cache.timeNow = func() time.Time { @@ -164,12 +196,12 @@ func TestFifoCacheExpire(t *testing.T) { return 2, 18, nil }) - // Should expire all keys again as ttl is expired + // Should expire all keys expired keys err = testutil.GatherAndCompare(r, bytes.NewBufferString(fmt.Sprintf(` - # HELP cortex_ingester_expanded_postings_cache_evicts Total number of evictions in the cache, excluding items that got evicted due to TTL. - # TYPE cortex_ingester_expanded_postings_cache_evicts counter - cortex_ingester_expanded_postings_cache_evicts{cache="test",reason="expired"} %v -`, numberOfKeys*2)), "cortex_ingester_expanded_postings_cache_evicts") + # HELP cortex_ingester_expanded_postings_cache_evicts_total Total number of evictions in the cache, excluding items that got evicted due to TTL. 
+ # TYPE cortex_ingester_expanded_postings_cache_evicts_total counter + cortex_ingester_expanded_postings_cache_evicts_total{cache="test",reason="expired"} %v +`, numberOfKeys)), "cortex_ingester_expanded_postings_cache_evicts_total") require.NoError(t, err) } }) diff --git a/pkg/storage/tsdb/meta_extensions.go b/pkg/storage/tsdb/meta_extensions.go new file mode 100644 index 0000000000..b6b8a7acf0 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions.go @@ -0,0 +1,71 @@ +package tsdb + +import ( + "fmt" + "strconv" + + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +type CortexMetaExtensions struct { + PartitionInfo *PartitionInfo `json:"partition_info,omitempty"` + TimeRange int64 `json:"time_range,omitempty"` +} + +type PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitioned_group_id"` + PartitionCount int `json:"partition_count"` + PartitionID int `json:"partition_id"` + PartitionedGroupCreationTime int64 `json:"partitioned_group_creation_time"` +} + +var ( + DefaultPartitionInfo = PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + PartitionedGroupCreationTime: 0, + } +) + +func (c *CortexMetaExtensions) TimeRangeStr() string { + return strconv.FormatInt(c.TimeRange, 10) +} + +func ConvertToCortexMetaExtensions(extensions any) (*CortexMetaExtensions, error) { + defaultPartitionInfo := DefaultPartitionInfo + cortexExtensions, err := metadata.ConvertExtensions(extensions, &CortexMetaExtensions{ + PartitionInfo: &defaultPartitionInfo, + }) + + if err != nil { + return nil, err + } + if cortexExtensions == nil { + return nil, nil + } + converted, ok := cortexExtensions.(*CortexMetaExtensions) + if !ok { + return nil, fmt.Errorf("unable to convert extensions to CortexMetaExtensions") + } + return converted, nil +} + +func ConvertToPartitionInfo(extensions any) (*PartitionInfo, error) { + cortexExtensions, err := ConvertToCortexMetaExtensions(extensions) + if err != nil { + return nil, err + } + if cortexExtensions == nil { + return nil, nil + } + return cortexExtensions.PartitionInfo, nil +} + +func GetCortexMetaExtensionsFromMeta(meta metadata.Meta) (*CortexMetaExtensions, error) { + return ConvertToCortexMetaExtensions(meta.Thanos.Extensions) +} + +func GetPartitionInfo(meta metadata.Meta) (*PartitionInfo, error) { + return ConvertToPartitionInfo(meta.Thanos.Extensions) +} diff --git a/pkg/storage/tsdb/meta_extensions_test.go b/pkg/storage/tsdb/meta_extensions_test.go new file mode 100644 index 0000000000..6f296eb461 --- /dev/null +++ b/pkg/storage/tsdb/meta_extensions_test.go @@ -0,0 +1,182 @@ +package tsdb + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/thanos-io/thanos/pkg/block/metadata" +) + +func TestGetPartitionedInfo(t *testing.T) { + for _, tcase := range []struct { + name string + meta metadata.Meta + expected *PartitionInfo + }{ + { + name: "partition info with all information provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + { + name: "partition info with only PartitionedGroupID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionedGroupID: 123, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 123, + PartitionID: 0, + 
PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionID provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionID: 5, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 5, + PartitionCount: 0, + }, + }, + { + name: "partition info with only PartitionCount provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{ + PartitionCount: 4, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 4, + }, + }, + { + name: "meta with empty partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: &PartitionInfo{}, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 0, + }, + }, + { + name: "meta with nil partition info provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: &CortexMetaExtensions{ + PartitionInfo: nil, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta with non CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + dummy string + }{ + dummy: "test_dummy", + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta with invalid CortexMetaExtensions provided", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: struct { + PartitionInfo struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + } `json:"partition_info,omitempty"` + }{ + PartitionInfo: struct { + PartitionedGroupID uint32 `json:"partitionedGroupId"` + PartitionCount int `json:"partitionCount"` + PartitionID int `json:"partitionId"` + }{ + PartitionedGroupID: 123, + PartitionID: 8, + PartitionCount: 32, + }, + }, + }, + }, + expected: &PartitionInfo{ + PartitionedGroupID: 0, + PartitionID: 0, + PartitionCount: 1, + }, + }, + { + name: "meta does not have any extensions", + meta: metadata.Meta{ + Thanos: metadata.Thanos{ + Extensions: nil, + }, + }, + expected: nil, + }, + } { + t.Run(tcase.name, func(t *testing.T) { + result, err := GetPartitionInfo(tcase.meta) + assert.NoError(t, err) + if tcase.expected == nil { + assert.Nil(t, result) + } else { + assert.Equal(t, *tcase.expected, *result) + } + }) + } +} diff --git a/pkg/storegateway/bucket_store_metrics.go b/pkg/storegateway/bucket_store_metrics.go index 8319e6a70f..4938e73b41 100644 --- a/pkg/storegateway/bucket_store_metrics.go +++ b/pkg/storegateway/bucket_store_metrics.go @@ -55,6 +55,8 @@ type BucketStoreMetrics struct { indexHeaderLazyUnloadCount *prometheus.Desc indexHeaderLazyUnloadFailedCount *prometheus.Desc indexHeaderLazyLoadDuration *prometheus.Desc + indexHeaderDownloadDuration *prometheus.Desc + indexHeaderLoadDuration *prometheus.Desc } func NewBucketStoreMetrics() *BucketStoreMetrics { @@ -205,6 +207,14 @@ func NewBucketStoreMetrics() *BucketStoreMetrics { "cortex_bucket_store_indexheader_lazy_load_duration_seconds", "Duration of the index-header lazy loading in seconds.", nil, nil), + indexHeaderDownloadDuration: prometheus.NewDesc( + "cortex_bucket_store_indexheader_download_duration_seconds", + "Duration of the index-header download 
from objstore in seconds.", + nil, nil), + indexHeaderLoadDuration: prometheus.NewDesc( + "cortex_bucket_store_indexheader_load_duration_seconds", + "Duration of the index-header loading in seconds.", + nil, nil), lazyExpandedPostingsCount: prometheus.NewDesc( "cortex_bucket_store_lazy_expanded_postings_total", @@ -272,6 +282,8 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) { out <- m.indexHeaderLazyUnloadCount out <- m.indexHeaderLazyUnloadFailedCount out <- m.indexHeaderLazyLoadDuration + out <- m.indexHeaderDownloadDuration + out <- m.indexHeaderLoadDuration out <- m.lazyExpandedPostingsCount out <- m.lazyExpandedPostingGroups @@ -323,6 +335,8 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) { data.SendSumOfCounters(out, m.indexHeaderLazyUnloadCount, "thanos_bucket_store_indexheader_lazy_unload_total") data.SendSumOfCounters(out, m.indexHeaderLazyUnloadFailedCount, "thanos_bucket_store_indexheader_lazy_unload_failed_total") data.SendSumOfHistograms(out, m.indexHeaderLazyLoadDuration, "thanos_bucket_store_indexheader_lazy_load_duration_seconds") + data.SendSumOfHistograms(out, m.indexHeaderDownloadDuration, "thanos_bucket_store_indexheader_download_duration_seconds") + data.SendSumOfHistograms(out, m.indexHeaderLoadDuration, "thanos_bucket_store_indexheader_load_duration_seconds") data.SendSumOfCounters(out, m.lazyExpandedPostingsCount, "thanos_bucket_store_lazy_expanded_postings_total") data.SendSumOfCountersWithLabels(out, m.lazyExpandedPostingGroups, "thanos_bucket_store_lazy_expanded_posting_groups_total", "reason") diff --git a/pkg/storegateway/bucket_store_metrics_test.go b/pkg/storegateway/bucket_store_metrics_test.go index 079f2017f0..ac4ff00df8 100644 --- a/pkg/storegateway/bucket_store_metrics_test.go +++ b/pkg/storegateway/bucket_store_metrics_test.go @@ -493,6 +493,22 @@ func TestBucketStoreMetrics(t *testing.T) { # HELP cortex_bucket_store_empty_postings_total Total number of empty postings when fetching block series. # TYPE cortex_bucket_store_empty_postings_total counter cortex_bucket_store_empty_postings_total 112595 + + # HELP cortex_bucket_store_indexheader_download_duration_seconds Duration of the index-header download from objstore in seconds. + # TYPE cortex_bucket_store_indexheader_download_duration_seconds histogram + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.02"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.05"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.2"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="0.5"} 0 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="1"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="2"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="5"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_indexheader_download_duration_seconds_sum 2.25 + cortex_bucket_store_indexheader_download_duration_seconds_count 3 + # HELP cortex_bucket_store_postings_fetch_duration_seconds Time it takes to fetch postings to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses. 
# TYPE cortex_bucket_store_postings_fetch_duration_seconds histogram cortex_bucket_store_postings_fetch_duration_seconds_bucket{le="0.001"} 0 @@ -543,6 +559,22 @@ func TestBucketStoreMetrics(t *testing.T) { # HELP cortex_bucket_store_indexheader_lazy_unload_total Total number of index-header lazy unload operations. # TYPE cortex_bucket_store_indexheader_lazy_unload_total counter cortex_bucket_store_indexheader_lazy_unload_total 1.396178e+06 + + # HELP cortex_bucket_store_indexheader_load_duration_seconds Duration of the index-header loading in seconds. + # TYPE cortex_bucket_store_indexheader_load_duration_seconds histogram + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.01"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.02"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.05"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.2"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="0.5"} 0 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="1"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="2"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="5"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_indexheader_load_duration_seconds_sum 2.55 + cortex_bucket_store_indexheader_load_duration_seconds_count 3 + # HELP cortex_bucket_store_lazy_expanded_posting_groups_total Total number of posting groups that are marked as lazy and corresponding reason. # TYPE cortex_bucket_store_lazy_expanded_posting_groups_total counter cortex_bucket_store_lazy_expanded_posting_groups_total{reason="keys_limit"} 202671 @@ -685,6 +717,8 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry { m.indexHeaderLazyUnloadCount.Add(62 * base) m.indexHeaderLazyUnloadFailedCount.Add(63 * base) m.indexHeaderLazyLoadDuration.Observe(0.65) + m.indexHeaderDownloadDuration.Observe(0.75) + m.indexHeaderLoadDuration.Observe(0.85) m.emptyPostingCount.Add(5 * base) @@ -737,6 +771,8 @@ type mockedBucketStoreMetrics struct { indexHeaderLazyUnloadCount prometheus.Counter indexHeaderLazyUnloadFailedCount prometheus.Counter indexHeaderLazyLoadDuration prometheus.Histogram + indexHeaderDownloadDuration prometheus.Histogram + indexHeaderLoadDuration prometheus.Histogram lazyExpandedPostingsCount prometheus.Counter lazyExpandedPostingGroups *prometheus.CounterVec @@ -913,6 +949,16 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe Help: "Duration of the index-header lazy loading in seconds.", Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5}, }) + m.indexHeaderDownloadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_indexheader_download_duration_seconds", + Help: "Duration of the index-header download from objstore in seconds.", + Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5}, + }) + m.indexHeaderLoadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_indexheader_load_duration_seconds", + Help: "Duration of the index-header loading in seconds.", + Buckets: []float64{0.01, 0.02, 0.05, 0.1, 0.2, 0.5, 1, 2, 5}, + }) m.emptyPostingCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{ Name: "thanos_bucket_store_empty_postings_total", diff --git a/pkg/util/shard.go b/pkg/util/shard.go 
index 5d3de01cc4..364f39656f 100644 --- a/pkg/util/shard.go +++ b/pkg/util/shard.go @@ -11,7 +11,7 @@ const ( ShardingStrategyDefault = "default" ShardingStrategyShuffle = "shuffle-sharding" - // Compaction mode + // Compaction strategies CompactionStrategyDefault = "default" CompactionStrategyPartitioning = "partitioning" ) diff --git a/pkg/util/validation/limits.go b/pkg/util/validation/limits.go index 38f31e8da0..7d2ab8518d 100644 --- a/pkg/util/validation/limits.go +++ b/pkg/util/validation/limits.go @@ -192,8 +192,10 @@ type Limits struct { MaxDownloadedBytesPerRequest int `yaml:"max_downloaded_bytes_per_request" json:"max_downloaded_bytes_per_request"` // Compactor. - CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` - CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorBlocksRetentionPeriod model.Duration `yaml:"compactor_blocks_retention_period" json:"compactor_blocks_retention_period"` + CompactorTenantShardSize int `yaml:"compactor_tenant_shard_size" json:"compactor_tenant_shard_size"` + CompactorPartitionIndexSizeBytes int64 `yaml:"compactor_partition_index_size_bytes" json:"compactor_partition_index_size_bytes"` + CompactorPartitionSeriesCount int64 `yaml:"compactor_partition_series_count" json:"compactor_partition_series_count"` // This config doesn't have a CLI flag registered here because they're registered in // their own original config struct. @@ -282,6 +284,9 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.Var(&l.CompactorBlocksRetentionPeriod, "compactor.blocks-retention-period", "Delete blocks containing samples older than the specified retention period. 0 to disable.") f.IntVar(&l.CompactorTenantShardSize, "compactor.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used by the compactor. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant.") + // Default to 64GB because this is the hard limit of index size in Cortex + f.Int64Var(&l.CompactorPartitionIndexSizeBytes, "compactor.partition-index-size-bytes", 68719476736, "Index size limit in bytes for each compaction partition. 0 means no limit") + f.Int64Var(&l.CompactorPartitionSeriesCount, "compactor.partition-series-count", 0, "Time series count limit for each compaction partition. 0 means no limit") // Store-gateway. f.Float64Var(&l.StoreGatewayTenantShardSize, "store-gateway.tenant-shard-size", 0, "The default tenant's shard size when the shuffle-sharding strategy is used. Must be set when the store-gateway sharding is enabled with the shuffle-sharding strategy. When this setting is specified in the per-tenant overrides, a value of 0 disables shuffle sharding for the tenant. If the value is < 1 the shard size will be a percentage of the total store-gateways.") @@ -799,6 +804,16 @@ func (o *Overrides) CompactorTenantShardSize(userID string) int { return o.GetOverridesForUser(userID).CompactorTenantShardSize } +// CompactorPartitionIndexSizeBytes returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. +func (o *Overrides) CompactorPartitionIndexSizeBytes(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionIndexSizeBytes +} + +// CompactorPartitionSeriesCount returns shard size (number of rulers) used by this tenant when using shuffle-sharding strategy. 
+func (o *Overrides) CompactorPartitionSeriesCount(userID string) int64 { + return o.GetOverridesForUser(userID).CompactorPartitionSeriesCount +} + // MetricRelabelConfigs returns the metric relabel configs for a given user. func (o *Overrides) MetricRelabelConfigs(userID string) []*relabel.Config { return o.GetOverridesForUser(userID).MetricRelabelConfigs diff --git a/vendor/github.com/cespare/xxhash/LICENSE.txt b/vendor/github.com/cespare/xxhash/LICENSE.txt deleted file mode 100644 index 24b53065f4..0000000000 --- a/vendor/github.com/cespare/xxhash/LICENSE.txt +++ /dev/null @@ -1,22 +0,0 @@ -Copyright (c) 2016 Caleb Spare - -MIT License - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE -LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION -OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION -WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. diff --git a/vendor/github.com/cespare/xxhash/README.md b/vendor/github.com/cespare/xxhash/README.md deleted file mode 100644 index 0982fd25e5..0000000000 --- a/vendor/github.com/cespare/xxhash/README.md +++ /dev/null @@ -1,50 +0,0 @@ -# xxhash - -[![GoDoc](https://godoc.org/github.com/cespare/xxhash?status.svg)](https://godoc.org/github.com/cespare/xxhash) - -xxhash is a Go implementation of the 64-bit -[xxHash](http://cyan4973.github.io/xxHash/) algorithm, XXH64. This is a -high-quality hashing algorithm that is much faster than anything in the Go -standard library. - -The API is very small, taking its cue from the other hashing packages in the -standard library: - - $ go doc github.com/cespare/xxhash ! - package xxhash // import "github.com/cespare/xxhash" - - Package xxhash implements the 64-bit variant of xxHash (XXH64) as described - at http://cyan4973.github.io/xxHash/. - - func New() hash.Hash64 - func Sum64(b []byte) uint64 - func Sum64String(s string) uint64 - -This implementation provides a fast pure-Go implementation and an even faster -assembly implementation for amd64. 
- -## Benchmarks - -Here are some quick benchmarks comparing the pure-Go and assembly -implementations of Sum64 against another popular Go XXH64 implementation, -[github.com/OneOfOne/xxhash](https://github.com/OneOfOne/xxhash): - -| input size | OneOfOne | cespare (purego) | cespare | -| --- | --- | --- | --- | -| 5 B | 416 MB/s | 720 MB/s | 872 MB/s | -| 100 B | 3980 MB/s | 5013 MB/s | 5252 MB/s | -| 4 KB | 12727 MB/s | 12999 MB/s | 13026 MB/s | -| 10 MB | 9879 MB/s | 10775 MB/s | 10913 MB/s | - -These numbers were generated with: - -``` -$ go test -benchtime 10s -bench '/OneOfOne,' -$ go test -tags purego -benchtime 10s -bench '/xxhash,' -$ go test -benchtime 10s -bench '/xxhash,' -``` - -## Projects using this package - -- [InfluxDB](https://github.com/influxdata/influxdb) -- [Prometheus](https://github.com/prometheus/prometheus) diff --git a/vendor/github.com/cespare/xxhash/rotate.go b/vendor/github.com/cespare/xxhash/rotate.go deleted file mode 100644 index f3eac5ebc0..0000000000 --- a/vendor/github.com/cespare/xxhash/rotate.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build !go1.9 - -package xxhash - -// TODO(caleb): After Go 1.10 comes out, remove this fallback code. - -func rol1(x uint64) uint64 { return (x << 1) | (x >> (64 - 1)) } -func rol7(x uint64) uint64 { return (x << 7) | (x >> (64 - 7)) } -func rol11(x uint64) uint64 { return (x << 11) | (x >> (64 - 11)) } -func rol12(x uint64) uint64 { return (x << 12) | (x >> (64 - 12)) } -func rol18(x uint64) uint64 { return (x << 18) | (x >> (64 - 18)) } -func rol23(x uint64) uint64 { return (x << 23) | (x >> (64 - 23)) } -func rol27(x uint64) uint64 { return (x << 27) | (x >> (64 - 27)) } -func rol31(x uint64) uint64 { return (x << 31) | (x >> (64 - 31)) } diff --git a/vendor/github.com/cespare/xxhash/rotate19.go b/vendor/github.com/cespare/xxhash/rotate19.go deleted file mode 100644 index b99612bab8..0000000000 --- a/vendor/github.com/cespare/xxhash/rotate19.go +++ /dev/null @@ -1,14 +0,0 @@ -// +build go1.9 - -package xxhash - -import "math/bits" - -func rol1(x uint64) uint64 { return bits.RotateLeft64(x, 1) } -func rol7(x uint64) uint64 { return bits.RotateLeft64(x, 7) } -func rol11(x uint64) uint64 { return bits.RotateLeft64(x, 11) } -func rol12(x uint64) uint64 { return bits.RotateLeft64(x, 12) } -func rol18(x uint64) uint64 { return bits.RotateLeft64(x, 18) } -func rol23(x uint64) uint64 { return bits.RotateLeft64(x, 23) } -func rol27(x uint64) uint64 { return bits.RotateLeft64(x, 27) } -func rol31(x uint64) uint64 { return bits.RotateLeft64(x, 31) } diff --git a/vendor/github.com/cespare/xxhash/xxhash.go b/vendor/github.com/cespare/xxhash/xxhash.go deleted file mode 100644 index f896bd28f0..0000000000 --- a/vendor/github.com/cespare/xxhash/xxhash.go +++ /dev/null @@ -1,168 +0,0 @@ -// Package xxhash implements the 64-bit variant of xxHash (XXH64) as described -// at http://cyan4973.github.io/xxHash/. -package xxhash - -import ( - "encoding/binary" - "hash" -) - -const ( - prime1 uint64 = 11400714785074694791 - prime2 uint64 = 14029467366897019727 - prime3 uint64 = 1609587929392839161 - prime4 uint64 = 9650029242287828579 - prime5 uint64 = 2870177450012600261 -) - -// NOTE(caleb): I'm using both consts and vars of the primes. Using consts where -// possible in the Go code is worth a small (but measurable) performance boost -// by avoiding some MOVQs. 
Vars are needed for the asm and also are useful for -// convenience in the Go code in a few places where we need to intentionally -// avoid constant arithmetic (e.g., v1 := prime1 + prime2 fails because the -// result overflows a uint64). -var ( - prime1v = prime1 - prime2v = prime2 - prime3v = prime3 - prime4v = prime4 - prime5v = prime5 -) - -type xxh struct { - v1 uint64 - v2 uint64 - v3 uint64 - v4 uint64 - total int - mem [32]byte - n int // how much of mem is used -} - -// New creates a new hash.Hash64 that implements the 64-bit xxHash algorithm. -func New() hash.Hash64 { - var x xxh - x.Reset() - return &x -} - -func (x *xxh) Reset() { - x.n = 0 - x.total = 0 - x.v1 = prime1v + prime2 - x.v2 = prime2 - x.v3 = 0 - x.v4 = -prime1v -} - -func (x *xxh) Size() int { return 8 } -func (x *xxh) BlockSize() int { return 32 } - -// Write adds more data to x. It always returns len(b), nil. -func (x *xxh) Write(b []byte) (n int, err error) { - n = len(b) - x.total += len(b) - - if x.n+len(b) < 32 { - // This new data doesn't even fill the current block. - copy(x.mem[x.n:], b) - x.n += len(b) - return - } - - if x.n > 0 { - // Finish off the partial block. - copy(x.mem[x.n:], b) - x.v1 = round(x.v1, u64(x.mem[0:8])) - x.v2 = round(x.v2, u64(x.mem[8:16])) - x.v3 = round(x.v3, u64(x.mem[16:24])) - x.v4 = round(x.v4, u64(x.mem[24:32])) - b = b[32-x.n:] - x.n = 0 - } - - if len(b) >= 32 { - // One or more full blocks left. - b = writeBlocks(x, b) - } - - // Store any remaining partial block. - copy(x.mem[:], b) - x.n = len(b) - - return -} - -func (x *xxh) Sum(b []byte) []byte { - s := x.Sum64() - return append( - b, - byte(s>>56), - byte(s>>48), - byte(s>>40), - byte(s>>32), - byte(s>>24), - byte(s>>16), - byte(s>>8), - byte(s), - ) -} - -func (x *xxh) Sum64() uint64 { - var h uint64 - - if x.total >= 32 { - v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 - h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) - h = mergeRound(h, v1) - h = mergeRound(h, v2) - h = mergeRound(h, v3) - h = mergeRound(h, v4) - } else { - h = x.v3 + prime5 - } - - h += uint64(x.total) - - i, end := 0, x.n - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(x.mem[i:i+8])) - h ^= k1 - h = rol27(h)*prime1 + prime4 - } - if i+4 <= end { - h ^= uint64(u32(x.mem[i:i+4])) * prime1 - h = rol23(h)*prime2 + prime3 - i += 4 - } - for i < end { - h ^= uint64(x.mem[i]) * prime5 - h = rol11(h) * prime1 - i++ - } - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return h -} - -func u64(b []byte) uint64 { return binary.LittleEndian.Uint64(b) } -func u32(b []byte) uint32 { return binary.LittleEndian.Uint32(b) } - -func round(acc, input uint64) uint64 { - acc += input * prime2 - acc = rol31(acc) - acc *= prime1 - return acc -} - -func mergeRound(acc, val uint64) uint64 { - val = round(0, val) - acc ^= val - acc = acc*prime1 + prime4 - return acc -} diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.go b/vendor/github.com/cespare/xxhash/xxhash_amd64.go deleted file mode 100644 index d617652680..0000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_amd64.go +++ /dev/null @@ -1,12 +0,0 @@ -// +build !appengine -// +build gc -// +build !purego - -package xxhash - -// Sum64 computes the 64-bit xxHash digest of b. 
-// -//go:noescape -func Sum64(b []byte) uint64 - -func writeBlocks(x *xxh, b []byte) []byte diff --git a/vendor/github.com/cespare/xxhash/xxhash_amd64.s b/vendor/github.com/cespare/xxhash/xxhash_amd64.s deleted file mode 100644 index 757f2011f0..0000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_amd64.s +++ /dev/null @@ -1,233 +0,0 @@ -// +build !appengine -// +build gc -// +build !purego - -#include "textflag.h" - -// Register allocation: -// AX h -// CX pointer to advance through b -// DX n -// BX loop end -// R8 v1, k1 -// R9 v2 -// R10 v3 -// R11 v4 -// R12 tmp -// R13 prime1v -// R14 prime2v -// R15 prime4v - -// round reads from and advances the buffer pointer in CX. -// It assumes that R13 has prime1v and R14 has prime2v. -#define round(r) \ - MOVQ (CX), R12 \ - ADDQ $8, CX \ - IMULQ R14, R12 \ - ADDQ R12, r \ - ROLQ $31, r \ - IMULQ R13, r - -// mergeRound applies a merge round on the two registers acc and val. -// It assumes that R13 has prime1v, R14 has prime2v, and R15 has prime4v. -#define mergeRound(acc, val) \ - IMULQ R14, val \ - ROLQ $31, val \ - IMULQ R13, val \ - XORQ val, acc \ - IMULQ R13, acc \ - ADDQ R15, acc - -// func Sum64(b []byte) uint64 -TEXT ·Sum64(SB), NOSPLIT, $0-32 - // Load fixed primes. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - MOVQ ·prime4v(SB), R15 - - // Load slice. - MOVQ b_base+0(FP), CX - MOVQ b_len+8(FP), DX - LEAQ (CX)(DX*1), BX - - // The first loop limit will be len(b)-32. - SUBQ $32, BX - - // Check whether we have at least one block. - CMPQ DX, $32 - JLT noBlocks - - // Set up initial state (v1, v2, v3, v4). - MOVQ R13, R8 - ADDQ R14, R8 - MOVQ R14, R9 - XORQ R10, R10 - XORQ R11, R11 - SUBQ R13, R11 - - // Loop until CX > BX. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ CX, BX - JLE blockLoop - - MOVQ R8, AX - ROLQ $1, AX - MOVQ R9, R12 - ROLQ $7, R12 - ADDQ R12, AX - MOVQ R10, R12 - ROLQ $12, R12 - ADDQ R12, AX - MOVQ R11, R12 - ROLQ $18, R12 - ADDQ R12, AX - - mergeRound(AX, R8) - mergeRound(AX, R9) - mergeRound(AX, R10) - mergeRound(AX, R11) - - JMP afterBlocks - -noBlocks: - MOVQ ·prime5v(SB), AX - -afterBlocks: - ADDQ DX, AX - - // Right now BX has len(b)-32, and we want to loop until CX > len(b)-8. - ADDQ $24, BX - - CMPQ CX, BX - JG fourByte - -wordLoop: - // Calculate k1. - MOVQ (CX), R8 - ADDQ $8, CX - IMULQ R14, R8 - ROLQ $31, R8 - IMULQ R13, R8 - - XORQ R8, AX - ROLQ $27, AX - IMULQ R13, AX - ADDQ R15, AX - - CMPQ CX, BX - JLE wordLoop - -fourByte: - ADDQ $4, BX - CMPQ CX, BX - JG singles - - MOVL (CX), R8 - ADDQ $4, CX - IMULQ R13, R8 - XORQ R8, AX - - ROLQ $23, AX - IMULQ R14, AX - ADDQ ·prime3v(SB), AX - -singles: - ADDQ $4, BX - CMPQ CX, BX - JGE finalize - -singlesLoop: - MOVBQZX (CX), R12 - ADDQ $1, CX - IMULQ ·prime5v(SB), R12 - XORQ R12, AX - - ROLQ $11, AX - IMULQ R13, AX - - CMPQ CX, BX - JL singlesLoop - -finalize: - MOVQ AX, R12 - SHRQ $33, R12 - XORQ R12, AX - IMULQ R14, AX - MOVQ AX, R12 - SHRQ $29, R12 - XORQ R12, AX - IMULQ ·prime3v(SB), AX - MOVQ AX, R12 - SHRQ $32, R12 - XORQ R12, AX - - MOVQ AX, ret+24(FP) - RET - -// writeBlocks uses the same registers as above except that it uses AX to store -// the x pointer. - -// func writeBlocks(x *xxh, b []byte) []byte -TEXT ·writeBlocks(SB), NOSPLIT, $0-56 - // Load fixed primes needed for round. - MOVQ ·prime1v(SB), R13 - MOVQ ·prime2v(SB), R14 - - // Load slice. 
- MOVQ b_base+8(FP), CX - MOVQ CX, ret_base+32(FP) // initialize return base pointer; see NOTE below - MOVQ b_len+16(FP), DX - LEAQ (CX)(DX*1), BX - SUBQ $32, BX - - // Load vN from x. - MOVQ x+0(FP), AX - MOVQ 0(AX), R8 // v1 - MOVQ 8(AX), R9 // v2 - MOVQ 16(AX), R10 // v3 - MOVQ 24(AX), R11 // v4 - - // We don't need to check the loop condition here; this function is - // always called with at least one block of data to process. -blockLoop: - round(R8) - round(R9) - round(R10) - round(R11) - - CMPQ CX, BX - JLE blockLoop - - // Copy vN back to x. - MOVQ R8, 0(AX) - MOVQ R9, 8(AX) - MOVQ R10, 16(AX) - MOVQ R11, 24(AX) - - // Construct return slice. - // NOTE: It's important that we don't construct a slice that has a base - // pointer off the end of the original slice, as in Go 1.7+ this will - // cause runtime crashes. (See discussion in, for example, - // https://github.com/golang/go/issues/16772.) - // Therefore, we calculate the length/cap first, and if they're zero, we - // keep the old base. This is what the compiler does as well if you - // write code like - // b = b[len(b):] - - // New length is 32 - (CX - BX) -> BX+32 - CX. - ADDQ $32, BX - SUBQ CX, BX - JZ afterSetBase - - MOVQ CX, ret_base+32(FP) - -afterSetBase: - MOVQ BX, ret_len+40(FP) - MOVQ BX, ret_cap+48(FP) // set cap == len - - RET diff --git a/vendor/github.com/cespare/xxhash/xxhash_other.go b/vendor/github.com/cespare/xxhash/xxhash_other.go deleted file mode 100644 index c68d13f89e..0000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_other.go +++ /dev/null @@ -1,75 +0,0 @@ -// +build !amd64 appengine !gc purego - -package xxhash - -// Sum64 computes the 64-bit xxHash digest of b. -func Sum64(b []byte) uint64 { - // A simpler version would be - // x := New() - // x.Write(b) - // return x.Sum64() - // but this is faster, particularly for small inputs. - - n := len(b) - var h uint64 - - if n >= 32 { - v1 := prime1v + prime2 - v2 := prime2 - v3 := uint64(0) - v4 := -prime1v - for len(b) >= 32 { - v1 = round(v1, u64(b[0:8:len(b)])) - v2 = round(v2, u64(b[8:16:len(b)])) - v3 = round(v3, u64(b[16:24:len(b)])) - v4 = round(v4, u64(b[24:32:len(b)])) - b = b[32:len(b):len(b)] - } - h = rol1(v1) + rol7(v2) + rol12(v3) + rol18(v4) - h = mergeRound(h, v1) - h = mergeRound(h, v2) - h = mergeRound(h, v3) - h = mergeRound(h, v4) - } else { - h = prime5 - } - - h += uint64(n) - - i, end := 0, len(b) - for ; i+8 <= end; i += 8 { - k1 := round(0, u64(b[i:i+8:len(b)])) - h ^= k1 - h = rol27(h)*prime1 + prime4 - } - if i+4 <= end { - h ^= uint64(u32(b[i:i+4:len(b)])) * prime1 - h = rol23(h)*prime2 + prime3 - i += 4 - } - for ; i < end; i++ { - h ^= uint64(b[i]) * prime5 - h = rol11(h) * prime1 - } - - h ^= h >> 33 - h *= prime2 - h ^= h >> 29 - h *= prime3 - h ^= h >> 32 - - return h -} - -func writeBlocks(x *xxh, b []byte) []byte { - v1, v2, v3, v4 := x.v1, x.v2, x.v3, x.v4 - for len(b) >= 32 { - v1 = round(v1, u64(b[0:8:len(b)])) - v2 = round(v2, u64(b[8:16:len(b)])) - v3 = round(v3, u64(b[16:24:len(b)])) - v4 = round(v4, u64(b[24:32:len(b)])) - b = b[32:len(b):len(b)] - } - x.v1, x.v2, x.v3, x.v4 = v1, v2, v3, v4 - return b -} diff --git a/vendor/github.com/cespare/xxhash/xxhash_safe.go b/vendor/github.com/cespare/xxhash/xxhash_safe.go deleted file mode 100644 index dfa15ab7e2..0000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_safe.go +++ /dev/null @@ -1,10 +0,0 @@ -// +build appengine - -// This file contains the safe implementations of otherwise unsafe-using code. 
- -package xxhash - -// Sum64String computes the 64-bit xxHash digest of s. -func Sum64String(s string) uint64 { - return Sum64([]byte(s)) -} diff --git a/vendor/github.com/cespare/xxhash/xxhash_unsafe.go b/vendor/github.com/cespare/xxhash/xxhash_unsafe.go deleted file mode 100644 index d2b64e8bb0..0000000000 --- a/vendor/github.com/cespare/xxhash/xxhash_unsafe.go +++ /dev/null @@ -1,30 +0,0 @@ -// +build !appengine - -// This file encapsulates usage of unsafe. -// xxhash_safe.go contains the safe implementations. - -package xxhash - -import ( - "reflect" - "unsafe" -) - -// Sum64String computes the 64-bit xxHash digest of s. -// It may be faster than Sum64([]byte(s)) by avoiding a copy. -// -// TODO(caleb): Consider removing this if an optimization is ever added to make -// it unnecessary: https://golang.org/issue/2205. -// -// TODO(caleb): We still have a function call; we could instead write Go/asm -// copies of Sum64 for strings to squeeze out a bit more speed. -func Sum64String(s string) uint64 { - // See https://groups.google.com/d/msg/golang-nuts/dcjzJy-bSpw/tcZYBzQqAQAJ - // for some discussion about this unsafe conversion. - var b []byte - bh := (*reflect.SliceHeader)(unsafe.Pointer(&b)) - bh.Data = (*reflect.StringHeader)(unsafe.Pointer(&s)).Data - bh.Len = len(s) - bh.Cap = len(s) - return Sum64(b) -} diff --git a/vendor/github.com/hashicorp/consul/api/api.go b/vendor/github.com/hashicorp/consul/api/api.go index d4d853d5d4..27af1ea569 100644 --- a/vendor/github.com/hashicorp/consul/api/api.go +++ b/vendor/github.com/hashicorp/consul/api/api.go @@ -1087,8 +1087,23 @@ func (c *Client) doRequest(r *request) (time.Duration, *http.Response, error) { if err != nil { return 0, nil, err } + + contentType := GetContentType(req) + + if req != nil { + req.Header.Set(contentTypeHeader, contentType) + } + start := time.Now() resp, err := c.config.HttpClient.Do(req) + + if resp != nil { + respContentType := resp.Header.Get(contentTypeHeader) + if respContentType == "" || respContentType != contentType { + resp.Header.Set(contentTypeHeader, contentType) + } + } + diff := time.Since(start) return diff, resp, err } diff --git a/vendor/github.com/hashicorp/consul/api/content_type.go b/vendor/github.com/hashicorp/consul/api/content_type.go new file mode 100644 index 0000000000..37c8cf60aa --- /dev/null +++ b/vendor/github.com/hashicorp/consul/api/content_type.go @@ -0,0 +1,81 @@ +// Copyright (c) HashiCorp, Inc. +// SPDX-License-Identifier: MPL-2.0 + +package api + +import ( + "net/http" + "strings" +) + +const ( + contentTypeHeader = "Content-Type" + plainContentType = "text/plain; charset=utf-8" + octetStream = "application/octet-stream" + jsonContentType = "application/json" // Default content type +) + +// ContentTypeRule defines a rule for determining the content type of an HTTP request. +// This rule is based on the combination of the HTTP path, method, and the desired content type. +type ContentTypeRule struct { + path string + httpMethod string + contentType string +} + +var ContentTypeRules = []ContentTypeRule{ + { + path: "/v1/snapshot", + httpMethod: http.MethodPut, + contentType: octetStream, + }, + { + path: "/v1/kv", + httpMethod: http.MethodPut, + contentType: octetStream, + }, + { + path: "/v1/event/fire", + httpMethod: http.MethodPut, + contentType: octetStream, + }, +} + +// GetContentType returns the content type for a request +// This function isused as routing logic or middleware to determine and enforce +// the appropriate content type for HTTP requests. 
+func GetContentType(req *http.Request) string { + reqContentType := req.Header.Get(contentTypeHeader) + + if isIndexPage(req) { + return plainContentType + } + + // For GET, DELETE, or internal API paths, ensure a valid Content-Type is returned. + if req.Method == http.MethodGet || req.Method == http.MethodDelete || strings.HasPrefix(req.URL.Path, "/v1/internal") { + if reqContentType == "" { + // Default to JSON Content-Type if no Content-Type is provided. + return jsonContentType + } + // Return the provided Content-Type if it exists. + return reqContentType + } + + for _, rule := range ContentTypeRules { + if matchesRule(req, rule) { + return rule.contentType + } + } + return jsonContentType +} + +// matchesRule checks if a request matches a content type rule +func matchesRule(req *http.Request, rule ContentTypeRule) bool { + return strings.HasPrefix(req.URL.Path, rule.path) && + (rule.httpMethod == "" || req.Method == rule.httpMethod) +} + +// isIndexPage checks if the request is for the index page +func isIndexPage(req *http.Request) bool { + return req.URL.Path == "/" || req.URL.Path == "/ui" +} diff --git a/vendor/github.com/thanos-io/promql-engine/engine/distributed.go b/vendor/github.com/thanos-io/promql-engine/engine/distributed.go index fef4243892..a7c7378209 100644 --- a/vendor/github.com/thanos-io/promql-engine/engine/distributed.go +++ b/vendor/github.com/thanos-io/promql-engine/engine/distributed.go @@ -69,37 +69,45 @@ func NewDistributedEngine(opts Opts, endpoints api.RemoteEndpoints) *Distributed func (l DistributedEngine) SetQueryLogger(log promql.QueryLogger) {} func (l DistributedEngine) NewInstantQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, ts time.Time) (promql.Query, error) { + return l.MakeInstantQuery(ctx, q, fromPromQLOpts(opts), qs, ts) +} + +func (l DistributedEngine) NewRangeQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, start, end time.Time, interval time.Duration) (promql.Query, error) { + return l.MakeRangeQuery(ctx, q, fromPromQLOpts(opts), qs, start, end, interval) +} + +func (l DistributedEngine) MakeInstantQueryFromPlan(ctx context.Context, q storage.Queryable, opts *QueryOpts, plan logicalplan.Node, ts time.Time) (promql.Query, error) { // Truncate milliseconds to avoid mismatch in timestamps between remote and local engines. // Some clients might only support second precision when executing queries. ts = ts.Truncate(time.Second) - return l.remoteEngine.NewInstantQuery(ctx, q, opts, qs, ts) + return l.remoteEngine.MakeInstantQueryFromPlan(ctx, q, opts, plan, ts) } -func (l DistributedEngine) NewRangeQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, start, end time.Time, interval time.Duration) (promql.Query, error) { +func (l DistributedEngine) MakeRangeQueryFromPlan(ctx context.Context, q storage.Queryable, opts *QueryOpts, plan logicalplan.Node, start, end time.Time, interval time.Duration) (promql.Query, error) { // Truncate milliseconds to avoid mismatch in timestamps between remote and local engines. // Some clients might only support second precision when executing queries. 
start = start.Truncate(time.Second) end = end.Truncate(time.Second) interval = interval.Truncate(time.Second) - return l.remoteEngine.NewRangeQuery(ctx, q, opts, qs, start, end, interval) + return l.remoteEngine.MakeRangeQueryFromPlan(ctx, q, opts, plan, start, end, interval) } -func (l DistributedEngine) NewRangeQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, plan logicalplan.Node, start, end time.Time, interval time.Duration) (promql.Query, error) { +func (l DistributedEngine) MakeInstantQuery(ctx context.Context, q storage.Queryable, opts *QueryOpts, qs string, ts time.Time) (promql.Query, error) { // Truncate milliseconds to avoid mismatch in timestamps between remote and local engines. // Some clients might only support second precision when executing queries. - start = start.Truncate(time.Second) - end = end.Truncate(time.Second) - interval = interval.Truncate(time.Second) + ts = ts.Truncate(time.Second) - return l.remoteEngine.NewRangeQueryFromPlan(ctx, q, opts, plan, start, end, interval) + return l.remoteEngine.MakeInstantQuery(ctx, q, opts, qs, ts) } -func (l DistributedEngine) NewInstantQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, plan logicalplan.Node, ts time.Time) (promql.Query, error) { +func (l DistributedEngine) MakeRangeQuery(ctx context.Context, q storage.Queryable, opts *QueryOpts, qs string, start, end time.Time, interval time.Duration) (promql.Query, error) { // Truncate milliseconds to avoid mismatch in timestamps between remote and local engines. // Some clients might only support second precision when executing queries. - ts = ts.Truncate(time.Second) + start = start.Truncate(time.Second) + end = end.Truncate(time.Second) + interval = interval.Truncate(time.Second) - return l.remoteEngine.NewInstantQueryFromPlan(ctx, q, opts, plan, ts) + return l.remoteEngine.MakeRangeQuery(ctx, q, opts, qs, start, end, interval) } diff --git a/vendor/github.com/thanos-io/promql-engine/engine/engine.go b/vendor/github.com/thanos-io/promql-engine/engine/engine.go index 6a5648544c..152947e042 100644 --- a/vendor/github.com/thanos-io/promql-engine/engine/engine.go +++ b/vendor/github.com/thanos-io/promql-engine/engine/engine.go @@ -75,6 +75,9 @@ type Opts struct { // EnableAnalysis enables query analysis. EnableAnalysis bool + // EnablePartialResponses enables partial responses in distributed mode. + EnablePartialResponses bool + // SelectorBatchSize specifies the maximum number of samples to be returned by selectors in a single batch. SelectorBatchSize int64 @@ -96,6 +99,33 @@ func (o Opts) getLogicalOptimizers() []logicalplan.Optimizer { return optimizers } +// QueryOpts implements promql.QueryOpts but allows to override more engine default options. +type QueryOpts struct { + // These values are used to implement promql.QueryOpts, they have weird "Param" suffix because + // they are accessed by methods of the same name. + LookbackDeltaParam time.Duration + EnablePerStepStatsParam bool + + // DecodingConcurrency can be used to override the DecodingConcurrency engine setting. + DecodingConcurrency int + + // EnablePartialResponses can be used to override the EnablePartialResponses engine setting. 
+ EnablePartialResponses bool +} + +func (opts QueryOpts) LookbackDelta() time.Duration { return opts.LookbackDeltaParam } +func (opts QueryOpts) EnablePerStepStats() bool { return opts.EnablePerStepStatsParam } + +func fromPromQLOpts(opts promql.QueryOpts) *QueryOpts { + if opts == nil { + return &QueryOpts{} + } + return &QueryOpts{ + LookbackDeltaParam: opts.LookbackDelta(), + EnablePerStepStatsParam: opts.EnablePerStepStats(), + } +} + // New creates a new query engine with the given options. The query engine will // use the storage passed in NewInstantQuery and NewRangeQuery for retrieving // data when executing queries. @@ -184,14 +214,15 @@ func NewWithScanners(opts Opts, scanners engstorage.Scanners) *Engine { disableDuplicateLabelChecks: opts.DisableDuplicateLabelChecks, disableFallback: opts.DisableFallback, - logger: opts.Logger, - lookbackDelta: opts.LookbackDelta, - enablePerStepStats: opts.EnablePerStepStats, - logicalOptimizers: opts.getLogicalOptimizers(), - timeout: opts.Timeout, - metrics: metrics, - extLookbackDelta: opts.ExtLookbackDelta, - enableAnalysis: opts.EnableAnalysis, + logger: opts.Logger, + lookbackDelta: opts.LookbackDelta, + enablePerStepStats: opts.EnablePerStepStats, + logicalOptimizers: opts.getLogicalOptimizers(), + timeout: opts.Timeout, + metrics: metrics, + extLookbackDelta: opts.ExtLookbackDelta, + enableAnalysis: opts.EnableAnalysis, + enablePartialResponses: opts.EnablePartialResponses, noStepSubqueryIntervalFn: func(d time.Duration) time.Duration { return time.Duration(opts.NoStepSubqueryIntervalFn(d.Milliseconds()) * 1000000) }, @@ -225,45 +256,21 @@ type Engine struct { extLookbackDelta time.Duration decodingConcurrency int enableAnalysis bool + enablePartialResponses bool noStepSubqueryIntervalFn func(time.Duration) time.Duration } -func (e *Engine) NewInstantQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, ts time.Time) (promql.Query, error) { - idx, err := e.activeQueryTracker.Insert(ctx, qs) - if err != nil { - return nil, err - } - defer e.activeQueryTracker.Delete(idx) - +func (e *Engine) MakeInstantQuery(ctx context.Context, q storage.Queryable, opts *QueryOpts, qs string, ts time.Time) (promql.Query, error) { expr, err := parser.NewParser(qs, parser.WithFunctions(e.functions)).ParseExpr() if err != nil { return nil, err } - - if opts == nil { - opts = promql.NewPrometheusQueryOpts(false, e.lookbackDelta) - } - if opts.LookbackDelta() <= 0 { - opts = promql.NewPrometheusQueryOpts(opts.EnablePerStepStats(), e.lookbackDelta) - } - // determine sorting order before optimizers run, we do this by looking for "sort" // and "sort_desc" and optimize them away afterwards since they are only needed at // the presentation layer and not when computing the results. 
resultSort := newResultSort(expr) - qOpts := &query.Options{ - Start: ts, - End: ts, - Step: 0, - StepsBatch: stepsBatch, - LookbackDelta: opts.LookbackDelta(), - EnablePerStepStats: e.enablePerStepStats && opts.EnablePerStepStats(), - ExtLookbackDelta: e.extLookbackDelta, - EnableAnalysis: e.enableAnalysis, - NoStepSubqueryIntervalFn: e.noStepSubqueryIntervalFn, - DecodingConcurrency: e.decodingConcurrency, - } + qOpts := e.makeQueryOpts(ts, ts, 0, opts) if qOpts.StepsBatch > 64 { return nil, ErrStepsBatchTooLarge } @@ -301,20 +308,13 @@ func (e *Engine) NewInstantQuery(ctx context.Context, q storage.Queryable, opts }, nil } -func (e *Engine) NewInstantQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, root logicalplan.Node, ts time.Time) (promql.Query, error) { +func (e *Engine) MakeInstantQueryFromPlan(ctx context.Context, q storage.Queryable, opts *QueryOpts, root logicalplan.Node, ts time.Time) (promql.Query, error) { idx, err := e.activeQueryTracker.Insert(ctx, root.String()) if err != nil { return nil, err } defer e.activeQueryTracker.Delete(idx) - if opts == nil { - opts = promql.NewPrometheusQueryOpts(false, e.lookbackDelta) - } - if opts.LookbackDelta() <= 0 { - opts = promql.NewPrometheusQueryOpts(opts.EnablePerStepStats(), e.lookbackDelta) - } - qOpts := e.makeQueryOpts(ts, ts, 0, opts) if qOpts.StepsBatch > 64 { return nil, ErrStepsBatchTooLarge @@ -355,7 +355,7 @@ func (e *Engine) NewInstantQueryFromPlan(ctx context.Context, q storage.Queryabl }, nil } -func (e *Engine) NewRangeQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, start, end time.Time, step time.Duration) (promql.Query, error) { +func (e *Engine) MakeRangeQuery(ctx context.Context, q storage.Queryable, opts *QueryOpts, qs string, start, end time.Time, step time.Duration) (promql.Query, error) { idx, err := e.activeQueryTracker.Insert(ctx, qs) if err != nil { return nil, err @@ -371,12 +371,6 @@ func (e *Engine) NewRangeQuery(ctx context.Context, q storage.Queryable, opts pr if expr.Type() != parser.ValueTypeVector && expr.Type() != parser.ValueTypeScalar { return nil, errors.Newf("invalid expression type %q for range query, must be Scalar or instant Vector", parser.DocumentedType(expr.Type())) } - if opts == nil { - opts = promql.NewPrometheusQueryOpts(false, e.lookbackDelta) - } - if opts.LookbackDelta() <= 0 { - opts = promql.NewPrometheusQueryOpts(opts.EnablePerStepStats(), e.lookbackDelta) - } qOpts := e.makeQueryOpts(start, end, step, opts) if qOpts.StepsBatch > 64 { return nil, ErrStepsBatchTooLarge @@ -413,19 +407,13 @@ func (e *Engine) NewRangeQuery(ctx context.Context, q storage.Queryable, opts pr }, nil } -func (e *Engine) NewRangeQueryFromPlan(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, root logicalplan.Node, start, end time.Time, step time.Duration) (promql.Query, error) { +func (e *Engine) MakeRangeQueryFromPlan(ctx context.Context, q storage.Queryable, opts *QueryOpts, root logicalplan.Node, start, end time.Time, step time.Duration) (promql.Query, error) { idx, err := e.activeQueryTracker.Insert(ctx, root.String()) if err != nil { return nil, err } defer e.activeQueryTracker.Delete(idx) - if opts == nil { - opts = promql.NewPrometheusQueryOpts(false, e.lookbackDelta) - } - if opts.LookbackDelta() <= 0 { - opts = promql.NewPrometheusQueryOpts(opts.EnablePerStepStats(), e.lookbackDelta) - } qOpts := e.makeQueryOpts(start, end, step, opts) if qOpts.StepsBatch > 64 { return nil, ErrStepsBatchTooLarge @@ -461,20 +449,50 @@ 
func (e *Engine) NewRangeQueryFromPlan(ctx context.Context, q storage.Queryable, }, nil } -func (e *Engine) makeQueryOpts(start time.Time, end time.Time, step time.Duration, opts promql.QueryOpts) *query.Options { - qOpts := &query.Options{ +// PromQL compatibility constructors + +// NewInstantQuery implements the promql.Engine interface. +func (e *Engine) NewInstantQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, ts time.Time) (promql.Query, error) { + return e.MakeInstantQuery(ctx, q, fromPromQLOpts(opts), qs, ts) +} + +// NewRangeQuery implements the promql.Engine interface. +func (e *Engine) NewRangeQuery(ctx context.Context, q storage.Queryable, opts promql.QueryOpts, qs string, start, end time.Time, step time.Duration) (promql.Query, error) { + return e.MakeRangeQuery(ctx, q, fromPromQLOpts(opts), qs, start, end, step) +} + +func (e *Engine) makeQueryOpts(start time.Time, end time.Time, step time.Duration, opts *QueryOpts) *query.Options { + res := &query.Options{ Start: start, End: end, Step: step, StepsBatch: stepsBatch, - LookbackDelta: opts.LookbackDelta(), - EnablePerStepStats: e.enablePerStepStats && opts.EnablePerStepStats(), + LookbackDelta: e.lookbackDelta, + EnablePerStepStats: e.enablePerStepStats, ExtLookbackDelta: e.extLookbackDelta, EnableAnalysis: e.enableAnalysis, + EnablePartialResponses: e.enablePartialResponses, NoStepSubqueryIntervalFn: e.noStepSubqueryIntervalFn, DecodingConcurrency: e.decodingConcurrency, } - return qOpts + if opts == nil { + return res + } + + if opts.LookbackDelta() > 0 { + res.LookbackDelta = opts.LookbackDelta() + } + if opts.EnablePerStepStats() { + res.EnablePerStepStats = opts.EnablePerStepStats() + } + + if opts.DecodingConcurrency != 0 { + res.DecodingConcurrency = opts.DecodingConcurrency + } + if opts.EnablePartialResponses { + res.EnablePartialResponses = opts.EnablePartialResponses + } + return res } func (e *Engine) storageScanners(queryable storage.Queryable, qOpts *query.Options, lplan logicalplan.Plan) (engstorage.Scanners, error) { diff --git a/vendor/github.com/thanos-io/promql-engine/execution/execution.go b/vendor/github.com/thanos-io/promql-engine/execution/execution.go index 8746693b02..d465410199 100644 --- a/vendor/github.com/thanos-io/promql-engine/execution/execution.go +++ b/vendor/github.com/thanos-io/promql-engine/execution/execution.go @@ -386,7 +386,7 @@ func newRemoteExecution(ctx context.Context, e logicalplan.RemoteExecution, opts // We need to set the lookback for the selector to 0 since the remote query already applies one lookback. 
selectorOpts := *opts selectorOpts.LookbackDelta = 0 - remoteExec := remote.NewExecution(qry, model.NewVectorPool(opts.StepsBatch), e.QueryRangeStart, &selectorOpts, hints) + remoteExec := remote.NewExecution(qry, model.NewVectorPool(opts.StepsBatch), e.QueryRangeStart, e.Engine.LabelSets(), &selectorOpts, hints) return exchange.NewConcurrent(remoteExec, 2, opts), nil } diff --git a/vendor/github.com/thanos-io/promql-engine/execution/remote/operator.go b/vendor/github.com/thanos-io/promql-engine/execution/remote/operator.go index 645b9b273d..35f290dd71 100644 --- a/vendor/github.com/thanos-io/promql-engine/execution/remote/operator.go +++ b/vendor/github.com/thanos-io/promql-engine/execution/remote/operator.go @@ -9,6 +9,7 @@ import ( "sync" "time" + "github.com/efficientgo/core/errors" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/promql" "github.com/prometheus/prometheus/storage" @@ -29,8 +30,8 @@ type Execution struct { model.OperatorTelemetry } -func NewExecution(query promql.Query, pool *model.VectorPool, queryRangeStart time.Time, opts *query.Options, _ storage.SelectHints) *Execution { - storage := newStorageFromQuery(query, opts) +func NewExecution(query promql.Query, pool *model.VectorPool, queryRangeStart time.Time, engineLabels []labels.Labels, opts *query.Options, _ storage.SelectHints) *Execution { + storage := newStorageFromQuery(query, opts, engineLabels) oper := &Execution{ storage: storage, query: query, @@ -90,16 +91,18 @@ func (e *Execution) Samples() *stats.QuerySamples { type storageAdapter struct { query promql.Query opts *query.Options + lbls []labels.Labels once sync.Once err error series []promstorage.SignedSeries } -func newStorageFromQuery(query promql.Query, opts *query.Options) *storageAdapter { +func newStorageFromQuery(query promql.Query, opts *query.Options, lbls []labels.Labels) *storageAdapter { return &storageAdapter{ query: query, opts: opts, + lbls: lbls, } } @@ -120,7 +123,12 @@ func (s *storageAdapter) executeQuery(ctx context.Context) { warnings.AddToContext(w, ctx) } if result.Err != nil { - s.err = result.Err + err := errors.Wrapf(result.Err, "remote exec error [%s]", s.lbls) + if s.opts.EnablePartialResponses { + warnings.AddToContext(err, ctx) + } else { + s.err = err + } return } switch val := result.Value.(type) { diff --git a/vendor/github.com/thanos-io/promql-engine/logicalplan/distribute.go b/vendor/github.com/thanos-io/promql-engine/logicalplan/distribute.go index 0f16847d0a..f1ae3e0cbd 100644 --- a/vendor/github.com/thanos-io/promql-engine/logicalplan/distribute.go +++ b/vendor/github.com/thanos-io/promql-engine/logicalplan/distribute.go @@ -254,7 +254,6 @@ func (m DistributedExecutionOptimizer) Optimize(plan Node, opts *query.Options) *current = m.distributeQuery(current, engines, m.subqueryOpts(parents, current, opts), minEngineOverlap) return true }) - return plan, *warns } diff --git a/vendor/github.com/thanos-io/promql-engine/query/options.go b/vendor/github.com/thanos-io/promql-engine/query/options.go index 5dbb3c0971..decdda36cd 100644 --- a/vendor/github.com/thanos-io/promql-engine/query/options.go +++ b/vendor/github.com/thanos-io/promql-engine/query/options.go @@ -17,6 +17,7 @@ type Options struct { ExtLookbackDelta time.Duration NoStepSubqueryIntervalFn func(time.Duration) time.Duration EnableAnalysis bool + EnablePartialResponses bool DecodingConcurrency int } diff --git a/vendor/github.com/thanos-io/promql-engine/storage/prometheus/vector_selector.go 
b/vendor/github.com/thanos-io/promql-engine/storage/prometheus/vector_selector.go index 082a689f63..2cc88fcd20 100644 --- a/vendor/github.com/thanos-io/promql-engine/storage/prometheus/vector_selector.go +++ b/vendor/github.com/thanos-io/promql-engine/storage/prometheus/vector_selector.go @@ -242,6 +242,5 @@ func selectPoint(it *storage.MemoizedSeriesIterator, ts, lookbackDelta, offset i if value.IsStaleNaN(v) || (fh != nil && value.IsStaleNaN(fh.Sum)) { return 0, 0, nil, false, nil } - return t, v, fh, true, nil } diff --git a/vendor/github.com/thanos-io/thanos/pkg/cacheutil/memcached_server_selector.go b/vendor/github.com/thanos-io/thanos/pkg/cacheutil/memcached_server_selector.go index 00a122e075..5426d6af33 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/cacheutil/memcached_server_selector.go +++ b/vendor/github.com/thanos-io/thanos/pkg/cacheutil/memcached_server_selector.go @@ -8,7 +8,7 @@ import ( "sync" "github.com/bradfitz/gomemcache/memcache" - "github.com/cespare/xxhash" + "github.com/cespare/xxhash/v2" "github.com/facette/natsort" ) diff --git a/vendor/github.com/thanos-io/thanos/pkg/query/remote_engine.go b/vendor/github.com/thanos-io/thanos/pkg/query/remote_engine.go index 77a74c9a6f..c44dd04b8d 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/query/remote_engine.go +++ b/vendor/github.com/thanos-io/thanos/pkg/query/remote_engine.go @@ -33,11 +33,12 @@ import ( // Opts are the options for a PromQL query. type Opts struct { - AutoDownsample bool - ReplicaLabels []string - PartitionLabels []string - Timeout time.Duration - EnablePartialResponse bool + AutoDownsample bool + ReplicaLabels []string + PartitionLabels []string + Timeout time.Duration + EnablePartialResponse bool + QueryDistributedWithOverlappingInterval bool } // Client is a query client that executes PromQL queries. @@ -114,6 +115,7 @@ func NewRemoteEngine(logger log.Logger, queryClient Client, opts Opts) *remoteEn // a block due to retention before other replicas did the same. // See https://github.com/thanos-io/promql-engine/issues/187. func (r *remoteEngine) MinT() int64 { + r.mintOnce.Do(func() { var ( hashBuf = make([]byte, 0, 128) @@ -126,7 +128,11 @@ func (r *remoteEngine) MinT() int64 { highestMintByLabelSet[key] = lset.MinTime continue } - if lset.MinTime > lsetMinT { + // If we are querying with overlapping intervals, we want to find the first available timestamp + // otherwise we want to find the last available timestamp. 
+ if r.opts.QueryDistributedWithOverlappingInterval && lset.MinTime < lsetMinT { + highestMintByLabelSet[key] = lset.MinTime + } else if !r.opts.QueryDistributedWithOverlappingInterval && lset.MinTime > lsetMinT { highestMintByLabelSet[key] = lset.MinTime } } diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go b/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go index 1c434f503f..33fb4732dc 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go @@ -14,13 +14,14 @@ import ( "math" "os" "path" + "slices" "sort" "strings" "sync" "time" "github.com/alecthomas/units" - "github.com/cespare/xxhash" + "github.com/cespare/xxhash/v2" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/gogo/protobuf/types" @@ -35,7 +36,6 @@ import ( "github.com/prometheus/prometheus/tsdb/encoding" "github.com/prometheus/prometheus/tsdb/index" "github.com/weaveworks/common/httpgrpc" - "golang.org/x/exp/slices" "golang.org/x/sync/errgroup" "google.golang.org/grpc" "google.golang.org/grpc/codes" diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/cache/filter_cache.go b/vendor/github.com/thanos-io/thanos/pkg/store/cache/filter_cache.go index 44d46db709..ade9da4c81 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/cache/filter_cache.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/cache/filter_cache.go @@ -6,11 +6,11 @@ package storecache import ( "context" "fmt" + "slices" "github.com/oklog/ulid" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" - "golang.org/x/exp/slices" ) type FilteredIndexCache struct { diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/flushable.go b/vendor/github.com/thanos-io/thanos/pkg/store/flushable.go index 33680c3c89..aa722b6aac 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/flushable.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/flushable.go @@ -4,8 +4,9 @@ package store import ( + "slices" + "github.com/prometheus/prometheus/model/labels" - "golang.org/x/exp/slices" "github.com/thanos-io/thanos/pkg/store/labelpb" "github.com/thanos-io/thanos/pkg/store/storepb" diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/lazy_postings.go b/vendor/github.com/thanos-io/thanos/pkg/store/lazy_postings.go index ef7ae5d00a..783e04b31b 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/lazy_postings.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/lazy_postings.go @@ -6,6 +6,7 @@ package store import ( "context" "math" + "slices" "strings" "github.com/go-kit/log/level" @@ -14,7 +15,6 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/storage" "github.com/prometheus/prometheus/tsdb/index" - "golang.org/x/exp/slices" "github.com/thanos-io/thanos/pkg/block/indexheader" ) diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/tsdb_selector.go b/vendor/github.com/thanos-io/thanos/pkg/store/tsdb_selector.go index 463ab96b14..761aa46737 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/tsdb_selector.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/tsdb_selector.go @@ -4,12 +4,12 @@ package store import ( - "sort" + "maps" + "slices" "strings" "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "golang.org/x/exp/maps" "github.com/thanos-io/thanos/pkg/store/storepb" ) @@ -88,8 +88,7 @@ func MatchersForLabelSets(labelSets []labels.Labels) []storepb.LabelMatcher { matchers := make([]storepb.LabelMatcher, 
0, len(labelNameValues)) for lblName, lblVals := range labelNameValues { - values := maps.Keys(lblVals) - sort.Strings(values) + values := slices.Sorted(maps.Keys(lblVals)) matcher := storepb.LabelMatcher{ Name: lblName, Value: strings.Join(values, "|"), diff --git a/vendor/golang.org/x/exp/maps/maps.go b/vendor/golang.org/x/exp/maps/maps.go deleted file mode 100644 index ecc0dabb74..0000000000 --- a/vendor/golang.org/x/exp/maps/maps.go +++ /dev/null @@ -1,94 +0,0 @@ -// Copyright 2021 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -// Package maps defines various functions useful with maps of any type. -package maps - -// Keys returns the keys of the map m. -// The keys will be in an indeterminate order. -func Keys[M ~map[K]V, K comparable, V any](m M) []K { - r := make([]K, 0, len(m)) - for k := range m { - r = append(r, k) - } - return r -} - -// Values returns the values of the map m. -// The values will be in an indeterminate order. -func Values[M ~map[K]V, K comparable, V any](m M) []V { - r := make([]V, 0, len(m)) - for _, v := range m { - r = append(r, v) - } - return r -} - -// Equal reports whether two maps contain the same key/value pairs. -// Values are compared using ==. -func Equal[M1, M2 ~map[K]V, K, V comparable](m1 M1, m2 M2) bool { - if len(m1) != len(m2) { - return false - } - for k, v1 := range m1 { - if v2, ok := m2[k]; !ok || v1 != v2 { - return false - } - } - return true -} - -// EqualFunc is like Equal, but compares values using eq. -// Keys are still compared with ==. -func EqualFunc[M1 ~map[K]V1, M2 ~map[K]V2, K comparable, V1, V2 any](m1 M1, m2 M2, eq func(V1, V2) bool) bool { - if len(m1) != len(m2) { - return false - } - for k, v1 := range m1 { - if v2, ok := m2[k]; !ok || !eq(v1, v2) { - return false - } - } - return true -} - -// Clear removes all entries from m, leaving it empty. -func Clear[M ~map[K]V, K comparable, V any](m M) { - for k := range m { - delete(m, k) - } -} - -// Clone returns a copy of m. This is a shallow clone: -// the new keys and values are set using ordinary assignment. -func Clone[M ~map[K]V, K comparable, V any](m M) M { - // Preserve nil in case it matters. - if m == nil { - return nil - } - r := make(M, len(m)) - for k, v := range m { - r[k] = v - } - return r -} - -// Copy copies all key/value pairs in src adding them to dst. -// When a key in src is already present in dst, -// the value in dst will be overwritten by the value associated -// with the key in src. -func Copy[M1 ~map[K]V, M2 ~map[K]V, K comparable, V any](dst M1, src M2) { - for k, v := range src { - dst[k] = v - } -} - -// DeleteFunc deletes any key/value pairs from m for which del returns true. -func DeleteFunc[M ~map[K]V, K comparable, V any](m M, del func(K, V) bool) { - for k, v := range m { - if del(k, v) { - delete(m, k) - } - } -} diff --git a/vendor/google.golang.org/protobuf/internal/errors/is_go112.go b/vendor/google.golang.org/protobuf/internal/errors/is_go112.go deleted file mode 100644 index fbcd349207..0000000000 --- a/vendor/google.golang.org/protobuf/internal/errors/is_go112.go +++ /dev/null @@ -1,40 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build !go1.13 -// +build !go1.13 - -package errors - -import "reflect" - -// Is is a copy of Go 1.13's errors.Is for use with older Go versions. 
-func Is(err, target error) bool { - if target == nil { - return err == target - } - - isComparable := reflect.TypeOf(target).Comparable() - for { - if isComparable && err == target { - return true - } - if x, ok := err.(interface{ Is(error) bool }); ok && x.Is(target) { - return true - } - if err = unwrap(err); err == nil { - return false - } - } -} - -func unwrap(err error) error { - u, ok := err.(interface { - Unwrap() error - }) - if !ok { - return nil - } - return u.Unwrap() -} diff --git a/vendor/google.golang.org/protobuf/internal/errors/is_go113.go b/vendor/google.golang.org/protobuf/internal/errors/is_go113.go deleted file mode 100644 index 5e72f1cde9..0000000000 --- a/vendor/google.golang.org/protobuf/internal/errors/is_go113.go +++ /dev/null @@ -1,13 +0,0 @@ -// Copyright 2020 The Go Authors. All rights reserved. -// Use of this source code is governed by a BSD-style -// license that can be found in the LICENSE file. - -//go:build go1.13 -// +build go1.13 - -package errors - -import "errors" - -// Is is errors.Is. -func Is(err, target error) bool { return errors.Is(err, target) } diff --git a/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go b/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go index 1b9b16a407..31c19b54f8 100644 --- a/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go +++ b/vendor/google.golang.org/protobuf/internal/impl/message_reflect.go @@ -85,9 +85,7 @@ func (mi *MessageInfo) makeKnownFieldsFunc(si structInfo) { mi.oneofs = map[protoreflect.Name]*oneofInfo{} for i := 0; i < md.Oneofs().Len(); i++ { od := md.Oneofs().Get(i) - if !od.IsSynthetic() { - mi.oneofs[od.Name()] = makeOneofInfo(od, si, mi.Exporter) - } + mi.oneofs[od.Name()] = makeOneofInfo(od, si, mi.Exporter) } mi.denseFields = make([]*fieldInfo, fds.Len()*2) diff --git a/vendor/google.golang.org/protobuf/internal/version/version.go b/vendor/google.golang.org/protobuf/internal/version/version.go index e27eaefcf3..3018450df7 100644 --- a/vendor/google.golang.org/protobuf/internal/version/version.go +++ b/vendor/google.golang.org/protobuf/internal/version/version.go @@ -52,7 +52,7 @@ import ( const ( Major = 1 Minor = 36 - Patch = 0 + Patch = 1 PreRelease = "" ) diff --git a/vendor/modules.txt b/vendor/modules.txt index 08294793d7..3922b35f95 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -293,9 +293,6 @@ github.com/caio/go-tdigest # github.com/cenkalti/backoff/v4 v4.3.0 ## explicit; go 1.18 github.com/cenkalti/backoff/v4 -# github.com/cespare/xxhash v1.1.0 -## explicit -github.com/cespare/xxhash # github.com/cespare/xxhash/v2 v2.3.0 ## explicit; go 1.11 github.com/cespare/xxhash/v2 @@ -556,7 +553,7 @@ github.com/grpc-ecosystem/go-grpc-middleware/v2 github.com/grpc-ecosystem/grpc-gateway/v2/internal/httprule github.com/grpc-ecosystem/grpc-gateway/v2/runtime github.com/grpc-ecosystem/grpc-gateway/v2/utilities -# github.com/hashicorp/consul/api v1.30.0 +# github.com/hashicorp/consul/api v1.31.0 ## explicit; go 1.19 github.com/hashicorp/consul/api # github.com/hashicorp/errwrap v1.1.0 @@ -967,7 +964,7 @@ github.com/thanos-io/objstore/providers/gcs github.com/thanos-io/objstore/providers/s3 github.com/thanos-io/objstore/providers/swift github.com/thanos-io/objstore/tracing/opentracing -# github.com/thanos-io/promql-engine v0.0.0-20241203103240-2f49f80c7c68 +# github.com/thanos-io/promql-engine v0.0.0-20241217103156-9dbff30059cf ## explicit; go 1.22.0 github.com/thanos-io/promql-engine/api github.com/thanos-io/promql-engine/engine @@ -990,7 
+987,7 @@ github.com/thanos-io/promql-engine/query github.com/thanos-io/promql-engine/ringbuffer github.com/thanos-io/promql-engine/storage github.com/thanos-io/promql-engine/storage/prometheus -# github.com/thanos-io/thanos v0.37.2-0.20241210234302-0ea6bac096ce +# github.com/thanos-io/thanos v0.37.3-0.20241224143735-2d041dc774da ## explicit; go 1.23.0 github.com/thanos-io/thanos/pkg/api/query/querypb github.com/thanos-io/thanos/pkg/block @@ -1307,12 +1304,11 @@ golang.org/x/crypto/pkcs12/internal/rc2 # golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 ## explicit; go 1.20 golang.org/x/exp/constraints -golang.org/x/exp/maps golang.org/x/exp/slices # golang.org/x/mod v0.21.0 ## explicit; go 1.22.0 golang.org/x/mod/semver -# golang.org/x/net v0.32.0 +# golang.org/x/net v0.33.0 ## explicit; go 1.18 golang.org/x/net/bpf golang.org/x/net/context @@ -1494,7 +1490,7 @@ google.golang.org/grpc/stats google.golang.org/grpc/status google.golang.org/grpc/tap google.golang.org/grpc/test/bufconn -# google.golang.org/protobuf v1.36.0 +# google.golang.org/protobuf v1.36.1 ## explicit; go 1.21 google.golang.org/protobuf/encoding/protodelim google.golang.org/protobuf/encoding/protojson