Skip to content

Commit

Permalink
Merge branch 'main' into query_frontend_tls_redis_fix
Browse files Browse the repository at this point in the history
  • Loading branch information
fpetkovski authored Nov 28, 2024
2 parents da6344a + 1d76335 commit 1a3dc07
Show file tree
Hide file tree
Showing 143 changed files with 3,639 additions and 762 deletions.
52 changes: 0 additions & 52 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,49 +15,6 @@ executors:
- image: quay.io/thanos/docker-swift-onlyone-authv2-keystone:v0.1

jobs:
test:
executor: golang-test
environment:
GO111MODULE: "on"
steps:
- git-shallow-clone/checkout
- go/load-cache
- go/mod-download
- run:
name: Download bingo modules
command: |
make install-tool-deps
- go/save-cache
- setup_remote_docker:
version: docker24
- run:
name: Create Secret if PR is not forked
# GCS integration tests are run only for author's PR that have write access, because these tests
# require credentials. Env variables that sets up these tests will work only for these kind of PRs.
command: |
if ! [ -z ${GCP_PROJECT} ]; then
echo $GOOGLE_APPLICATION_CREDENTIALS_CONTENT > $GOOGLE_APPLICATION_CREDENTIALS
echo "Awesome! GCS and S3 AWS integration tests are enabled."
fi
- run:
name: "Run unit tests."
no_output_timeout: "30m"
environment:
THANOS_TEST_OBJSTORE_SKIP: GCS,S3,AZURE,COS,ALIYUNOSS,BOS,OCI,OBS
# Variables for Swift testing.
OS_AUTH_URL: http://127.0.0.1:5000/v2.0
OS_PASSWORD: s3cr3t
OS_PROJECT_NAME: admin
OS_REGION_NAME: RegionOne
OS_USERNAME: admin
# taskset sets CPU affinity to 2 (current CPU limit).
command: |
if [ -z ${GCP_PROJECT} ]; then
export THANOS_TEST_OBJSTORE_SKIP=${THANOS_TEST_OBJSTORE_SKIP}
fi
echo "Skipping tests for object storages: ${THANOS_TEST_OBJSTORE_SKIP}"
taskset 2 make test
# Cross build is needed for publish_release but needs to be done outside of docker.
cross_build:
machine: true
Expand Down Expand Up @@ -127,27 +84,18 @@ workflows:
version: 2
thanos:
jobs:
- test:
filters:
tags:
only: /.*/
- publish_main:
requires:
- test
filters:
branches:
only: main
- cross_build:
requires:
- test
filters:
tags:
only: /^v[0-9]+(\.[0-9]+){2}(-.+|[^-.]*)$/
branches:
ignore: /.*/
- publish_release:
requires:
- test
- cross_build
filters:
tags:
Expand Down
33 changes: 32 additions & 1 deletion .github/workflows/go.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,42 @@ on:
tags:
pull_request:

# TODO(bwplotka): Add tests here.
permissions:
contents: read

jobs:
unit:
runs-on: ubuntu-latest
name: Thanos unit tests
env:
THANOS_TEST_OBJSTORE_SKIP: GCS,S3,AZURE,COS,ALIYUNOSS,BOS,OCI,OBS,SWIFT
OS_AUTH_URL: http://127.0.0.1:5000/v2.0
OS_PASSWORD: s3cr3t
OS_PROJECT_NAME: admin
OS_REGION_NAME: RegionOne
OS_USERNAME: admin
GOBIN: /tmp/.bin
services:
swift:
image: 'quay.io/thanos/docker-swift-onlyone-authv2-keystone:v0.1'
ports:
- 5000:5000
steps:
- name: Checkout code
uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332 # v4.1.7

- name: Install Go.
uses: actions/setup-go@0a12ed9d6a96ab950c8f026ed9f722fe0da7ef32 # v5.0.2
with:
go-version: 1.23.x

- name: Install bingo modules
run: make install-tool-deps
- name: Add GOBIN to path
run: echo "/tmp/.bin" >> $GITHUB_PATH
- name: Run unit tests
run: make test

cross-build-check:
runs-on: ubuntu-latest
name: Go build for different platforms
Expand Down
3 changes: 3 additions & 0 deletions .mdox.validate.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,3 +47,6 @@ validators:
# Expired certificate
- regex: 'bestpractices\.coreinfrastructure\.org\/projects\/3048'
type: 'ignore'
# Frequent DNS issues.
- regex: 'build\.thebeat\.co'
type: 'ignore'
21 changes: 21 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,16 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re

### Fixed

### Added

### Changed

### Removed

## [v0.37.0 - <in progress>](https://github.com/thanos-io/thanos/tree/release-0.37)

### Fixed

- [#7511](https://github.com/thanos-io/thanos/pull/7511) Query Frontend: fix doubled gzip compression for response body.
- [#7592](https://github.com/thanos-io/thanos/pull/7592) Ruler: Only increment `thanos_rule_evaluation_with_warnings_total` metric for non PromQL warnings.
- [#7614](https://github.com/thanos-io/thanos/pull/7614) *: fix debug log formatting.
Expand All @@ -23,6 +33,12 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#7814](https://github.com/thanos-io/thanos/pull/7814) Store: label_values: if matchers contain **name**=="something", do not add <labelname> != "" to fetch less postings.
- [#7679](https://github.com/thanos-io/thanos/pull/7679) Query: respect store.limit.* flags when evaluating queries
- [#7821](https://github.com/thanos-io/thanos/pull/7679) Query/Receive: Fix coroutine leak introduced in https://github.com/thanos-io/thanos/pull/7796.
- [#7843](https://github.com/thanos-io/thanos/pull/7843) Query Frontend: fix slow query logging for non-query endpoints.
- [#7852](https://github.com/thanos-io/thanos/pull/7852) Query Frontend: pass "stats" parameter forward to queriers and fix Prometheus stats merging.
- [#7832](https://github.com/thanos-io/thanos/pull/7832) Query Frontend: Fix cache keys for dynamic split intervals.
- [#7885](https://github.com/thanos-io/thanos/pull/7885) Store: Return chunks to the pool after completing a Series call.
- [#7893](https://github.com/thanos-io/thanos/pull/7893) Sidecar: Fix retrieval of external labels for Prometheus v3.0.0.
- [#7903](https://github.com/thanos-io/thanos/pull/7903) Query: Fix panic on regex store matchers.

### Added
- [#7763](https://github.com/thanos-io/thanos/pull/7763) Ruler: use native histograms for client latency metrics.
Expand All @@ -31,6 +47,11 @@ We use *breaking :warning:* to mark changes that are not backward compatible (re
- [#7560](https://github.com/thanos-io/thanos/pull/7560) Query: Added the possibility of filtering rules by rule_name, rule_group or file to HTTP api.
- [#7652](https://github.com/thanos-io/thanos/pull/7652) Store: Implement metadata API limit in stores.
- [#7659](https://github.com/thanos-io/thanos/pull/7659) Receive: Add support for replication using [Cap'n Proto](https://capnproto.org/). This protocol has a lower CPU and memory footprint, which leads to a reduction in resource usage in Receivers. Before enabling it, make sure that all receivers are updated to a version which supports this replication method.
- [#7853](https://github.com/thanos-io/thanos/pull/7853) UI: Add support for selecting graph time range with mouse drag.
- [#7855](https://github.com/thanos-io/thanos/pull/7855) Compcat/Query: Add support for comma separated replica labels.
- [#7654](https://github.com/thanos-io/thanos/pull/7654) *: Add '--grpc-server-tls-min-version' flag to allow user to specify TLS version, otherwise default to TLS 1.3
- [#7854](https://github.com/thanos-io/thanos/pull/7854) Query Frontend: Add `--query-frontend.force-query-stats` flag to force collection of query statistics from upstream queriers.
- [#7907](https://github.com/thanos-io/thanos/pull/7907) Receive: Add `--receive.grpc-service-config` flag to configure gRPC service config for the receivers.

### Changed

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.e2e-tests
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Taking a non-alpine image for e2e tests so that cgo can be enabled for the race detector.
FROM golang:1.23.2 as builder
FROM golang:1.23.3 as builder

WORKDIR $GOPATH/src/github.com/thanos-io/thanos

Expand Down
2 changes: 1 addition & 1 deletion Dockerfile.multi-stage
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# By default we pin to amd64 sha. Use make docker to automatically adjust for arm64 versions.
ARG BASE_DOCKER_SHA="14d68ca3d69fceaa6224250c83d81d935c053fb13594c811038c461194599973"
FROM golang:1.23.2-alpine3.20 as builder
FROM golang:1.23.3-alpine3.20 as builder

WORKDIR $GOPATH/src/github.com/thanos-io/thanos
# Change in the docker context invalidates the cache so to leverage docker
Expand Down
2 changes: 1 addition & 1 deletion VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
0.37.0-dev
0.38.0-dev
23 changes: 16 additions & 7 deletions cmd/thanos/compact.go
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ import (
"github.com/thanos-io/thanos/pkg/runutil"
httpserver "github.com/thanos-io/thanos/pkg/server/http"
"github.com/thanos-io/thanos/pkg/store"
"github.com/thanos-io/thanos/pkg/strutil"
"github.com/thanos-io/thanos/pkg/tracing"
"github.com/thanos-io/thanos/pkg/ui"
)
Expand Down Expand Up @@ -205,7 +206,7 @@ func runCompact(
return err
}

bkt, err := client.NewBucket(logger, confContentYaml, component.String())
bkt, err := client.NewBucket(logger, confContentYaml, component.String(), nil)
if err != nil {
return err
}
Expand Down Expand Up @@ -254,10 +255,11 @@ func runCompact(
}

enableVerticalCompaction := conf.enableVerticalCompaction
if len(conf.dedupReplicaLabels) > 0 {
dedupReplicaLabels := strutil.ParseFlagLabels(conf.dedupReplicaLabels)
if len(dedupReplicaLabels) > 0 {
enableVerticalCompaction = true
level.Info(logger).Log(
"msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(conf.dedupReplicaLabels, ","),
"msg", "deduplication.replica-label specified, enabling vertical compaction", "dedupReplicaLabels", strings.Join(dedupReplicaLabels, ","),
)
}
if enableVerticalCompaction {
Expand All @@ -275,7 +277,7 @@ func runCompact(
labelShardedMetaFilter,
consistencyDelayMetaFilter,
ignoreDeletionMarkFilter,
block.NewReplicaLabelRemover(logger, conf.dedupReplicaLabels),
block.NewReplicaLabelRemover(logger, dedupReplicaLabels),
duplicateBlocksFilter,
noCompactMarkerFilter,
}
Expand All @@ -288,6 +290,11 @@ func runCompact(
cf.UpdateOnChange(func(blocks []metadata.Meta, err error) {
api.SetLoaded(blocks, err)
})

var syncMetasTimeout = conf.waitInterval
if !conf.wait {
syncMetasTimeout = 0
}
sy, err = compact.NewMetaSyncer(
logger,
reg,
Expand All @@ -297,6 +304,7 @@ func runCompact(
ignoreDeletionMarkFilter,
compactMetrics.blocksMarked.WithLabelValues(metadata.DeletionMarkFilename, ""),
compactMetrics.garbageCollectedBlocks,
syncMetasTimeout,
)
if err != nil {
return errors.Wrap(err, "create syncer")
Expand Down Expand Up @@ -326,7 +334,7 @@ func runCompact(
case compact.DedupAlgorithmPenalty:
mergeFunc = dedup.NewChunkSeriesMerger()

if len(conf.dedupReplicaLabels) == 0 {
if len(dedupReplicaLabels) == 0 {
return errors.New("penalty based deduplication needs at least one replica label specified")
}
case "":
Expand Down Expand Up @@ -818,8 +826,9 @@ func (cc *compactConfig) registerFlag(cmd extkingpin.FlagClause) {
"When set to penalty, penalty based deduplication algorithm will be used. At least one replica label has to be set via --deduplication.replica-label flag.").
Default("").EnumVar(&cc.dedupFunc, compact.DedupAlgorithmPenalty, "")

cmd.Flag("deduplication.replica-label", "Label to treat as a replica indicator of blocks that can be deduplicated (repeated flag). This will merge multiple replica blocks into one. This process is irreversible."+
"Experimental. When one or more labels are set, compactor will ignore the given labels so that vertical compaction can merge the blocks."+
cmd.Flag("deduplication.replica-label", "Experimental. Label to treat as a replica indicator of blocks that can be deduplicated (repeated flag). This will merge multiple replica blocks into one. This process is irreversible. "+
"Flag may be specified multiple times as well as a comma separated list of labels. "+
"When one or more labels are set, compactor will ignore the given labels so that vertical compaction can merge the blocks."+
"Please note that by default this uses a NAIVE algorithm for merging which works well for deduplication of blocks with **precisely the same samples** like produced by Receiver replication."+
"If you need a different deduplication algorithm (e.g one that works well with Prometheus replicas), please set it via --deduplication.func.").
StringsVar(&cc.dedupReplicaLabels)
Expand Down
4 changes: 4 additions & 0 deletions cmd/thanos/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ type grpcConfig struct {
tlsSrvCert string
tlsSrvKey string
tlsSrvClientCA string
tlsMinVersion string
gracePeriod time.Duration
maxConnectionAge time.Duration
}
Expand All @@ -45,6 +46,9 @@ func (gc *grpcConfig) registerFlag(cmd extkingpin.FlagClause) *grpcConfig {
cmd.Flag("grpc-server-tls-client-ca",
"TLS CA to verify clients against. If no client CA is specified, there is no client verification on server side. (tls.NoClientCert)").
Default("").StringVar(&gc.tlsSrvClientCA)
cmd.Flag("grpc-server-tls-min-version",
"TLS supported minimum version for gRPC server. If no version is specified, it'll default to 1.3. Allowed values: [\"1.0\", \"1.1\", \"1.2\", \"1.3\"]").
Default("1.3").StringVar(&gc.tlsMinVersion)
cmd.Flag("grpc-server-max-connection-age", "The grpc server max connection age. This controls how often to re-establish connections and redo TLS handshakes.").
Default("60m").DurationVar(&gc.maxConnectionAge)
cmd.Flag("grpc-grace-period",
Expand Down
2 changes: 1 addition & 1 deletion cmd/thanos/downsample.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func RunDownsample(
return err
}

bkt, err := client.NewBucket(logger, confContentYaml, component.Downsample.String())
bkt, err := client.NewBucket(logger, confContentYaml, component.Downsample.String(), nil)
if err != nil {
return err
}
Expand Down
10 changes: 10 additions & 0 deletions cmd/thanos/main_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,16 @@ func (b *erroringBucket) Name() string {
return b.bkt.Name()
}

// IterWithAttributes allows to iterate over objects in the bucket with their attributes.
func (b *erroringBucket) IterWithAttributes(ctx context.Context, dir string, f func(objstore.IterObjectAttributes) error, options ...objstore.IterOption) error {
return b.bkt.IterWithAttributes(ctx, dir, f, options...)
}

// SupportedIterOptions returns the supported iteration options.
func (b *erroringBucket) SupportedIterOptions() []objstore.IterOptionType {
return b.bkt.SupportedIterOptions()
}

// Ensures that downsampleBucket() stops its work properly
// after an error occurs with some blocks in the backlog.
// Testing for https://github.com/thanos-io/thanos/issues/4960.
Expand Down
23 changes: 14 additions & 9 deletions cmd/thanos/query.go
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ import (
httpserver "github.com/thanos-io/thanos/pkg/server/http"
"github.com/thanos-io/thanos/pkg/store"
"github.com/thanos-io/thanos/pkg/store/labelpb"
"github.com/thanos-io/thanos/pkg/strutil"
"github.com/thanos-io/thanos/pkg/targets"
"github.com/thanos-io/thanos/pkg/tenancy"
"github.com/thanos-io/thanos/pkg/tls"
Expand Down Expand Up @@ -122,7 +123,7 @@ func registerQuery(app *extkingpin.App) {
Default(string(query.ExternalLabels), string(query.StoreType)).
Enums(string(query.ExternalLabels), string(query.StoreType))

queryReplicaLabels := cmd.Flag("query.replica-label", "Labels to treat as a replica indicator along which data is deduplicated. Still you will be able to query without deduplication using 'dedup=false' parameter. Data includes time series, recording rules, and alerting rules.").
queryReplicaLabels := cmd.Flag("query.replica-label", "Labels to treat as a replica indicator along which data is deduplicated. Still you will be able to query without deduplication using 'dedup=false' parameter. Data includes time series, recording rules, and alerting rules. Flag may be specified multiple times as well as a comma separated list of labels.").
Strings()
queryPartitionLabels := cmd.Flag("query.partition-label", "Labels that partition the leaf queriers. This is used to scope down the labelsets of leaf queriers when using the distributed query mode. If set, these labels must form a partition of the leaf queriers. Partition labels must not intersect with replica labels. Every TSDB of a leaf querier must have these labels. This is useful when there are multiple external labels that are irrelevant for the partition as it allows the distributed engine to ignore them for some optimizations. If this is empty then all labels are used as partition labels.").Strings()

Expand Down Expand Up @@ -502,6 +503,7 @@ func runQuery(
dns.ResolverType(dnsSDResolver),
),
dnsSDInterval,
logger,
)

dnsEndpointProvider := dns.NewProvider(
Expand Down Expand Up @@ -539,6 +541,9 @@ func runQuery(
store.WithProxyStoreDebugLogging(debugLogging),
}

// Parse and sanitize the provided replica labels flags.
queryReplicaLabels = strutil.ParseFlagLabels(queryReplicaLabels)

var (
endpoints = prepareEndpointSet(
g,
Expand Down Expand Up @@ -604,7 +609,7 @@ func runQuery(
fileSDCache.Update(update)
endpoints.Update(ctxUpdate)

if err := dnsStoreProvider.Resolve(ctxUpdate, append(fileSDCache.Addresses(), storeAddrs...)); err != nil {
if err := dnsStoreProvider.Resolve(ctxUpdate, append(fileSDCache.Addresses(), storeAddrs...), true); err != nil {
level.Error(logger).Log("msg", "failed to resolve addresses for storeAPIs", "err", err)
}

Expand All @@ -624,22 +629,22 @@ func runQuery(
return runutil.Repeat(dnsSDInterval, ctx.Done(), func() error {
resolveCtx, resolveCancel := context.WithTimeout(ctx, dnsSDInterval)
defer resolveCancel()
if err := dnsStoreProvider.Resolve(resolveCtx, append(fileSDCache.Addresses(), storeAddrs...)); err != nil {
if err := dnsStoreProvider.Resolve(resolveCtx, append(fileSDCache.Addresses(), storeAddrs...), true); err != nil {
level.Error(logger).Log("msg", "failed to resolve addresses for storeAPIs", "err", err)
}
if err := dnsRuleProvider.Resolve(resolveCtx, ruleAddrs); err != nil {
if err := dnsRuleProvider.Resolve(resolveCtx, ruleAddrs, true); err != nil {
level.Error(logger).Log("msg", "failed to resolve addresses for rulesAPIs", "err", err)
}
if err := dnsTargetProvider.Resolve(ctx, targetAddrs); err != nil {
if err := dnsTargetProvider.Resolve(ctx, targetAddrs, true); err != nil {
level.Error(logger).Log("msg", "failed to resolve addresses for targetsAPIs", "err", err)
}
if err := dnsMetadataProvider.Resolve(resolveCtx, metadataAddrs); err != nil {
if err := dnsMetadataProvider.Resolve(resolveCtx, metadataAddrs, true); err != nil {
level.Error(logger).Log("msg", "failed to resolve addresses for metadataAPIs", "err", err)
}
if err := dnsExemplarProvider.Resolve(resolveCtx, exemplarAddrs); err != nil {
if err := dnsExemplarProvider.Resolve(resolveCtx, exemplarAddrs, true); err != nil {
level.Error(logger).Log("msg", "failed to resolve addresses for exemplarsAPI", "err", err)
}
if err := dnsEndpointProvider.Resolve(resolveCtx, endpointAddrs); err != nil {
if err := dnsEndpointProvider.Resolve(resolveCtx, endpointAddrs, true); err != nil {
level.Error(logger).Log("msg", "failed to resolve addresses passed using endpoint flag", "err", err)

}
Expand Down Expand Up @@ -789,7 +794,7 @@ func runQuery(
}
// Start query (proxy) gRPC StoreAPI.
{
tlsCfg, err := tls.NewServerConfig(log.With(logger, "protocol", "gRPC"), grpcServerConfig.tlsSrvCert, grpcServerConfig.tlsSrvKey, grpcServerConfig.tlsSrvClientCA)
tlsCfg, err := tls.NewServerConfig(log.With(logger, "protocol", "gRPC"), grpcServerConfig.tlsSrvCert, grpcServerConfig.tlsSrvKey, grpcServerConfig.tlsSrvClientCA, grpcServerConfig.tlsMinVersion)
if err != nil {
return errors.Wrap(err, "setup gRPC server")
}
Expand Down
Loading

0 comments on commit 1a3dc07

Please sign in to comment.