From 2a7fdb0322983a6fbcb01a71b160a11a34fd3c6b Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Tue, 26 Sep 2023 10:53:31 -0700 Subject: [PATCH] Update thanos to latest main (#5580) * update thanos to latest main Signed-off-by: Ben Ye * update changelog Signed-off-by: Ben Ye --------- Signed-off-by: Ben Ye --- CHANGELOG.md | 3 +- go.mod | 2 +- go.sum | 4 +-- pkg/storegateway/bucket_store_metrics.go | 7 ++++ pkg/storegateway/bucket_store_metrics_test.go | 20 +++++++++++ .../pkg/block/indexheader/binary_reader.go | 28 +++++++++------ .../thanos-io/thanos/pkg/store/bucket.go | 7 ++++ .../thanos-io/thanos/pkg/store/cache/cache.go | 6 ++++ .../thanos/pkg/store/cache/inmemory.go | 9 +++++ .../thanos/pkg/store/cache/memcached.go | 34 ++++++++++++++----- vendor/modules.txt | 2 +- 11 files changed, 97 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index edb6250b5d..b3752cf625 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,7 +36,8 @@ * [FEATURE] Ruler: Support for filtering rules in the API. #5417 * [FEATURE] Compactor: Add `-compactor.ring.tokens-file-path` to store generated tokens locally. #5432 * [FEATURE] Query Frontend: Add `-frontend.retry-on-too-many-outstanding-requests` to re-enqueue 429 requests if there are multiple query-schedulers available. #5496 -* [FEATURE] Store Gateway: Add `-blocks-storage.bucket-store.max-inflight-requests`for store gateways to reject further requests upon reaching the limit. #5553 +* [FEATURE] Store Gateway: Add `-blocks-storage.bucket-store.max-inflight-requests` for store gateways to reject further requests upon reaching the limit. #5553 +* [FEATURE] Store Gateway: Add `cortex_bucket_store_block_load_duration_seconds` histogram to track time to load blocks. #5580 * [ENHANCEMENT] Distributor/Ingester: Add span on push path #5319 * [ENHANCEMENT] Support object storage backends for runtime configuration file. #5292 * [ENHANCEMENT] Query Frontend: Reject subquery with too small step size. 
#5323 diff --git a/go.mod b/go.mod index 2043387dfb..db3e9e7014 100644 --- a/go.mod +++ b/go.mod @@ -53,7 +53,7 @@ require ( github.com/stretchr/testify v1.8.4 github.com/thanos-io/objstore v0.0.0-20230921130928-63a603e651ed github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e - github.com/thanos-io/thanos v0.32.4-0.20230921182036-6257767ec9d0 + github.com/thanos-io/thanos v0.32.4-0.20230926060504-20d29008068f github.com/uber/jaeger-client-go v2.30.0+incompatible github.com/weaveworks/common v0.0.0-20221201103051-7c2720a9024d go.etcd.io/etcd/api/v3 v3.5.9 diff --git a/go.sum b/go.sum index c94e572657..f77d8bb131 100644 --- a/go.sum +++ b/go.sum @@ -1212,8 +1212,8 @@ github.com/thanos-io/objstore v0.0.0-20230921130928-63a603e651ed h1:iWQdY3S6DpWj github.com/thanos-io/objstore v0.0.0-20230921130928-63a603e651ed/go.mod h1:oJ82xgcBDzGJrEgUsjlTj6n01+ZWUMMUR8BlZzX5xDE= github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e h1:kwsFCU8eSkZehbrAN3nXPw5RdMHi/Bok/y8l2C4M+gk= github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e/go.mod h1:+T/ZYNCGybT6eTsGGvVtGb63nT1cvUmH6MjqRrcQoKw= -github.com/thanos-io/thanos v0.32.4-0.20230921182036-6257767ec9d0 h1:T9Vot+BQao6M6j8F0JQbseAqtniOw1Csz+QHRRRwF48= -github.com/thanos-io/thanos v0.32.4-0.20230921182036-6257767ec9d0/go.mod h1:Px5Boq60s+2WwR+V4v4oxgmxfw9WHrwMwjRou6pkUNw= +github.com/thanos-io/thanos v0.32.4-0.20230926060504-20d29008068f h1:OdZZLgF2eYIiad7h4WeUPkew7Uq6F9vFPg3aDZfMQLY= +github.com/thanos-io/thanos v0.32.4-0.20230926060504-20d29008068f/go.mod h1:Px5Boq60s+2WwR+V4v4oxgmxfw9WHrwMwjRou6pkUNw= github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU= github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= diff --git a/pkg/storegateway/bucket_store_metrics.go 
b/pkg/storegateway/bucket_store_metrics.go index cb1cf1152b..f351940bcf 100644 --- a/pkg/storegateway/bucket_store_metrics.go +++ b/pkg/storegateway/bucket_store_metrics.go @@ -16,6 +16,7 @@ type BucketStoreMetrics struct { blockLoadFailures *prometheus.Desc blockDrops *prometheus.Desc blockDropFailures *prometheus.Desc + blockLoadDuration *prometheus.Desc blocksLoaded *prometheus.Desc seriesDataTouched *prometheus.Desc seriesDataFetched *prometheus.Desc @@ -75,6 +76,10 @@ func NewBucketStoreMetrics() *BucketStoreMetrics { "cortex_bucket_store_block_drop_failures_total", "Total number of local blocks that failed to be dropped.", nil, nil), + blockLoadDuration: prometheus.NewDesc( + "cortex_bucket_store_block_load_duration_seconds", + "The total time taken to load a block in seconds.", + nil, nil), blocksLoaded: prometheus.NewDesc( "cortex_bucket_store_blocks_loaded", "Number of currently loaded blocks.", @@ -228,6 +233,7 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) { out <- m.blockLoadFailures out <- m.blockDrops out <- m.blockDropFailures + out <- m.blockLoadDuration out <- m.blocksLoaded out <- m.seriesDataTouched out <- m.seriesDataFetched @@ -274,6 +280,7 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) { data.SendSumOfCounters(out, m.blockLoadFailures, "thanos_bucket_store_block_load_failures_total") data.SendSumOfCounters(out, m.blockDrops, "thanos_bucket_store_block_drops_total") data.SendSumOfCounters(out, m.blockDropFailures, "thanos_bucket_store_block_drop_failures_total") + data.SendSumOfHistograms(out, m.blockLoadDuration, "thanos_bucket_store_block_load_duration_seconds") data.SendSumOfGaugesPerUser(out, m.blocksLoaded, "thanos_bucket_store_blocks_loaded") diff --git a/pkg/storegateway/bucket_store_metrics_test.go b/pkg/storegateway/bucket_store_metrics_test.go index 37bccc1d57..650a015a49 100644 --- a/pkg/storegateway/bucket_store_metrics_test.go +++ b/pkg/storegateway/bucket_store_metrics_test.go @@ -41,6 
+41,19 @@ func TestBucketStoreMetrics(t *testing.T) { # HELP cortex_bucket_store_block_drops_total Total number of local blocks that were dropped. # TYPE cortex_bucket_store_block_drops_total counter cortex_bucket_store_block_drops_total 90076 + # HELP cortex_bucket_store_block_load_duration_seconds The total time taken to load a block in seconds. + # TYPE cortex_bucket_store_block_load_duration_seconds histogram + cortex_bucket_store_block_load_duration_seconds_bucket{le="0.1"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="0.5"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="1"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="10"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="20"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="30"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="60"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="120"} 0 + cortex_bucket_store_block_load_duration_seconds_bucket{le="+Inf"} 3 + cortex_bucket_store_block_load_duration_seconds_sum 112595 + cortex_bucket_store_block_load_duration_seconds_count 3 # HELP cortex_bucket_store_block_drop_failures_total Total number of local blocks that failed to be dropped. 
# TYPE cortex_bucket_store_block_drop_failures_total counter @@ -601,6 +614,7 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry { m.blockLoadFailures.Add(3 * base) m.blockDrops.Add(4 * base) m.blockDropFailures.Add(5 * base) + m.blockLoadDuration.Observe(5 * base) m.seriesDataTouched.WithLabelValues("touched-a").Observe(6 * base) m.seriesDataTouched.WithLabelValues("touched-b").Observe(7 * base) m.seriesDataTouched.WithLabelValues("touched-c").Observe(8 * base) @@ -684,6 +698,7 @@ type mockedBucketStoreMetrics struct { blockLoadFailures prometheus.Counter blockDrops prometheus.Counter blockDropFailures prometheus.Counter + blockLoadDuration prometheus.Histogram seriesDataTouched *prometheus.HistogramVec seriesDataFetched *prometheus.HistogramVec seriesDataSizeTouched *prometheus.HistogramVec @@ -741,6 +756,11 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe Name: "thanos_bucket_store_block_drop_failures_total", Help: "Total number of local blocks that failed to be dropped.", }) + m.blockLoadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_block_load_duration_seconds", + Help: "The total time taken to load a block in seconds.", + Buckets: []float64{0.1, 0.5, 1, 10, 20, 30, 60, 120}, + }) m.blocksLoaded = promauto.With(reg).NewGauge(prometheus.GaugeOpts{ Name: "thanos_bucket_store_blocks_loaded", Help: "Number of currently loaded blocks.", diff --git a/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go b/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go index 16ef73ac3b..7dbed1bec2 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go +++ b/vendor/github.com/thanos-io/thanos/pkg/block/indexheader/binary_reader.go @@ -505,7 +505,8 @@ type BinaryReader struct { postingsV1 map[string]map[string]index.Range // Symbols struct that keeps only 1/postingOffsetsInMemSampling in the memory, 
then looks up the rest via mmap. - symbols *index.Symbols + // Use Symbols as interface for ease of testing. + symbols Symbols // Cache of the label name symbol lookups, // as there are not many and they are half of all lookups. nameSymbols map[uint32]string @@ -925,6 +926,16 @@ func (r *BinaryReader) postingsOffset(name string, values ...string) ([]index.Ra } func (r *BinaryReader) LookupSymbol(o uint32) (string, error) { + if r.indexVersion == index.FormatV1 { + // For v1 little trick is needed. Refs are actual offset inside index, not index-header. This is different + // of the header length difference between two files. + o += headerLen - index.HeaderLen + } + + if s, ok := r.nameSymbols[o]; ok { + return s, nil + } + cacheIndex := o % valueSymbolsCacheSize r.valueSymbolsMx.Lock() if cached := r.valueSymbols[cacheIndex]; cached.index == o && cached.symbol != "" { @@ -934,16 +945,6 @@ func (r *BinaryReader) LookupSymbol(o uint32) (string, error) { } r.valueSymbolsMx.Unlock() - if s, ok := r.nameSymbols[o]; ok { - return s, nil - } - - if r.indexVersion == index.FormatV1 { - // For v1 little trick is needed. Refs are actual offset inside index, not index-header. This is different - // of the header length difference between two files. 
- o += headerLen - index.HeaderLen - } - s, err := r.symbols.Lookup(o) if err != nil { return s, err @@ -1047,3 +1048,8 @@ func (b realByteSlice) Range(start, end int) []byte { func (b realByteSlice) Sub(start, end int) index.ByteSlice { return b[start:end] } + +type Symbols interface { + Lookup(o uint32) (string, error) + ReverseLookup(sym string) (uint32, error) +} diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go b/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go index 5a6f31c42d..bc1507a367 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/bucket.go @@ -122,6 +122,7 @@ type bucketStoreMetrics struct { lastLoadedBlock prometheus.Gauge blockDrops prometheus.Counter blockDropFailures prometheus.Counter + blockLoadDuration prometheus.Histogram seriesDataTouched *prometheus.HistogramVec seriesDataFetched *prometheus.HistogramVec seriesDataSizeTouched *prometheus.HistogramVec @@ -185,6 +186,11 @@ func newBucketStoreMetrics(reg prometheus.Registerer) *bucketStoreMetrics { Name: "thanos_bucket_store_blocks_last_loaded_timestamp_seconds", Help: "Timestamp when last block got loaded.", }) + m.blockLoadDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "thanos_bucket_store_block_load_duration_seconds", + Help: "The total time taken to load a block in seconds.", + Buckets: []float64{0.1, 0.5, 1, 10, 20, 30, 60, 120}, + }) m.seriesDataTouched = promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ Name: "thanos_bucket_store_series_data_touched", @@ -727,6 +733,7 @@ func (s *BucketStore) addBlock(ctx context.Context, meta *metadata.Meta) (err er level.Warn(s.logger).Log("msg", "loading block failed", "elapsed", time.Since(start), "id", meta.ULID, "err", err) } else { level.Info(s.logger).Log("msg", "loaded new block", "elapsed", time.Since(start), "id", meta.ULID) + s.metrics.blockLoadDuration.Observe(time.Since(start).Seconds()) } }() 
s.metrics.blockLoads.Inc() diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/cache/cache.go b/vendor/github.com/thanos-io/thanos/pkg/store/cache/cache.go index 87cdb17d96..360cdd67e5 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/cache/cache.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/cache/cache.go @@ -61,6 +61,7 @@ type commonMetrics struct { requestTotal *prometheus.CounterVec hitsTotal *prometheus.CounterVec dataSizeBytes *prometheus.HistogramVec + fetchLatency *prometheus.HistogramVec } func newCommonMetrics(reg prometheus.Registerer) *commonMetrics { @@ -80,6 +81,11 @@ func newCommonMetrics(reg prometheus.Registerer) *commonMetrics { 32, 256, 512, 1024, 32 * 1024, 256 * 1024, 512 * 1024, 1024 * 1024, 32 * 1024 * 1024, 64 * 1024 * 1024, 128 * 1024 * 1024, 256 * 1024 * 1024, 512 * 1024 * 1024, }, }, []string{"item_type"}), + fetchLatency: promauto.With(reg).NewHistogramVec(prometheus.HistogramOpts{ + Name: "thanos_store_index_cache_fetch_duration_seconds", + Help: "Histogram to track latency to fetch items from index cache", + Buckets: []float64{0.01, 0.1, 0.3, 0.6, 1, 3, 6, 10, 15, 20, 30, 45, 60, 90, 120}, + }, []string{"item_type"}), } } diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/cache/inmemory.go b/vendor/github.com/thanos-io/thanos/pkg/store/cache/inmemory.go index 747199b414..e0077acc35 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/cache/inmemory.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/cache/inmemory.go @@ -302,6 +302,9 @@ func (c *InMemoryIndexCache) StorePostings(blockID ulid.ULID, l labels.Label, v // FetchMultiPostings fetches multiple postings - each identified by a label - // and returns a map containing cache hits, along with a list of missing keys. 
func (c *InMemoryIndexCache) FetchMultiPostings(_ context.Context, blockID ulid.ULID, keys []labels.Label) (hits map[labels.Label][]byte, misses []labels.Label) { + timer := prometheus.NewTimer(c.commonMetrics.fetchLatency.WithLabelValues(cacheTypePostings)) + defer timer.ObserveDuration() + hits = map[labels.Label][]byte{} blockIDKey := blockID.String() @@ -325,6 +328,9 @@ func (c *InMemoryIndexCache) StoreExpandedPostings(blockID ulid.ULID, matchers [ // FetchExpandedPostings fetches expanded postings and returns cached data and a boolean value representing whether it is a cache hit or not. func (c *InMemoryIndexCache) FetchExpandedPostings(_ context.Context, blockID ulid.ULID, matchers []*labels.Matcher) ([]byte, bool) { + timer := prometheus.NewTimer(c.commonMetrics.fetchLatency.WithLabelValues(cacheTypeExpandedPostings)) + defer timer.ObserveDuration() + if b, ok := c.get(cacheTypeExpandedPostings, cacheKey{blockID.String(), cacheKeyExpandedPostings(labelMatchersToString(matchers)), ""}); ok { return b, true } @@ -341,6 +347,9 @@ func (c *InMemoryIndexCache) StoreSeries(blockID ulid.ULID, id storage.SeriesRef // FetchMultiSeries fetches multiple series - each identified by ID - from the cache // and returns a map containing cache hits, along with a list of missing IDs. 
func (c *InMemoryIndexCache) FetchMultiSeries(_ context.Context, blockID ulid.ULID, ids []storage.SeriesRef) (hits map[storage.SeriesRef][]byte, misses []storage.SeriesRef) { + timer := prometheus.NewTimer(c.commonMetrics.fetchLatency.WithLabelValues(cacheTypeSeries)) + defer timer.ObserveDuration() + hits = map[storage.SeriesRef][]byte{} blockIDKey := blockID.String() diff --git a/vendor/github.com/thanos-io/thanos/pkg/store/cache/memcached.go b/vendor/github.com/thanos-io/thanos/pkg/store/cache/memcached.go index 9292f3ed59..a3dbce9940 100644 --- a/vendor/github.com/thanos-io/thanos/pkg/store/cache/memcached.go +++ b/vendor/github.com/thanos-io/thanos/pkg/store/cache/memcached.go @@ -33,15 +33,18 @@ type RemoteIndexCache struct { compressionScheme string // Metrics. - postingRequests prometheus.Counter - seriesRequests prometheus.Counter - expandedPostingRequests prometheus.Counter - postingHits prometheus.Counter - seriesHits prometheus.Counter - expandedPostingHits prometheus.Counter - postingDataSizeBytes prometheus.Observer - expandedPostingDataSizeBytes prometheus.Observer - seriesDataSizeBytes prometheus.Observer + postingRequests prometheus.Counter + seriesRequests prometheus.Counter + expandedPostingRequests prometheus.Counter + postingHits prometheus.Counter + seriesHits prometheus.Counter + expandedPostingHits prometheus.Counter + postingDataSizeBytes prometheus.Observer + expandedPostingDataSizeBytes prometheus.Observer + seriesDataSizeBytes prometheus.Observer + postingsFetchDuration prometheus.Observer + expandedPostingsFetchDuration prometheus.Observer + seriesFetchDuration prometheus.Observer } // NewRemoteIndexCache makes a new RemoteIndexCache. 
@@ -68,6 +71,10 @@ func NewRemoteIndexCache(logger log.Logger, cacheClient cacheutil.RemoteCacheCli c.seriesDataSizeBytes = commonMetrics.dataSizeBytes.WithLabelValues(cacheTypeSeries) c.expandedPostingDataSizeBytes = commonMetrics.dataSizeBytes.WithLabelValues(cacheTypeExpandedPostings) + c.postingsFetchDuration = commonMetrics.fetchLatency.WithLabelValues(cacheTypePostings) + c.seriesFetchDuration = commonMetrics.fetchLatency.WithLabelValues(cacheTypeSeries) + c.expandedPostingsFetchDuration = commonMetrics.fetchLatency.WithLabelValues(cacheTypeExpandedPostings) + level.Info(logger).Log("msg", "created index cache") return c, nil @@ -88,6 +95,9 @@ func (c *RemoteIndexCache) StorePostings(blockID ulid.ULID, l labels.Label, v [] // and returns a map containing cache hits, along with a list of missing keys. // In case of error, it logs and return an empty cache hits map. func (c *RemoteIndexCache) FetchMultiPostings(ctx context.Context, blockID ulid.ULID, lbls []labels.Label) (hits map[labels.Label][]byte, misses []labels.Label) { + timer := prometheus.NewTimer(c.postingsFetchDuration) + defer timer.ObserveDuration() + keys := make([]string, 0, len(lbls)) blockIDKey := blockID.String() @@ -138,6 +148,9 @@ func (c *RemoteIndexCache) StoreExpandedPostings(blockID ulid.ULID, keys []*labe // and returns a map containing cache hits, along with a list of missing keys. // In case of error, it logs and return an empty cache hits map. func (c *RemoteIndexCache) FetchExpandedPostings(ctx context.Context, blockID ulid.ULID, lbls []*labels.Matcher) ([]byte, bool) { + timer := prometheus.NewTimer(c.expandedPostingsFetchDuration) + defer timer.ObserveDuration() + key := cacheKey{blockID.String(), cacheKeyExpandedPostings(labelMatchersToString(lbls)), c.compressionScheme}.string() // Fetch the keys from memcached in a single request. 
@@ -169,6 +182,9 @@ func (c *RemoteIndexCache) StoreSeries(blockID ulid.ULID, id storage.SeriesRef, // and returns a map containing cache hits, along with a list of missing IDs. // In case of error, it logs and return an empty cache hits map. func (c *RemoteIndexCache) FetchMultiSeries(ctx context.Context, blockID ulid.ULID, ids []storage.SeriesRef) (hits map[storage.SeriesRef][]byte, misses []storage.SeriesRef) { + timer := prometheus.NewTimer(c.seriesFetchDuration) + defer timer.ObserveDuration() + keys := make([]string, 0, len(ids)) blockIDKey := blockID.String() diff --git a/vendor/modules.txt b/vendor/modules.txt index e92ea755dc..d3cc541a84 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -902,7 +902,7 @@ github.com/thanos-io/promql-engine/logicalplan github.com/thanos-io/promql-engine/parser github.com/thanos-io/promql-engine/query github.com/thanos-io/promql-engine/worker -# github.com/thanos-io/thanos v0.32.4-0.20230921182036-6257767ec9d0 +# github.com/thanos-io/thanos v0.32.4-0.20230926060504-20d29008068f ## explicit; go 1.18 github.com/thanos-io/thanos/pkg/block github.com/thanos-io/thanos/pkg/block/indexheader