Skip to content

Commit

Permalink
update thanos and add metrics to cortex
Browse files Browse the repository at this point in the history
Signed-off-by: Ben Ye <[email protected]>
  • Loading branch information
yeya24 committed Sep 19, 2023
1 parent a7b4ceb commit 101da53
Show file tree
Hide file tree
Showing 7 changed files with 154 additions and 48 deletions.
4 changes: 1 addition & 3 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ require (
github.com/stretchr/testify v1.8.4
github.com/thanos-io/objstore v0.0.0-20230913122821-eb06103887ab
github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e
github.com/thanos-io/thanos v0.32.3-0.20230911095949-f6a39507b6bd
github.com/thanos-io/thanos v0.32.3-0.20230919181645-2bc12a582284
github.com/uber/jaeger-client-go v2.30.0+incompatible
github.com/weaveworks/common v0.0.0-20221201103051-7c2720a9024d
go.etcd.io/etcd/api/v3 v3.5.9
Expand Down Expand Up @@ -262,5 +262,3 @@ replace github.com/google/gnostic => github.com/googleapis/gnostic v0.6.9
replace gopkg.in/alecthomas/kingpin.v2 => github.com/alecthomas/kingpin v1.3.8-0.20210301060133-17f40c25f497

replace github.com/sercand/kuberesolver => github.com/sercand/kuberesolver/v4 v4.0.0

replace github.com/thanos-io/thanos => github.com/yeya24/thanos v0.2.2-0.20230918182451-e0a5d841de76
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -1212,6 +1212,8 @@ github.com/thanos-io/objstore v0.0.0-20230913122821-eb06103887ab h1:IfcvGL/erj7I
github.com/thanos-io/objstore v0.0.0-20230913122821-eb06103887ab/go.mod h1:oJ82xgcBDzGJrEgUsjlTj6n01+ZWUMMUR8BlZzX5xDE=
github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e h1:kwsFCU8eSkZehbrAN3nXPw5RdMHi/Bok/y8l2C4M+gk=
github.com/thanos-io/promql-engine v0.0.0-20230821193351-e1ae4275b96e/go.mod h1:+T/ZYNCGybT6eTsGGvVtGb63nT1cvUmH6MjqRrcQoKw=
github.com/thanos-io/thanos v0.32.3-0.20230919181645-2bc12a582284 h1:x3Fs9sB5PvmNdS0DsTARyK5Dfc9IwOSReu7K7IN6mPA=
github.com/thanos-io/thanos v0.32.3-0.20230919181645-2bc12a582284/go.mod h1:MD/vJgwt6va80KWA3qELrNODEo1evehiKvulDVrNGKY=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab h1:7ZR3hmisBWw77ZpO1/o86g+JV3VKlk3d48jopJxzTjU=
github.com/themihai/gomemcache v0.0.0-20180902122335-24332e2d58ab/go.mod h1:eheTFp954zcWZXCU8d0AT76ftsQOTo4DTqkN/h3k1MY=
github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk=
Expand All @@ -1234,8 +1236,6 @@ github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3k
github.com/xdg-go/stringprep v1.0.2/go.mod h1:8F9zXuvzgwmyT5DUm4GUfZGDdT3W+LCvS6+da4O5kxM=
github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8=
github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM=
github.com/yeya24/thanos v0.2.2-0.20230918182451-e0a5d841de76 h1:czelPDvh0+R9z+TnjjIIhgoI6kKpSo30VvLcEAHtGtU=
github.com/yeya24/thanos v0.2.2-0.20230918182451-e0a5d841de76/go.mod h1:MD/vJgwt6va80KWA3qELrNODEo1evehiKvulDVrNGKY=
github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA=
github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
Expand Down
2 changes: 1 addition & 1 deletion pkg/querier/blocks_store_queryable.go
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,7 @@ func (q *blocksStoreQuerier) fetchSeriesFromStores(
"data_downloaded_size_sum", seriesQueryStats.DataDownloadedSizeSum,
"get_all_duration", seriesQueryStats.GetAllDuration,
"merge_duration", seriesQueryStats.MergeDuration,
"response_latency", time.Since(begin),
"response_time", time.Since(begin),
)
}

Expand Down
20 changes: 17 additions & 3 deletions pkg/storegateway/bucket_store_metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,11 @@ type BucketStoreMetrics struct {
cachedPostingsOriginalSizeBytes *prometheus.Desc
cachedPostingsCompressedSizeBytes *prometheus.Desc

seriesFetchDuration *prometheus.Desc
postingsFetchDuration *prometheus.Desc
chunkFetchDuration *prometheus.Desc
seriesFetchDuration *prometheus.Desc
seriesFetchDurationSum *prometheus.Desc
postingsFetchDuration *prometheus.Desc
chunkFetchDuration *prometheus.Desc
chunkFetchDurationSum *prometheus.Desc

lazyExpandedPostingsCount *prometheus.Desc
lazyExpandedPostingSizeBytes *prometheus.Desc
Expand Down Expand Up @@ -160,6 +162,10 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
"cortex_bucket_store_series_fetch_duration_seconds",
"Time it takes to fetch series to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses.",
nil, nil),
seriesFetchDurationSum: prometheus.NewDesc(
"cortex_bucket_store_series_fetch_duration_sum_seconds",
"The time it takes to fetch postings to respond to a request sent to a store gateway. It includes both the time to fetch it from the cache and from storage in case of cache misses.",
nil, nil),
postingsFetchDuration: prometheus.NewDesc(
"cortex_bucket_store_postings_fetch_duration_seconds",
"Time it takes to fetch postings to respond a request sent to store-gateway. It includes both the time to fetch it from cache and from storage in case of cache misses.",
Expand All @@ -168,6 +174,10 @@ func NewBucketStoreMetrics() *BucketStoreMetrics {
"cortex_bucket_store_chunks_fetch_duration_seconds",
"The total time spent fetching chunks within a single request a store gateway.",
nil, nil),
chunkFetchDurationSum: prometheus.NewDesc(
"cortex_bucket_store_chunks_fetch_duration_sum_seconds",
"The total absolute time spent fetching chunks within a single request for one block.",
nil, nil),

indexHeaderLazyLoadCount: prometheus.NewDesc(
"cortex_bucket_store_indexheader_lazy_load_total",
Expand Down Expand Up @@ -241,8 +251,10 @@ func (m *BucketStoreMetrics) Describe(out chan<- *prometheus.Desc) {
out <- m.cachedPostingsCompressedSizeBytes

out <- m.seriesFetchDuration
out <- m.seriesFetchDurationSum
out <- m.postingsFetchDuration
out <- m.chunkFetchDuration
out <- m.chunkFetchDurationSum

out <- m.indexHeaderLazyLoadCount
out <- m.indexHeaderLazyLoadFailedCount
Expand Down Expand Up @@ -288,8 +300,10 @@ func (m *BucketStoreMetrics) Collect(out chan<- prometheus.Metric) {
data.SendSumOfCountersWithLabels(out, m.cachedPostingsCompressedSizeBytes, "thanos_bucket_store_cached_postings_compressed_size_bytes_total")

data.SendSumOfHistograms(out, m.seriesFetchDuration, "thanos_bucket_store_series_fetch_duration_seconds")
data.SendSumOfHistograms(out, m.seriesFetchDurationSum, "thanos_bucket_store_series_fetch_duration_sum_seconds")
data.SendSumOfHistograms(out, m.postingsFetchDuration, "thanos_bucket_store_postings_fetch_duration_seconds")
data.SendSumOfHistograms(out, m.chunkFetchDuration, "thanos_bucket_store_chunks_fetch_duration_seconds")
data.SendSumOfHistograms(out, m.chunkFetchDurationSum, "thanos_bucket_store_chunks_fetch_duration_sum_seconds")

data.SendSumOfCounters(out, m.indexHeaderLazyLoadCount, "thanos_bucket_store_indexheader_lazy_load_total")
data.SendSumOfCounters(out, m.indexHeaderLazyLoadFailedCount, "thanos_bucket_store_indexheader_lazy_load_failed_total")
Expand Down
61 changes: 57 additions & 4 deletions pkg/storegateway/bucket_store_metrics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -290,6 +290,25 @@ func TestBucketStoreMetrics(t *testing.T) {
cortex_bucket_store_series_data_touched_sum{data_type="touched-c"} 180152
cortex_bucket_store_series_data_touched_count{data_type="touched-c"} 3
# HELP cortex_bucket_store_series_fetch_duration_sum_seconds The time it takes to fetch postings to respond to a request sent to a store gateway. It includes both the time to fetch it from the cache and from storage in case of cache misses.
# TYPE cortex_bucket_store_series_fetch_duration_sum_seconds histogram
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="0.001"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="0.01"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="0.1"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="0.3"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="0.6"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="1"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="3"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="6"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="9"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="20"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="30"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="60"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="90"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="120"} 0
cortex_bucket_store_series_fetch_duration_sum_seconds_bucket{le="+Inf"} 3
cortex_bucket_store_series_fetch_duration_sum_seconds_sum 1.306102e+06
cortex_bucket_store_series_fetch_duration_sum_seconds_count 3
# HELP cortex_bucket_store_series_get_all_duration_seconds Time it takes until all per-block prepares and preloads for a query are finished.
# TYPE cortex_bucket_store_series_get_all_duration_seconds histogram
cortex_bucket_store_series_get_all_duration_seconds_bucket{le="0.001"} 0
Expand Down Expand Up @@ -395,7 +414,7 @@ func TestBucketStoreMetrics(t *testing.T) {
# HELP cortex_bucket_store_chunk_refetches_total Total number of cases where configured estimated chunk bytes was not enough was to fetch chunks from object store, resulting in refetch.
# TYPE cortex_bucket_store_chunk_refetches_total counter
cortex_bucket_store_chunk_refetches_total 0
cortex_bucket_store_chunk_refetches_total 765646
# HELP cortex_bucket_store_cached_postings_compressed_size_bytes_total Compressed size of postings stored into cache.
# TYPE cortex_bucket_store_cached_postings_compressed_size_bytes_total counter
Expand Down Expand Up @@ -439,6 +458,25 @@ func TestBucketStoreMetrics(t *testing.T) {
cortex_bucket_store_chunks_fetch_duration_seconds_bucket{le="+Inf"} 3
cortex_bucket_store_chunks_fetch_duration_seconds_sum 1.328621e+06
cortex_bucket_store_chunks_fetch_duration_seconds_count 3
# HELP cortex_bucket_store_chunks_fetch_duration_sum_seconds The total absolute time spent fetching chunks within a single request for one block.
# TYPE cortex_bucket_store_chunks_fetch_duration_sum_seconds histogram
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="0.001"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="0.01"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="0.1"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="0.3"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="0.6"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="1"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="3"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="6"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="9"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="20"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="30"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="60"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="90"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="120"} 0
cortex_bucket_store_chunks_fetch_duration_sum_seconds_bucket{le="+Inf"} 3
cortex_bucket_store_chunks_fetch_duration_sum_seconds_sum 1.328621e+06
cortex_bucket_store_chunks_fetch_duration_sum_seconds_count 3
# HELP cortex_bucket_store_empty_postings_total Total number of empty postings when fetching block series.
# TYPE cortex_bucket_store_empty_postings_total counter
cortex_bucket_store_empty_postings_total 112595
Expand Down Expand Up @@ -604,6 +642,7 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.chunkSizeBytes.Observe(11 * base)

m.seriesRefetches.Add(33 * base)
m.chunkRefetches.Add(34 * base)

m.cachedPostingsCompressions.WithLabelValues("encode").Add(50 * base)
m.cachedPostingsCompressions.WithLabelValues("decode").Add(51 * base)
Expand All @@ -618,8 +657,10 @@ func populateMockedBucketStoreMetrics(base float64) *prometheus.Registry {
m.cachedPostingsCompressedSizeBytes.Add(57 * base)

m.seriesFetchDuration.Observe(58 * base)
m.seriesFetchDurationSum.Observe(58 * base)
m.postingsFetchDuration.Observe(59 * base)
m.chunkFetchDuration.Observe(59 * base)
m.chunkFetchDurationSum.Observe(59 * base)

m.indexHeaderLazyLoadCount.Add(60 * base)
m.indexHeaderLazyLoadFailedCount.Add(61 * base)
Expand Down Expand Up @@ -664,9 +705,11 @@ type mockedBucketStoreMetrics struct {
cachedPostingsOriginalSizeBytes prometheus.Counter
cachedPostingsCompressedSizeBytes prometheus.Counter

seriesFetchDuration prometheus.Histogram
postingsFetchDuration prometheus.Histogram
chunkFetchDuration prometheus.Histogram
seriesFetchDuration prometheus.Histogram
seriesFetchDurationSum prometheus.Histogram
postingsFetchDuration prometheus.Histogram
chunkFetchDuration prometheus.Histogram
chunkFetchDurationSum prometheus.Histogram

indexHeaderLazyLoadCount prometheus.Counter
indexHeaderLazyLoadFailedCount prometheus.Counter
Expand Down Expand Up @@ -801,6 +844,11 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
Help: "Time it takes to fetch series from a bucket to respond a query. It also includes the time it takes to cache fetch and store operations.",
Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120},
})
m.seriesFetchDurationSum = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_series_fetch_duration_sum_seconds",
Help: "The total time it takes to fetch series to respond to a request sent to a store gateway across all series batches. It includes both the time to fetch it from the cache and from storage in case of cache misses.",
Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120},
})
m.postingsFetchDuration = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_postings_fetch_duration_seconds",
Help: "Time it takes to fetch postings from a bucket to respond a query. It also includes the time it takes to cache fetch and store operations.",
Expand All @@ -811,6 +859,11 @@ func newMockedBucketStoreMetrics(reg prometheus.Registerer) *mockedBucketStoreMe
Help: "The total time spent fetching chunks within a single request a store gateway.",
Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120},
})
m.chunkFetchDurationSum = promauto.With(reg).NewHistogram(prometheus.HistogramOpts{
Name: "thanos_bucket_store_chunks_fetch_duration_sum_seconds",
Help: "The total absolute time spent fetching chunks within a single request for one block.",
Buckets: []float64{0.001, 0.01, 0.1, 0.3, 0.6, 1, 3, 6, 9, 20, 30, 60, 90, 120},
})

m.indexHeaderLazyLoadCount = promauto.With(reg).NewCounter(prometheus.CounterOpts{
Name: "thanos_bucket_store_indexheader_lazy_load_total",
Expand Down
Loading

0 comments on commit 101da53

Please sign in to comment.