From d519e78882ad08457af8471941a76a45cf15b39e Mon Sep 17 00:00:00 2001 From: Alan Protasio Date: Wed, 6 Nov 2024 18:41:59 -0800 Subject: [PATCH] Add reason why the key was evicted in the `cortex_ingester_expanded_postings_cache_evicts` metric (#6318) * Enhancing metric Signed-off-by: alanprot * lint Signed-off-by: alanprot --------- Signed-off-by: alanprot --- pkg/storage/tsdb/expanded_postings_cache.go | 27 +++++++------ .../tsdb/expanded_postings_cache_test.go | 38 ++++++++++++++++++- 2 files changed, 53 insertions(+), 12 deletions(-) diff --git a/pkg/storage/tsdb/expanded_postings_cache.go b/pkg/storage/tsdb/expanded_postings_cache.go index 6a8a46795a..59af6d879f 100644 --- a/pkg/storage/tsdb/expanded_postings_cache.go +++ b/pkg/storage/tsdb/expanded_postings_cache.go @@ -55,7 +55,7 @@ func NewPostingCacheMetrics(r prometheus.Registerer) *ExpandedPostingsCacheMetri CacheEvicts: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ingester_expanded_postings_cache_evicts", Help: "Total number of evictions in the cache, excluding items that got evicted due to TTL.", - }, []string{"cache"}), + }, []string{"cache", "reason"}), NonCacheableQueries: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ Name: "cortex_ingester_expanded_postings_non_cacheable_queries", Help: "Total number of non cacheable queries.", @@ -301,14 +301,15 @@ func (c *fifoCache[V]) expire() { return } c.cachedMtx.RLock() - if !c.shouldEvictHead() { + if _, r := c.shouldEvictHead(); !r { c.cachedMtx.RUnlock() return } c.cachedMtx.RUnlock() c.cachedMtx.Lock() defer c.cachedMtx.Unlock() - for c.shouldEvictHead() { + for reason, r := c.shouldEvictHead(); r; reason, r = c.shouldEvictHead() { + c.metrics.CacheEvicts.WithLabelValues(c.name, reason).Inc() c.evictHead() } } @@ -340,6 +341,7 @@ func (c *fifoCache[V]) getPromiseForKey(k string, fetch func() (V, int64, error) // If is cached but is expired, lets try to replace the cache value. if loaded.(*cacheEntryPromise[V]).isExpired(c.cfg.Ttl, c.timeNow()) && c.cachedValues.CompareAndSwap(k, loaded, r) { + c.metrics.CacheEvicts.WithLabelValues(c.name, "expired").Inc() r.v, r.sizeBytes, r.err = fetch() r.sizeBytes += int64(len(k)) c.updateSize(loaded.(*cacheEntryPromise[V]).sizeBytes, r.sizeBytes) @@ -357,22 +359,25 @@ func (c *fifoCache[V]) contains(k string) bool { return ok } -func (c *fifoCache[V]) shouldEvictHead() bool { - if c.cachedBytes > c.cfg.MaxBytes { - c.metrics.CacheEvicts.WithLabelValues(c.name).Inc() - return true - } +func (c *fifoCache[V]) shouldEvictHead() (string, bool) { h := c.cached.Front() if h == nil { - return false + return "", false } + + if c.cachedBytes > c.cfg.MaxBytes { + return "full", true + } + key := h.Value.(string) if l, ok := c.cachedValues.Load(key); ok { - return l.(*cacheEntryPromise[V]).isExpired(c.cfg.Ttl, c.timeNow()) + if l.(*cacheEntryPromise[V]).isExpired(c.cfg.Ttl, c.timeNow()) { + return "expired", true + } } - return false + return "", false } func (c *fifoCache[V]) evictHead() { diff --git a/pkg/storage/tsdb/expanded_postings_cache_test.go b/pkg/storage/tsdb/expanded_postings_cache_test.go index 51d4ac78d4..db821736a3 100644 --- a/pkg/storage/tsdb/expanded_postings_cache_test.go +++ b/pkg/storage/tsdb/expanded_postings_cache_test.go @@ -1,6 +1,7 @@ package tsdb import ( + "bytes" "fmt" "strings" "sync" @@ -8,6 +9,7 @@ import ( "time" "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/testutil" "github.com/stretchr/testify/require" "go.uber.org/atomic" ) @@ -88,7 +90,8 @@ func TestFifoCacheExpire(t *testing.T) { for name, c := range tc { t.Run(name, func(t *testing.T) { - m := NewPostingCacheMetrics(prometheus.NewPedanticRegistry()) + r := prometheus.NewPedanticRegistry() + m := NewPostingCacheMetrics(r) timeNow := time.Now cache := newFifoCache[int](c.cfg, "test", m, timeNow) @@ -118,6 +121,16 @@ func TestFifoCacheExpire(t *testing.T) { require.Equal(t, c.expectedFinalItems, totalCacheSize) + if c.expectedFinalItems != numberOfKeys { + err := testutil.GatherAndCompare(r, bytes.NewBufferString(fmt.Sprintf(` + # HELP cortex_ingester_expanded_postings_cache_evicts Total number of evictions in the cache, excluding items that got evicted due to TTL. + # TYPE cortex_ingester_expanded_postings_cache_evicts counter + cortex_ingester_expanded_postings_cache_evicts{cache="test",reason="full"} %v +`, numberOfKeys-c.expectedFinalItems)), "cortex_ingester_expanded_postings_cache_evicts") + require.NoError(t, err) + + } + if c.ttlExpire { cache.timeNow = func() time.Time { return timeNow().Add(2 * c.cfg.Ttl) @@ -135,6 +148,29 @@ func TestFifoCacheExpire(t *testing.T) { // Total Size Updated require.Equal(t, originalSize+10, cache.cachedBytes) } + + err := testutil.GatherAndCompare(r, bytes.NewBufferString(fmt.Sprintf(` + # HELP cortex_ingester_expanded_postings_cache_evicts Total number of evictions in the cache, excluding items that got evicted due to TTL. + # TYPE cortex_ingester_expanded_postings_cache_evicts counter + cortex_ingester_expanded_postings_cache_evicts{cache="test",reason="expired"} %v +`, numberOfKeys)), "cortex_ingester_expanded_postings_cache_evicts") + require.NoError(t, err) + + cache.timeNow = func() time.Time { + return timeNow().Add(5 * c.cfg.Ttl) + } + + cache.getPromiseForKey("newKwy", func() (int, int64, error) { + return 2, 18, nil + }) + + // Should expire all keys again as ttl is expired + err = testutil.GatherAndCompare(r, bytes.NewBufferString(fmt.Sprintf(` + # HELP cortex_ingester_expanded_postings_cache_evicts Total number of evictions in the cache, excluding items that got evicted due to TTL. + # TYPE cortex_ingester_expanded_postings_cache_evicts counter + cortex_ingester_expanded_postings_cache_evicts{cache="test",reason="expired"} %v +`, numberOfKeys*2)), "cortex_ingester_expanded_postings_cache_evicts") + require.NoError(t, err) } }) }