From bb18671e40071a01d78e6f22d4f3ef6448db5fb1 Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Mon, 16 Dec 2024 16:58:34 -0800 Subject: [PATCH 1/2] cleanup dangling request queue metrics Signed-off-by: Ben Ye --- pkg/frontend/v1/frontend.go | 1 + pkg/frontend/v1/frontend_test.go | 7 +++++-- pkg/scheduler/queue/queue.go | 4 ++++ pkg/scheduler/scheduler.go | 1 + pkg/scheduler/scheduler_test.go | 11 +++++++++-- 5 files changed, 20 insertions(+), 4 deletions(-) diff --git a/pkg/frontend/v1/frontend.go b/pkg/frontend/v1/frontend.go index 7c7375027a..8dfcd98ef0 100644 --- a/pkg/frontend/v1/frontend.go +++ b/pkg/frontend/v1/frontend.go @@ -183,6 +183,7 @@ func (f *Frontend) cleanupInactiveUserMetrics(user string) { f.discardedRequests.DeletePartialMatch(prometheus.Labels{ "user": user, }) + f.requestQueue.CleanupInactiveUserMetrics(user) } // RoundTripGRPC round trips a proto (instead of a HTTP request). diff --git a/pkg/frontend/v1/frontend_test.go b/pkg/frontend/v1/frontend_test.go index 43e8d6d351..5bc63ba53f 100644 --- a/pkg/frontend/v1/frontend_test.go +++ b/pkg/frontend/v1/frontend_test.go @@ -212,11 +212,14 @@ func TestFrontendMetricsCleanup(t *testing.T) { # HELP cortex_query_frontend_queue_length Number of queries in the queue. # TYPE cortex_query_frontend_queue_length gauge cortex_query_frontend_queue_length{priority="0",type="fifo",user="1"} 0 - `), "cortex_query_frontend_queue_length")) + # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. + # TYPE cortex_request_queue_requests_total counter + cortex_request_queue_requests_total{priority="0",user="1"} 1 + `), "cortex_query_frontend_queue_length", "cortex_request_queue_requests_total")) fr.cleanupInactiveUserMetrics("1") - require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(""), "cortex_query_frontend_queue_length")) + require.NoError(t, testutil.GatherAndCompare(reg, strings.NewReader(""), "cortex_query_frontend_queue_length", "cortex_request_queue_requests_total")) } testFrontend(t, defaultFrontendConfig(), handler, test, matchMaxConcurrency, nil, reg) diff --git a/pkg/scheduler/queue/queue.go b/pkg/scheduler/queue/queue.go index 4f6cc130b1..8396e0d46a 100644 --- a/pkg/scheduler/queue/queue.go +++ b/pkg/scheduler/queue/queue.go @@ -247,3 +247,7 @@ func (q *RequestQueue) QuerierDisconnecting() { func (q *RequestQueue) GetConnectedQuerierWorkersMetric() float64 { return float64(q.connectedQuerierWorkers.Load()) } + +func (q *RequestQueue) CleanupInactiveUserMetrics(user string) { + q.totalRequests.DeletePartialMatch(prometheus.Labels{"user": user}) +} diff --git a/pkg/scheduler/scheduler.go b/pkg/scheduler/scheduler.go index 91f25410d6..647fb379b8 100644 --- a/pkg/scheduler/scheduler.go +++ b/pkg/scheduler/scheduler.go @@ -532,6 +532,7 @@ func (s *Scheduler) cleanupMetricsForInactiveUser(user string) { s.discardedRequests.DeletePartialMatch(prometheus.Labels{ "user": user, }) + s.requestQueue.CleanupInactiveUserMetrics(user) } func (s *Scheduler) getConnectedFrontendClientsMetric() float64 { diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go index f8327aeb28..e1d810f8fc 100644 --- a/pkg/scheduler/scheduler_test.go +++ b/pkg/scheduler/scheduler_test.go @@ -430,7 +430,11 @@ func TestSchedulerMetrics(t *testing.T) { # TYPE cortex_query_scheduler_queue_length gauge cortex_query_scheduler_queue_length{priority="0",type="fifo",user="another"} 1 cortex_query_scheduler_queue_length{priority="0",type="fifo",user="test"} 1 - `), "cortex_query_scheduler_queue_length")) + # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. + # TYPE cortex_request_queue_requests_total counter + cortex_request_queue_requests_total{priority="0",user="another"} 1 + cortex_request_queue_requests_total{priority="0",user="test"} 1 + `), "cortex_query_scheduler_queue_length", "cortex_request_queue_requests_total")) scheduler.cleanupMetricsForInactiveUser("test") @@ -438,7 +442,10 @@ func TestSchedulerMetrics(t *testing.T) { # HELP cortex_query_scheduler_queue_length Number of queries in the queue. # TYPE cortex_query_scheduler_queue_length gauge cortex_query_scheduler_queue_length{priority="0",type="fifo",user="another"} 1 - `), "cortex_query_scheduler_queue_length")) + # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. + # TYPE cortex_request_queue_requests_total counter + cortex_request_queue_requests_total{priority="0",user="another"} 1 + `), "cortex_query_scheduler_queue_length", "cortex_request_queue_requests_total")) } func initFrontendLoop(t *testing.T, client schedulerpb.SchedulerForFrontendClient, frontendAddr string) schedulerpb.SchedulerForFrontend_FrontendLoopClient { From 60d98567608c2ed3bdd12a1ad7c8b0b89062e35e Mon Sep 17 00:00:00 2001 From: Ben Ye Date: Tue, 17 Dec 2024 11:31:15 -0800 Subject: [PATCH 2/2] fix lint Signed-off-by: Ben Ye --- pkg/frontend/v1/frontend_test.go | 6 +++--- pkg/scheduler/scheduler_test.go | 14 +++++++------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/pkg/frontend/v1/frontend_test.go b/pkg/frontend/v1/frontend_test.go index 5bc63ba53f..ef7cd705f2 100644 --- a/pkg/frontend/v1/frontend_test.go +++ b/pkg/frontend/v1/frontend_test.go @@ -212,9 +212,9 @@ func TestFrontendMetricsCleanup(t *testing.T) { # HELP cortex_query_frontend_queue_length Number of queries in the queue. # TYPE cortex_query_frontend_queue_length gauge cortex_query_frontend_queue_length{priority="0",type="fifo",user="1"} 0 - # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. - # TYPE cortex_request_queue_requests_total counter - cortex_request_queue_requests_total{priority="0",user="1"} 1 + # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. + # TYPE cortex_request_queue_requests_total counter + cortex_request_queue_requests_total{priority="0",user="1"} 1 `), "cortex_query_frontend_queue_length", "cortex_request_queue_requests_total")) fr.cleanupInactiveUserMetrics("1") diff --git a/pkg/scheduler/scheduler_test.go b/pkg/scheduler/scheduler_test.go index e1d810f8fc..b670011f11 100644 --- a/pkg/scheduler/scheduler_test.go +++ b/pkg/scheduler/scheduler_test.go @@ -430,10 +430,10 @@ func TestSchedulerMetrics(t *testing.T) { # TYPE cortex_query_scheduler_queue_length gauge cortex_query_scheduler_queue_length{priority="0",type="fifo",user="another"} 1 cortex_query_scheduler_queue_length{priority="0",type="fifo",user="test"} 1 - # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. - # TYPE cortex_request_queue_requests_total counter - cortex_request_queue_requests_total{priority="0",user="another"} 1 - cortex_request_queue_requests_total{priority="0",user="test"} 1 + # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. + # TYPE cortex_request_queue_requests_total counter + cortex_request_queue_requests_total{priority="0",user="another"} 1 + cortex_request_queue_requests_total{priority="0",user="test"} 1 `), "cortex_query_scheduler_queue_length", "cortex_request_queue_requests_total")) scheduler.cleanupMetricsForInactiveUser("test") @@ -442,9 +442,9 @@ func TestSchedulerMetrics(t *testing.T) { # HELP cortex_query_scheduler_queue_length Number of queries in the queue. # TYPE cortex_query_scheduler_queue_length gauge cortex_query_scheduler_queue_length{priority="0",type="fifo",user="another"} 1 - # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. - # TYPE cortex_request_queue_requests_total counter - cortex_request_queue_requests_total{priority="0",user="another"} 1 + # HELP cortex_request_queue_requests_total Total number of query requests going to the request queue. + # TYPE cortex_request_queue_requests_total counter + cortex_request_queue_requests_total{priority="0",user="another"} 1 `), "cortex_query_scheduler_queue_length", "cortex_request_queue_requests_total")) }