From 1e4d11141c4896b609d85477e933ffdef7e557b8 Mon Sep 17 00:00:00 2001 From: Anand Rajagopal <anrajag@amazon.com> Date: Tue, 17 Oct 2023 16:50:04 +0000 Subject: [PATCH] Add contextual information and query statistics to log Signed-off-by: Anand Rajagopal <anrajag@amazon.com> --- CHANGELOG.md | 1 + pkg/ruler/compat.go | 30 ++++++++++++++++++++++++++++-- 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5233a07d3ef..2085470f507 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # Changelog ## master / unreleased +* [CHANGE] Ruler: Add contextual info and query statistics to log * [FEATURE] Ruler: Add support for disabling rule groups. #5521 * [FEATURE] Added the flag `-alertmanager.alerts-gc-interval` to configure alert manager alerts Garbage collection interval. #5550 * [FEATURE] Ruler: Add support for Limit field on RuleGroup. #5528 diff --git a/pkg/ruler/compat.go b/pkg/ruler/compat.go index e8e597f5150..7caf3cb20e3 100644 --- a/pkg/ruler/compat.go +++ b/pkg/ruler/compat.go @@ -3,6 +3,7 @@ package ruler import ( "context" "errors" + "github.com/cortexproject/cortex/pkg/querier/stats" "time" "github.com/cortexproject/cortex/pkg/util/validation" @@ -214,6 +215,7 @@ func RecordAndReportRuleQueryMetrics(qf rules.QueryFunc, queryTime prometheus.Co } return func(ctx context.Context, qs string, t time.Time) (promql.Vector, error) { + queryStats, ctx := stats.ContextWithEmptyStats(ctx) // If we've been passed a counter we want to record the wall time spent executing this request. timer := prometheus.NewTimer(nil) defer func() { @@ -224,8 +226,29 @@ func RecordAndReportRuleQueryMetrics(qf rules.QueryFunc, queryTime prometheus.Co logMessage := []interface{}{ "msg", "query stats", "component", "ruler", - "cortex_ruler_query_seconds_total", querySeconds, + } + if origin := ctx.Value(promql.QueryOrigin{}); origin != nil { + queryLabels := origin.(map[string]interface{}) + rgMap := queryLabels["ruleGroup"].(map[string]string) + logMessage = append(logMessage, + "rule_group", rgMap["name"], + "namespace", rgMap["file"], + ) + } + ruleDetail := rules.FromOriginContext(ctx) + logMessage = append(logMessage, + "rule", ruleDetail.Name, "query", qs, + "cortex_ruler_query_seconds_total", querySeconds, + "wall_time", queryStats.WallTime, + "fetched_series_count", queryStats.FetchedSeriesCount, + "fetched_chunk_count", queryStats.FetchedChunksCount, + "fetched_chunk_bytes", queryStats.FetchedChunkBytes, + "sample_count", queryStats.FetchedSamplesCount, + ) + // log any additional fields from the map + for k, v := range queryStats.ExtraFields { + logMessage = append(logMessage, k, v) } level.Info(util_log.WithContext(ctx, logger)).Log(logMessage...) }() @@ -295,10 +318,13 @@ func DefaultTenantManagerFactory(cfg Config, p Pusher, q storage.Queryable, engi totalWrites := totalWritesVec.WithLabelValues(userID) failedWrites := failedWritesVec.WithLabelValues(userID) + engineQueryFunc := EngineQueryFunc(engine, q, overrides, userID) + metricsQueryFunc := MetricsQueryFunc(engineQueryFunc, totalQueries, failedQueries) + return rules.NewManager(&rules.ManagerOptions{ Appendable: NewPusherAppendable(p, userID, overrides, totalWrites, failedWrites), Queryable: q, - QueryFunc: RecordAndReportRuleQueryMetrics(MetricsQueryFunc(EngineQueryFunc(engine, q, overrides, userID), totalQueries, failedQueries), queryTime, logger), + QueryFunc: RecordAndReportRuleQueryMetrics(metricsQueryFunc, queryTime, logger), Context: user.InjectOrgID(ctx, userID), ExternalURL: cfg.ExternalURL.URL, NotifyFunc: SendAlerts(notifier, cfg.ExternalURL.URL.String()),