From c95ab4c79065f3d48e95ea33d23efff6516c70bf Mon Sep 17 00:00:00 2001 From: Juraj Michalek Date: Tue, 2 Apr 2024 10:25:22 +0200 Subject: [PATCH] chore: mixin make range interval configurable in recording rule --- operations/mimir-mixin/alerts/blocks.libsonnet | 8 +++++--- operations/mimir-mixin/recording_rules.libsonnet | 4 ++-- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/operations/mimir-mixin/alerts/blocks.libsonnet b/operations/mimir-mixin/alerts/blocks.libsonnet index 8f6ed51d2f8..89ac3df611e 100644 --- a/operations/mimir-mixin/alerts/blocks.libsonnet +++ b/operations/mimir-mixin/alerts/blocks.libsonnet @@ -14,17 +14,18 @@ (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0) and # Only if the ingester has ingested samples over the last 4h. - (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) + (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[4h])) > 0) and # Only if the ingester was ingesting samples 4h ago. This protects against the case where the ingester replica # had ingested samples in the past, then no traffic was received for a long period and then it starts # receiving samples again. Without this check, the alert would fire as soon as it gets back receiving # samples, while the a block shipping is expected within the next 4h. 
- (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0) + (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[1h] offset 4h)) > 0) ||| % { alert_aggregation_labels: $._config.alert_aggregation_labels, per_instance_label: $._config.per_instance_label, alert_aggregation_rule_prefix: $._config.alert_aggregation_rule_prefix, + recording_rules_range_interval: $._config.recording_rules_range_interval, }, labels: { severity: 'critical', @@ -41,11 +42,12 @@ expr: ||| (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0) and - (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0) + (max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[4h])) > 0) ||| % { alert_aggregation_labels: $._config.alert_aggregation_labels, per_instance_label: $._config.per_instance_label, alert_aggregation_rule_prefix: $._config.alert_aggregation_rule_prefix, + recording_rules_range_interval: $._config.recording_rules_range_interval, }, labels: { severity: 'critical', diff --git a/operations/mimir-mixin/recording_rules.libsonnet b/operations/mimir-mixin/recording_rules.libsonnet index 567bae04989..5f0bb2a68d4 100644 --- a/operations/mimir-mixin/recording_rules.libsonnet +++ b/operations/mimir-mixin/recording_rules.libsonnet @@ -333,9 +333,9 @@ local utils = import 'mixin-utils/utils.libsonnet'; rules: [ { // 
cortex_ingester_ingested_samples_total is per user, in this rule we want to see the sum per cluster/namespace/instance - record: '%s_%s:cortex_ingester_ingested_samples_total:rate1m' % [$._config.alert_aggregation_rule_prefix, $._config.per_instance_label], + record: '%s_%s:cortex_ingester_ingested_samples_total:rate%s' % [$._config.alert_aggregation_rule_prefix, $._config.per_instance_label, $._config.recording_rules_range_interval], expr: ||| - sum by(%(alert_aggregation_labels)s, %(per_instance_label)s) (rate(cortex_ingester_ingested_samples_total[1m])) + sum by(%(alert_aggregation_labels)s, %(per_instance_label)s) (rate(cortex_ingester_ingested_samples_total[%(recording_rules_range_interval)s])) ||| % $._config, }, ],