Skip to content

Commit

Permalink
chore: mixin make range interval configurable in recording rule
Browse files Browse the repository at this point in the history
  • Loading branch information
jmichalek132 committed Apr 2, 2024
1 parent 1f7b840 commit c95ab4c
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 5 deletions.
8 changes: 5 additions & 3 deletions operations/mimir-mixin/alerts/blocks.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,18 @@
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) > 0)
and
# Only if the ingester has ingested samples over the last 4h.
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0)
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[4h])) > 0)
and
# Only if the ingester was ingesting samples 4h ago. This protects against the case where the ingester replica
# had ingested samples in the past, then no traffic was received for a long period and then it starts
# receiving samples again. Without this check, the alert would fire as soon as it gets back receiving
# samples, while a block shipment is expected within the next 4h.
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[1h] offset 4h)) > 0)
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[1h] offset 4h)) > 0)
||| % {
alert_aggregation_labels: $._config.alert_aggregation_labels,
per_instance_label: $._config.per_instance_label,
alert_aggregation_rule_prefix: $._config.alert_aggregation_rule_prefix,
recording_rules_range_interval: $._config.recording_rules_range_interval,
},
labels: {
severity: 'critical',
Expand All @@ -41,11 +42,12 @@
expr: |||
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (cortex_ingester_shipper_last_successful_upload_timestamp_seconds) == 0)
and
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate1m[4h])) > 0)
(max by(%(alert_aggregation_labels)s, %(per_instance_label)s) (max_over_time(%(alert_aggregation_rule_prefix)s_%(per_instance_label)s:cortex_ingester_ingested_samples_total:rate%(recording_rules_range_interval)s[4h])) > 0)
||| % {
alert_aggregation_labels: $._config.alert_aggregation_labels,
per_instance_label: $._config.per_instance_label,
alert_aggregation_rule_prefix: $._config.alert_aggregation_rule_prefix,
recording_rules_range_interval: $._config.recording_rules_range_interval,
},
labels: {
severity: 'critical',
Expand Down
4 changes: 2 additions & 2 deletions operations/mimir-mixin/recording_rules.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -333,9 +333,9 @@ local utils = import 'mixin-utils/utils.libsonnet';
rules: [
{
// cortex_ingester_ingested_samples_total is per user, in this rule we want to see the sum per cluster/namespace/instance
record: '%s_%s:cortex_ingester_ingested_samples_total:rate1m' % [$._config.alert_aggregation_rule_prefix, $._config.per_instance_label],
record: '%s_%s:cortex_ingester_ingested_samples_total:rate%s' % [$._config.alert_aggregation_rule_prefix, $._config.per_instance_label, $._config.recording_rules_range_interval],
expr: |||
sum by(%(alert_aggregation_labels)s, %(per_instance_label)s) (rate(cortex_ingester_ingested_samples_total[1m]))
sum by(%(alert_aggregation_labels)s, %(per_instance_label)s) (rate(cortex_ingester_ingested_samples_total[%(recording_rules_range_interval)s]))
||| % $._config,
},
],
Expand Down

0 comments on commit c95ab4c

Please sign in to comment.