# Patch summary (text before the first "diff --git" header is ignored by patch tools):
#   1. mimir.rules.yml: starts a new rule group named `mimir.continuous-test`
#      immediately before the MimirContinuousTestFailingOnWrites alert.
#   2. mimir.rules.test.yml (capa): adds two rule unit tests, one for
#      MimirContinuousTestFailingOnWrites and one for
#      MimirContinuousTestFailingOnReads. Each feeds a single counter series
#      (no samples, then increasing, then flat at 0) and expects the alert only
#      at the 95m evaluation, not at 40m or 160m.
#
# NOTE(review): the MimirContinuousTestFailingOnReads test expects the
# description "... detected errors in the write path." -- this looks like a
# copy-paste from the writes alert; confirm against the rule's annotation
# (the rule definition is not part of this patch).
# NOTE(review): line breaks and indentation were reconstructed from a
# whitespace-collapsed copy; indentation depths are assumed from the usual
# PrometheusRule / promtool test layout -- verify against the target files.
diff --git a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml
index 10e68506..99604b6d 100644
--- a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml
+++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml
@@ -169,6 +169,8 @@ spec:
         severity: page
         team: atlas
         topic: observability
+  - name: mimir.continuous-test
+    rules:
     - alert: MimirContinuousTestFailingOnWrites
       annotations:
         dashboard: bdxh7hszfgmbkc/mimir-continous-test
diff --git a/test/tests/providers/capi/capa/platform/atlas/alerting-rules/mimir.rules.test.yml b/test/tests/providers/capi/capa/platform/atlas/alerting-rules/mimir.rules.test.yml
index 6bdfeaea..dd062bbb 100644
--- a/test/tests/providers/capi/capa/platform/atlas/alerting-rules/mimir.rules.test.yml
+++ b/test/tests/providers/capi/capa/platform/atlas/alerting-rules/mimir.rules.test.yml
@@ -390,3 +390,65 @@ tests:
         eval_time: 205m
       - alertname: MimirCompactorFailedCompaction
         eval_time: 350m
+
+  # Test for MimirContinuousTestFailingOnWrites alert
+  - interval: 1m
+    input_series:
+      # Test: none, rate > 0, rate = 0
+      - series: 'mimir_continuous_test_writes_failed_total{cluster_id="golem", installation="golem", namespace="mimir", pipeline="testing", provider="capa"}'
+        values: "_x20 1+1x80 0+0x70"
+    alert_rule_test:
+      - alertname: MimirContinuousTestFailingOnWrites
+        eval_time: 40m
+      - alertname: MimirContinuousTestFailingOnWrites
+        eval_time: 95m
+        exp_alerts:
+          - exp_labels:
+              area: platform
+              cancel_if_outside_working_hours: "true"
+              cancel_if_cluster_status_creating: "true"
+              cancel_if_cluster_status_deleting: "true"
+              cancel_if_cluster_status_updating: "true"
+              cluster_id: golem
+              installation: golem
+              namespace: mimir
+              severity: page
+              team: atlas
+              topic: observability
+            exp_annotations:
+              dashboard: bdxh7hszfgmbkc/mimir-continous-test
+              description: "Mimir continous-test detected errors in the write path."
+              opsrecipe: "mimir/"
+      - alertname: MimirContinuousTestFailingOnWrites
+        eval_time: 160m
+
+  # Test for MimirContinuousTestFailingOnReads alert
+  - interval: 1m
+    input_series:
+      # Test: none, rate > 0, rate = 0
+      - series: 'mimir_continuous_test_queries_failed_total{cluster_id="golem", installation="golem", namespace="mimir", pipeline="testing", provider="capa"}'
+        values: "_x20 1+1x80 0+0x70"
+    alert_rule_test:
+      - alertname: MimirContinuousTestFailingOnReads
+        eval_time: 40m
+      - alertname: MimirContinuousTestFailingOnReads
+        eval_time: 95m
+        exp_alerts:
+          - exp_labels:
+              area: platform
+              cancel_if_outside_working_hours: "true"
+              cancel_if_cluster_status_creating: "true"
+              cancel_if_cluster_status_deleting: "true"
+              cancel_if_cluster_status_updating: "true"
+              cluster_id: golem
+              installation: golem
+              namespace: mimir
+              severity: page
+              team: atlas
+              topic: observability
+            exp_annotations:
+              dashboard: bdxh7hszfgmbkc/mimir-continous-test
+              description: "Mimir continous-test detected errors in the write path."
+              opsrecipe: "mimir/"
+      - alertname: MimirContinuousTestFailingOnReads
+        eval_time: 160m