Skip to content

Commit

Permalink
Add unit test to alertmanager.rules
Browse files Browse the repository at this point in the history
  • Loading branch information
Marie Roque committed Dec 19, 2023
1 parent ab8c1e0 commit d78618d
Show file tree
Hide file tree
Showing 3 changed files with 70 additions and 5 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,11 @@ spec:
annotations:
description: '{{`AlertManager {{ $labels.integration }} notifications are failing.`}}'
opsrecipe: alert-manager-notifications-failing/
expr: rate(alertmanager_notifications_failed_total{integration!="opsgenie"}[5m]) > 0
for: 10m
# The rate window is 20m because the AlertManager config currently sets `group_interval=15m`, which means that a failed notification is only retried after 15m,
# so the failure counter stays flat between two retries and a shorter window would see no increase.
# Here, we decide to page after 3 successive failures, so we need to wait 3*15m = 45m before paging.
expr: rate(alertmanager_notifications_failed_total{integration!="opsgenie"}[20m]) > 0
for: 45m
labels:
area: empowerment
severity: page
Expand All @@ -27,8 +30,9 @@ spec:
annotations:
description: '{{`AlertManager {{ $labels.integration }} notifications are failing.`}}'
opsrecipe: alert-manager-notifications-failing/
expr: rate(alertmanager_notifications_failed_total{integration="opsgenie"}[5m]) > 0
for: 2m
# Here, we decide to notify after 2 successive failures (opsgenie notification), so we need to wait 2*15m = 30m before notifying.
expr: rate(alertmanager_notifications_failed_total{integration="opsgenie"}[20m]) > 0
for: 30m
labels:
area: empowerment
severity: notify
Expand Down
1 change: 0 additions & 1 deletion test/conf/promtool_ignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
templates/alerting-rules-test/capo.rules.test.yml
templates/alerting-rules/alertmanager-dashboard.rules.yml
templates/alerting-rules/alertmanager.rules.yml
templates/alerting-rules/apiserver.management-cluster.rules.yml
templates/alerting-rules/apiserver.workload-cluster.rules.yml
templates/alerting-rules/argocd.rules.yml
Expand Down
62 changes: 62 additions & 0 deletions test/tests/providers/global/alertmanager.rules.test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
---
# promtool unit tests for the AlertManager notification-failure alerts.
# Timings quoted in the comments below assume the rules under test evaluate
# `rate(alertmanager_notifications_failed_total{...}[20m]) > 0` with
# `for: 45m` (page) and `for: 30m` (notify) -- confirm against alertmanager.rules.yml.
# An `alert_rule_test` entry without `exp_alerts` expects no firing alert at that time.
rule_files:
  - alertmanager.rules.yml

tests:
  # Page alert: integrations other than opsgenie.
  - interval: 1m
    input_series:
      # after 1h, slack notification fails during 2h then works again (15m group_interval) => alert fires after 3 successive failures
      - series: 'alertmanager_notifications_failed_total{integration="slack"}'
        values: "0x60 1+0x15 2+0x15 3+0x15 4+0x15 5+0x15 6+0x15 7+0x15 8+0x15 8+0x120"
      # after 1h, webhook notification fails 2 times during 30mn then works again => alert must not fire
      - series: 'alertmanager_notifications_failed_total{integration="webhook"}'
        values: "0x60 1+0x15 2+0x15 2+0x15 2+0x15 2+0x15 2+0x15 2+0x15 2+0x15 2+0x120"
    alert_rule_test:
      # counters are still flat: nothing fires
      - alertname: AlertmanagerPageNotificationsFailing
        eval_time: 10m
      # failures started around 61m: alert is only pending, so nothing fires yet
      - alertname: AlertmanagerPageNotificationsFailing
        eval_time: 90m
      - alertname: AlertmanagerPageNotificationsFailing
        eval_time: 95m
      # 45m after the first slack failure: only the slack alert fires (webhook recovered in time)
      - alertname: AlertmanagerPageNotificationsFailing
        eval_time: 106m
        exp_alerts:
          - exp_labels:
              area: empowerment
              severity: page
              team: atlas
              topic: monitoring
              integration: slack
              cancel_if_outside_working_hours: "true"
            exp_annotations:
              description: "AlertManager slack notifications are failing."
              opsrecipe: alert-manager-notifications-failing/
      # slack counter has been flat again for a while: alert is resolved
      - alertname: AlertmanagerPageNotificationsFailing
        eval_time: 240m
  # Notify alert: opsgenie integration only.
  - interval: 1m
    input_series:
      # after 1h, the opsgenie failure counter increases twice (around 61m and 77m), stays flat until around 170m,
      # then increases one more time (group_interval=15m)
      # => alert fires after the 2 successive failures only, not after the isolated one
      - series: 'alertmanager_notifications_failed_total{integration="opsgenie"}'
        values: "0x60 1+0x15 2+0x15 2+0x15 2+0x60 3+0x15 3+0x60"
    alert_rule_test:
      # counter is still flat: nothing fires
      - alertname: AlertmanagerNotifyNotificationsFailing
        eval_time: 10m
      # first failure seen, alert is only pending
      - alertname: AlertmanagerNotifyNotificationsFailing
        eval_time: 75m
      # 30m after the first failure: alert fires
      - alertname: AlertmanagerNotifyNotificationsFailing
        eval_time: 91m
        exp_alerts:
          - exp_labels:
              area: empowerment
              severity: notify
              team: atlas
              topic: monitoring
              integration: opsgenie
            exp_annotations:
              description: "AlertManager opsgenie notifications are failing."
              opsrecipe: alert-manager-notifications-failing/
      # isolated failure around 170m: alert is only pending, so nothing fires
      - alertname: AlertmanagerNotifyNotificationsFailing
        eval_time: 180m
      # counter is flat again: nothing fires
      - alertname: AlertmanagerNotifyNotificationsFailing
        eval_time: 210m

0 comments on commit d78618d

Please sign in to comment.