Skip to content

Commit

Permalink
Moved the KubeStateMetricsDown alert to the appropriate file
Browse files Browse the repository at this point in the history
  • Loading branch information
QuantumEnigmaa committed Sep 19, 2023
1 parent 68a3ac8 commit d7f5331
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 29 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,33 @@ spec:
groups:
- name: kube-state-metrics
rules:
# KubeStateMetricsDown: alert for kube-state-metrics scrape targets that are
# down or missing. The expression joins two parenthesised conditions with
# `and`, so both must hold:
#   - "modern clusters": an `up` series for app="kube-state-metrics" whose
#     instance ends in :8080 equals 0, or no such series equals 1;
#   - "vintage clusters without servicemonitor": an `up` series for
#     app="kube-state-metrics" with an empty container label equals 0, or no
#     such series equals 1.
# label_replace derives an "ip" label (A.B.C.D) from a "node" label shaped
# like ip-A-B-C-D...; series whose node label does not match are left as-is.
# The condition has to hold for 15 minutes (`for: 15m`) before the alert fires.
- alert: KubeStateMetricsDown
annotations:
description: '{{`KubeStateMetrics ({{ $labels.instance }}) is down.`}}'
opsrecipe: kube-state-metrics-down/
expr: |-
(
# modern clusters
label_replace(up{app="kube-state-metrics",instance=~".*:8080"}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*") == 0 or absent(up{app="kube-state-metrics",instance=~".*:8080"} == 1)
)
and
(
# vintage clusters without servicemonitor
label_replace(up{app="kube-state-metrics",container=""}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*") == 0 or absent(up{app="kube-state-metrics",container=""} == 1)
)
for: 15m
# Routing/inhibition labels attached to the fired alert. Values are quoted so
# they stay strings rather than YAML booleans.
# NOTE(review): the cancel_if_* and inhibit_* labels are presumably matched by
# Alertmanager inhibition rules defined elsewhere — confirm against that config.
labels:
area: kaas
cancel_if_apiserver_down: "true"
cancel_if_cluster_status_creating: "true"
cancel_if_cluster_status_deleting: "true"
cancel_if_cluster_has_no_workers: "true"
inhibit_kube_state_metrics_down: "true"
cancel_if_kubelet_down: "true"
cancel_if_outside_working_hours: "false"
severity: page
team: atlas
topic: observability
- alert: KubeStateMetricsSlow
annotations:
description: '{{`KubeStateMetrics ({{ $labels.instance }}) is too slow.`}}'
Expand Down
28 changes: 0 additions & 28 deletions helm/prometheus-rules/templates/alerting-rules/up.all.rules.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,31 +44,3 @@ spec:
severity: page
team: atlas
topic: observability
# KubeStateMetricsDown: alert for kube-state-metrics scrape targets that are
# down or missing. Both parenthesised conditions in the expression must hold
# (they are joined with `and`): the ":8080" instance selector ("modern
# clusters") and the empty-container selector ("vintage clusters without
# servicemonitor") must each have an `up` series equal to 0 or no `up` series
# equal to 1. label_replace builds an "ip" label (A.B.C.D) from a "node"
# label shaped like ip-A-B-C-D.... The alert fires after 15 minutes.
- alert: KubeStateMetricsDown
annotations:
description: '{{`KubeStateMetrics ({{ $labels.instance }}) is down.`}}'
opsrecipe: kube-state-metrics-down/
expr: |-
(
# modern clusters
label_replace(up{app="kube-state-metrics",instance=~".*:8080"}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*") == 0 or absent(up{app="kube-state-metrics",instance=~".*:8080"} == 1)
)
and
(
# vintage clusters without servicemonitor
label_replace(up{app="kube-state-metrics",container=""}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*") == 0 or absent(up{app="kube-state-metrics",container=""} == 1)
)
for: 15m
# Labels attached to the fired alert; values are quoted to keep them strings.
# NOTE(review): cancel_if_* / inhibit_* are presumably consumed by Alertmanager
# inhibition rules defined elsewhere — confirm.
labels:
area: kaas
cancel_if_apiserver_down: "true"
cancel_if_cluster_status_creating: "true"
cancel_if_cluster_status_deleting: "true"
cancel_if_cluster_has_no_workers: "true"
inhibit_kube_state_metrics_down: "true"
cancel_if_kubelet_down: "true"
cancel_if_outside_working_hours: "false"
severity: page
team: atlas
topic: observability

2 changes: 1 addition & 1 deletion test/tests/providers/global/up.all.rules.test.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
---
rule_files:
- up.all.rules.yml
- kube-state-metrics.rules.yml

tests:
# KubeStateMetricsDown tests
Expand Down

0 comments on commit d7f5331

Please sign in to comment.