Fix the kube-state-metrics down alert for installations where Mimir is disabled.

The non-Mimir expression now fires only when BOTH selectors report
kube-state-metrics as down or absent: the "modern" one (instance on port 8080)
and the "vintage" one (empty container label, clusters without a
ServiceMonitor). The Mimir branch is untouched context.

NOTE(review): this patch was recovered from a copy whose newlines and leading
whitespace had been collapsed. The indentation below is reconstructed from the
YAML nesting; confirm it against the target file before applying (or apply with
`git apply --ignore-whitespace`).

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3f552c814..bdf225f00 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Fixed
+
+- Fix `kube-state-metrics` down alert.
+
 ## [3.7.0] - 2024-04-08
 
 ### Fixed
diff --git a/helm/prometheus-rules/templates/alerting-rules/kube-state-metrics.rules.yml b/helm/prometheus-rules/templates/alerting-rules/kube-state-metrics.rules.yml
index 7a259ad6f..ef60bb4a9 100644
--- a/helm/prometheus-rules/templates/alerting-rules/kube-state-metrics.rules.yml
+++ b/helm/prometheus-rules/templates/alerting-rules/kube-state-metrics.rules.yml
@@ -15,7 +15,16 @@ spec:
         description: '{{`KubeStateMetrics ({{ $labels.instance }}) is down.`}}'
         opsrecipe: kube-state-metrics-down/
       {{- if not .Values.mimir.enabled }}
-      expr: label_replace(up{app="kube-state-metrics",container=""}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*") == 0 or absent(up{app="kube-state-metrics",container=""} == 1)
+      expr: |-
+        (
+          # modern clusters
+          label_replace(up{app="kube-state-metrics",instance=~".*:8080"}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*") == 0 or absent(up{app="kube-state-metrics",instance=~".*:8080"} == 1)
+        )
+        and
+        (
+          # vintage clusters without servicemonitor
+          label_replace(up{app="kube-state-metrics",container=""}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*") == 0 or absent(up{app="kube-state-metrics",container=""} == 1)
+        )
       {{- else }}
       expr: |-
         count by (cluster_id, installation, provider, pipeline) (label_replace(up{app="kube-state-metrics", instance=~".*:8080"}, "ip", "$1.$2.$3.$4", "node", "ip-(\\d+)-(\\d+)-(\\d+)-(\\d+).*")) == 0