From 07bca5647d872e9b271813dedf4fd06a3fffa8be Mon Sep 17 00:00:00 2001 From: Quentin Bisson Date: Mon, 6 May 2024 12:13:27 +0200 Subject: [PATCH] fix: cilium related alerts for mimir (#1159) Signed-off-by: QuentinBisson --- CHANGELOG.md | 1 + .../templates/alerting-rules/cilium.rules.yml | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 466624d27..695ce21f8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed - Remove cilium entry from KAAS SLOs. +- Fix cilium related alerts for Mimir. - Fix etcd alerts for Mimir. - Add missing labels for apiserver alerts. diff --git a/helm/prometheus-rules/templates/alerting-rules/cilium.rules.yml b/helm/prometheus-rules/templates/alerting-rules/cilium.rules.yml index 0f3ec9f40..74b7e36cf 100644 --- a/helm/prometheus-rules/templates/alerting-rules/cilium.rules.yml +++ b/helm/prometheus-rules/templates/alerting-rules/cilium.rules.yml @@ -14,7 +14,7 @@ spec: annotations: description: '{{`Cilium BPF map is about to fill up.`}}' opsrecipe: cilium-bpf-map/ - expr: avg(cilium_bpf_map_pressure) by (cluster_id, map_name) * 100 > 80 + expr: avg(cilium_bpf_map_pressure) by (cluster_id, installation, pipeline, provider, map_name) * 100 > 80 for: 15m labels: area: managedservices @@ -26,7 +26,7 @@ spec: annotations: description: '{{`Cilium BPF map is about filled up.`}}' opsrecipe: cilium-bpf-map/ - expr: avg(cilium_bpf_map_pressure) by (cluster_id, map_name) * 100 > 95 + expr: avg(cilium_bpf_map_pressure) by (cluster_id, installation, pipeline, provider, map_name) * 100 > 95 for: 15m labels: area: managedservices @@ -39,7 +39,7 @@ spec: opsrecipe: unsupported-cilium-network-policy/ # cilium_policy_change_total - for cilium >=1.15 # cilium_policy_import_errors_total - for cilium <1.15 - expr: max(rate(cilium_policy_change_total{outcome=~"fail.*"}[20m]) OR 
rate(cilium_policy_import_errors_total[20m])) > 0 + expr: max(rate(cilium_policy_change_total{outcome=~"fail.*"}[20m]) OR rate(cilium_policy_import_errors_total[20m])) by (cluster_id, installation, pipeline, provider) > 0 for: 10m labels: area: managedservices