From e48e77ffc0801f5d9538c6139d6ee61419b74b87 Mon Sep 17 00:00:00 2001
From: QuentinBisson
Date: Mon, 23 Sep 2024 15:58:22 +0200
Subject: [PATCH] fix-mimir-hpa-label

---
 CHANGELOG.md                                  |  1 +
 .../atlas/alerting-rules/mimir.rules.yml      |  2 +-
 .../atlas/alerting-rules/mimir.rules.test.yml |  4 ++--
 .../atlas/alerting-rules/loki.rules.test.yml  | 16 ++++++++--------
 4 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 9a4f3ae8..95021edf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -18,6 +18,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Remove deprecated app labels for `external-dns` and `ingress-nginx` alerts.
 - Remove deprecated app labels for `kube-state-metrics` alerts.
 - Fix falco events alerts node label to hostname as node does not exist.
+- Fix `MimirHPAReachedMaxReplicas` description to render the horizontalpodautoscaler label.

 ## [4.15.2] - 2024-09-17

diff --git a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml
index 6eac71fd..d641c2c9 100644
--- a/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml
+++ b/helm/prometheus-rules/templates/platform/atlas/alerting-rules/mimir.rules.yml
@@ -142,7 +142,7 @@ spec:
             topic: observability
         - alert: MimirHPAReachedMaxReplicas
           annotations:
-            description: '{{`Mimir ${ labels.horizontalpodautoscaler } HPA has reached maximum replicas and consume too much resources, it needs to be scaled up.`}}'
+            description: '{{`Mimir {{ $labels.horizontalpodautoscaler }} HPA has reached maximum replicas and consume too much resources, it needs to be scaled up.`}}'
             opsrecipe: mimir-hpa/
           expr: |-
             (
diff --git a/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/mimir.rules.test.yml b/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/mimir.rules.test.yml
index f1d45481..4d18a45c 100644
--- a/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/mimir.rules.test.yml
+++ b/test/tests/providers/capi/capa-mimir/platform/atlas/alerting-rules/mimir.rules.test.yml
@@ -363,7 +363,7 @@ tests:
               horizontalpodautoscaler: mimir-distributor
               namespace: mimir
             exp_annotations:
-              description: "Mimir ${ labels.horizontalpodautoscaler } HPA has reached maximum replicas and consume too much resources, it needs to be scaled up."
+              description: "Mimir mimir-distributor HPA has reached maximum replicas and consume too much resources, it needs to be scaled up."
               opsrecipe: "mimir-hpa/"
       - alertname: MimirHPAReachedMaxReplicas
         eval_time: 246m
@@ -382,7 +382,7 @@ tests:
               horizontalpodautoscaler: mimir-distributor
               namespace: mimir
             exp_annotations:
-              description: "Mimir ${ labels.horizontalpodautoscaler } HPA has reached maximum replicas and consume too much resources, it needs to be scaled up."
+              description: "Mimir mimir-distributor HPA has reached maximum replicas and consume too much resources, it needs to be scaled up."
opsrecipe: "mimir-hpa/" # Test for MimirCompactorFailedCompaction alert - interval: 1m diff --git a/test/tests/providers/global/platform/atlas/alerting-rules/loki.rules.test.yml b/test/tests/providers/global/platform/atlas/alerting-rules/loki.rules.test.yml index ed6e5fdc..a2e8b6bd 100644 --- a/test/tests/providers/global/platform/atlas/alerting-rules/loki.rules.test.yml +++ b/test/tests/providers/global/platform/atlas/alerting-rules/loki.rules.test.yml @@ -18,7 +18,7 @@ tests: alert_rule_test: - alertname: LokiRequestPanics eval_time: 15m # should be OK after 15 minutes - exp_alerts: + exp_alerts: [] - alertname: LokiRequestPanics eval_time: 25m # After 25 minutes, should fire an alert for the t+20 error exp_alerts: @@ -43,11 +43,11 @@ tests: opsrecipe: loki/ - alertname: LokiRequestPanics eval_time: 40m # After 40 minutes, all should be back to normal - exp_alerts: + exp_alerts: [] - alertname: LokiRequestErrors eval_time: 15m # should be OK after 15 minutes - exp_alerts: + exp_alerts: [] - alertname: LokiRequestErrors eval_time: 160m # Alert after more than 120m of incident exp_alerts: @@ -74,13 +74,13 @@ tests: - alertname: LokiRingUnhealthy eval_time: 15m # should be OK after 15 minutes - exp_alerts: + exp_alerts: [] - alertname: LokiRingUnhealthy eval_time: 25m # after 25 minutes we have an unhealthy member, but we want to filter too short events. So no alert yet. - exp_alerts: + exp_alerts: [] - alertname: LokiRingUnhealthy eval_time: 35m # special case to validate when a new pod is unhealthy (no data at the beginning) - exp_alerts: + exp_alerts: [] - alertname: LokiRingUnhealthy eval_time: 60m # now the event has been there for 20 minutes, we should have an alert. exp_alerts: @@ -115,7 +115,7 @@ tests: alert_rule_test: - alertname: LokiRestartingTooOften eval_time: 15m # should be OK after 15 minutes - exp_alerts: + exp_alerts: [] - alertname: LokiRestartingTooOften eval_time: 85m # After 85 minutes, should fire an alert for the t+85 error exp_alerts: @@ -133,7 +133,7 @@ tests: opsrecipe: loki/ - alertname: LokiRestartingTooOften eval_time: 140m # After 140m minutes, all should be back to normal - exp_alerts: + exp_alerts: [] - interval: 1m input_series: # loki-backend real memory usage gradually decreases until it goes below 30% of the memory requests.