From 16a556077563d2488091ff0281a68fa34149bc45 Mon Sep 17 00:00:00 2001
From: QuantumEnigmaa <thibaud@giantswarm.io>
Date: Wed, 4 Oct 2023 11:26:29 +0200
Subject: [PATCH 1/2] Remove PrometheusAvailabilityRatio alert

---
 .../prometheus-availability.rules.yml         | 32 ----------
 .../prometheus-availability.rules.test.yml    | 64 -------------------
 2 files changed, 96 deletions(-)
 delete mode 100644 helm/prometheus-rules/templates/alerting-rules/prometheus-availability.rules.yml
 delete mode 100644 test/tests/providers/global/prometheus-availability.rules.test.yml

diff --git a/helm/prometheus-rules/templates/alerting-rules/prometheus-availability.rules.yml b/helm/prometheus-rules/templates/alerting-rules/prometheus-availability.rules.yml
deleted file mode 100644
index 7d772ac4f..000000000
--- a/helm/prometheus-rules/templates/alerting-rules/prometheus-availability.rules.yml
+++ /dev/null
@@ -1,32 +0,0 @@
-apiVersion: monitoring.coreos.com/v1
-kind: PrometheusRule
-metadata:
-  creationTimestamp: null
-  labels:
-    {{- include "labels.common" . | nindent 4 }}
-    cluster_type: "management_cluster"
-  name: prometheus-availability.rules
-  namespace: {{ .Values.namespace  }}
-spec:
-  groups:
-  - name: prometheus
-    rules:
-    - alert: PrometheusAvailabilityRatio
-      annotations:
-        description: '{{`Prometheus {{$labels.pod}} has availability ratio of {{ printf "%.2f" $value }} (min 0.8) over the last hour.`}}'
-        opsrecipe: prometheus-resource-limit-reached/
-        dashboard: promavailability/prometheus-availability
-      expr: label_replace(avg(avg_over_time(kube_pod_status_ready{namespace=~"(.*)-prometheus", condition="true"}[1h])) by (pod), "cluster_id", "$1", "pod", "prometheus-(.+)-(.+)") < 0.8
-      # At startup, availability starts at 0 for a few minutes. So ratio grows slowly from 0.
-      for: 30m
-      labels:
-        area: empowerment
-        cancel_if_any_apiserver_down: "true"
-        cancel_if_cluster_status_creating: "true"
-        cancel_if_cluster_status_updating: "true"
-        cancel_if_cluster_status_deleting: "true"
-        cancel_if_cluster_has_no_workers: "true"
-        cancel_if_outside_working_hours: "true"
-        severity: page
-        team: atlas
-        topic: observability
diff --git a/test/tests/providers/global/prometheus-availability.rules.test.yml b/test/tests/providers/global/prometheus-availability.rules.test.yml
deleted file mode 100644
index d40c1de75..000000000
--- a/test/tests/providers/global/prometheus-availability.rules.test.yml
+++ /dev/null
@@ -1,64 +0,0 @@
----
-rule_files:
-  - prometheus-availability.rules.yml
-
-# Setting evaluation interval to 1h
-# to make it faster on long test duration.
-evaluation_interval: 1h
-
-tests:
-  # Test PrometheusAvailabilityRatio
-  - interval: 1m
-    input_series:
-      # This prometheus is up foreve - generates no alert
-      - series: 'kube_pod_status_ready{app="kube-state-metrics", condition="true", container="kube-state-metrics", namespace="install-prometheus", pod="prometheus-install-0"}'
-        values: "1+0x120"
-      # This prometheus starts at h+1, and takes 5min to get ready - generates no alert
-      - series: 'kube_pod_status_ready{app="kube-state-metrics", condition="true", container="kube-state-metrics", namespace="wcok-prometheus", pod="prometheus-wcok-0"}'
-        values: "_x60 0+0x5 1+0x60"
-      # This prometheus is down - generates alerts
-      - series: 'kube_pod_status_ready{app="kube-state-metrics", condition="true", container="kube-state-metrics", namespace="wcbad-prometheus", pod="prometheus-wcbad-0"}'
-        values: "0+0x60 1+0x60"
-    alert_rule_test:
-      - alertname: PrometheusAvailabilityRatio
-        eval_time: 60m
-        exp_alerts:
-          - exp_labels:
-              area: empowerment
-              severity: page
-              team: atlas
-              topic: observability
-              cancel_if_any_apiserver_down: "true"
-              cancel_if_cluster_has_no_workers: "true"
-              cancel_if_cluster_status_creating: "true"
-              cancel_if_cluster_status_deleting: "true"
-              cancel_if_cluster_status_updating: "true"
-              cancel_if_outside_working_hours: "true"
-              pod: "prometheus-wcbad-0"
-              cluster_id: wcbad
-            exp_annotations:
-              description: "Prometheus prometheus-wcbad-0 has availability ratio of 0.00 (min 0.8) over the last hour."
-              opsrecipe: "prometheus-resource-limit-reached/"
-              dashboard: "promavailability/prometheus-availability"
-      - alertname: PrometheusAvailabilityRatio
-        eval_time: 108m
-        exp_alerts:
-          - exp_labels:
-              area: empowerment
-              severity: page
-              team: atlas
-              topic: observability
-              cancel_if_any_apiserver_down: "true"
-              cancel_if_cluster_has_no_workers: "true"
-              cancel_if_cluster_status_creating: "true"
-              cancel_if_cluster_status_deleting: "true"
-              cancel_if_cluster_status_updating: "true"
-              cancel_if_outside_working_hours: "true"
-              pod: "prometheus-wcbad-0"
-              cluster_id: wcbad
-            exp_annotations:
-              description: "Prometheus prometheus-wcbad-0 has availability ratio of 0.00 (min 0.8) over the last hour."
-              opsrecipe: "prometheus-resource-limit-reached/"
-              dashboard: "promavailability/prometheus-availability"
-      - alertname: PrometheusAvailabilityRatio
-        eval_time: 140m

From a1fb6ccc7aed5ae8e735897191009e1ddbc99eb9 Mon Sep 17 00:00:00 2001
From: QuantumEnigmaa <thibaud@giantswarm.io>
Date: Wed, 4 Oct 2023 11:28:59 +0200
Subject: [PATCH 2/2] Add changelog entry for PrometheusAvailabilityRatio alert removal

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ff7791049..05afd9eac 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+### Removed
+
+- Remove PrometheusAvailabilityRatio alert.
+
 ## [2.135.0] - 2023-10-02
 
 ### Changed