From 413f305c6682e59321c45066cd760f09eb4aeead Mon Sep 17 00:00:00 2001 From: QuentinBisson Date: Tue, 19 Sep 2023 11:11:28 +0200 Subject: [PATCH] Fix Prometheus agent failing alerts Signed-off-by: QuentinBisson --- CHANGELOG.md | 6 +++++- .../templates/alerting-rules/prometheus-agent.rules.yml | 2 -- test/tests/providers/global/prometheus-agent.rules.test.yml | 4 ---- 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6d93a9ea..79f9b7dd4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,11 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Changed - Add missing prometheus-agent inhibition to `KubeStateMetricsDown` alert -- Change time duration before `ManagementClusterDeploymentMissingAWS` pages because it is dependant on the `PrometheusAgentFailing` alert. +- Change time duration before `ManagementClusterDeploymentMissingAWS` pages because it is dependant on the `PrometheusAgentFailing` alert. + +### Fixed + +- Remove `cancel_if_outside_working_hours` from PrometheusAgent alerts. ## [2.132.0] - 2023-09-15 diff --git a/helm/prometheus-rules/templates/alerting-rules/prometheus-agent.rules.yml b/helm/prometheus-rules/templates/alerting-rules/prometheus-agent.rules.yml index ebeafb6cd..97bf1b956 100644 --- a/helm/prometheus-rules/templates/alerting-rules/prometheus-agent.rules.yml +++ b/helm/prometheus-rules/templates/alerting-rules/prometheus-agent.rules.yml @@ -38,7 +38,6 @@ spec: cancel_if_cluster_is_not_running_prometheus_agent: "true" cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" - cancel_if_outside_working_hours: "true" ## Page Atlas if prometheus agent is missing shards to send samples to MC prometheus. - alert: PrometheusAgentShardsMissing annotations: @@ -74,5 +73,4 @@ spec: cancel_if_cluster_is_not_running_prometheus_agent: "true" cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" - cancel_if_outside_working_hours: "true" {{- end }} diff --git a/test/tests/providers/global/prometheus-agent.rules.test.yml b/test/tests/providers/global/prometheus-agent.rules.test.yml index 7db4b646c..d7c43a895 100644 --- a/test/tests/providers/global/prometheus-agent.rules.test.yml +++ b/test/tests/providers/global/prometheus-agent.rules.test.yml @@ -22,7 +22,6 @@ tests: cancel_if_cluster_is_not_running_prometheus_agent: "true" cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" - cancel_if_outside_working_hours: "true" exp_annotations: dashboard: "promRW001/prometheus-remote-write" description: "Prometheus agent remote write is failing." @@ -44,7 +43,6 @@ tests: cancel_if_cluster_is_not_running_prometheus_agent: "true" cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" - cancel_if_outside_working_hours: "true" exp_annotations: dashboard: "promRW001/prometheus-remote-write" description: "Prometheus agent remote write is failing." @@ -80,7 +78,6 @@ tests: cancel_if_cluster_is_not_running_prometheus_agent: "true" cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" - cancel_if_outside_working_hours: "true" exp_annotations: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent-missing-shards/" @@ -97,7 +94,6 @@ tests: cancel_if_cluster_is_not_running_prometheus_agent: "true" cancel_if_cluster_status_creating: "true" cancel_if_cluster_status_deleting: "true" - cancel_if_outside_working_hours: "true" exp_annotations: description: "Prometheus agent is missing shards." opsrecipe: "prometheus-agent-missing-shards/"