From ba99db2fec1f06d4965cb57feb8b3792dc5bf9a7 Mon Sep 17 00:00:00 2001
From: Laszlo Uveges
Date: Tue, 30 Apr 2024 10:01:30 +0200
Subject: [PATCH] Add missing crsync deployment alerts (#1147)

---
 CHANGELOG.md                                  |  5 +++-
 .../templates/alerting-rules/crsync.rules.yml | 30 +++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ab04f270e..29eaad74b 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -7,11 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
-- Changed DeploymentNotSatisfiedBigMac alert to work for teleport related deployments only on CAPI flavored clusters
+### Added
+
+- Added alerts for absent `crsync` deployments.
 
 ### Changed
 
 - Update LokiRingUnhealthy query to avoid false positive when a new pod is starting.
+- Changed DeploymentNotSatisfiedBigMac alert to work for teleport related deployments only on CAPI flavored clusters
 
 ## [3.12.2] - 2024-04-25
 
diff --git a/helm/prometheus-rules/templates/alerting-rules/crsync.rules.yml b/helm/prometheus-rules/templates/alerting-rules/crsync.rules.yml
index 883d26cb8..de564da67 100644
--- a/helm/prometheus-rules/templates/alerting-rules/crsync.rules.yml
+++ b/helm/prometheus-rules/templates/alerting-rules/crsync.rules.yml
@@ -13,6 +13,36 @@ spec:
   groups:
   - name: crsync
     rules:
+    - alert: CrsyncDockerIoIsMissing
+      annotations:
+        description: 'CrSync deployment for docker.io is absent'
+        opsrecipe: crsync-deployments-missing/
+      expr: absent(kube_deployment_status_replicas_available{namespace="crsync", deployment="crsync-docker-io"})
+      for: 10m
+      labels:
+        area: kaas
+        cancel_if_cluster_status_creating: "true"
+        cancel_if_cluster_status_deleting: "true"
+        cancel_if_cluster_status_updating: "true"
+        cancel_if_outside_working_hours: "true"
+        severity: page
+        team: honeybadger
+        topic: releng
+    - alert: CrsyncGiantswarmAzureCrIoIsMissing
+      annotations:
+        description: 'CrSync deployment for giantswarm.azurecr.io is absent'
+        opsrecipe: crsync-deployments-missing/
+      expr: absent(kube_deployment_status_replicas_available{namespace="crsync", deployment="crsync-giantswarm-azurecr-io"})
+      for: 10m
+      labels:
+        area: kaas
+        cancel_if_cluster_status_creating: "true"
+        cancel_if_cluster_status_deleting: "true"
+        cancel_if_cluster_status_updating: "true"
+        cancel_if_outside_working_hours: "true"
+        severity: page
+        team: honeybadger
+        topic: releng
     - alert: CrsyncDeploymentNotSatisfied
       annotations:
         description: '{{`CrSync deployment {{ $labels.deployment }} is not satisfied in {{ $labels.installation }} / {{ $labels.cluster_id }} at the {{ $labels.namespace }} namespace.`}}'