Skip to content

Commit

Permalink
Fix crsync alerting rules (#1140)
Browse files Browse the repository at this point in the history
  • Loading branch information
uvegla authored Apr 25, 2024
1 parent 800b640 commit 78ff922
Show file tree
Hide file tree
Showing 4 changed files with 62 additions and 5 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

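- Fix alerting rules for `crsync`: target the `gazelle` installation and the `operations` cluster, join tag counts on `cluster_id` and `repository` only, and add unit tests.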

## [3.12.0] - 2024-04-19

### Changed
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if eq .Values.managementCluster.name "gorilla" }}
{{- if eq .Values.managementCluster.name "gazelle" }}
apiVersion: monitoring.coreos.com/v1
kind: PrometheusRule
metadata:
Expand All @@ -15,9 +15,9 @@ spec:
rules:
- alert: CrsyncDeploymentNotSatisfied
annotations:
description: '{{`Crsync deployment is not satisfied.`}}'
description: '{{`CrSync deployment {{ $labels.deployment }} is not satisfied in {{ $labels.installation }} / {{ $labels.cluster_id }} at the {{ $labels.namespace }} namespace.`}}'
opsrecipe: deployment-not-satisfied/
expr: kube_deployment_status_replicas_available{cluster_type="workload_cluster", cluster_id="rfjh2", deployment=~"crsync-.*"} == 0
expr: kube_deployment_status_replicas_available{cluster_type="workload_cluster", cluster_id="operations", deployment=~"crsync-.*"} == 0
for: 10m
labels:
area: kaas
Expand All @@ -32,7 +32,7 @@ spec:
annotations:
description: '{{`Too many tags are not synchronised to registry mirrors.`}}'
opsrecipe: crsync-too-many-tags-missing/
expr: crsync_sync_tags_total{registry="quay.io"} - on (cluster_id, repository, app) group_left sum by(cluster_id, repository, app) (crsync_sync_tags_total{registry!="quay.io"}) > 0
expr: crsync_sync_tags_total{registry="quay.io"} - on (cluster_id, repository) group_left sum by(cluster_id, repository) (crsync_sync_tags_total{registry!="quay.io"}) > 0
for: 1h
labels:
area: kaas
Expand Down
1 change: 0 additions & 1 deletion test/conf/promtool_ignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ templates/alerting-rules/cluster-autoscaler.rules.yml
templates/alerting-rules/cluster-service.rules.yml
templates/alerting-rules/coredns.rules.yml
templates/alerting-rules/credentiald.rules.yml
templates/alerting-rules/crsync.rules.yml
templates/alerting-rules/daemonset.management-cluster.rules.yml
templates/alerting-rules/deployment.management-cluster.rules.yml
templates/alerting-rules/deployment.workload-cluster.rules.yml
Expand Down
54 changes: 54 additions & 0 deletions test/tests/providers/global/crsync.rules.test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
---
# promtool unit tests for the crsync alerting rules.
rule_files:
  - crsync.rules.yml

tests:
  # CrsyncDeploymentNotSatisfied (rule uses `for: 10m`).
  #
  # promtool expands "a x n" to n+1 samples, so with a 1m interval the
  # available-replica count is:
  #   minutes  0-5  -> 1
  #   minutes  6-15 -> 0  (recovers at minute 16, just before it would fire)
  #   minutes 16-21 -> 1
  #   minutes 22-32 -> 0  (down for a full 10 minutes at minute 32)
  - interval: 1m
    input_series:
      - series: 'kube_deployment_status_replicas_available{cluster_type="workload_cluster", installation="gazelle", cluster_id="operations", namespace="crsync", deployment="crsync-giantswarm-azurecr-io"}'
        values: "1x5 0x9 1x5 0x10"
    alert_rule_test:
      # Nine minutes into the first outage: still pending, so nothing may
      # fire. Omitting exp_alerts asserts that no alert is firing.
      - alertname: CrsyncDeploymentNotSatisfied
        eval_time: 15m
      # Ten minutes into the second outage: the alert must fire.
      - alertname: CrsyncDeploymentNotSatisfied
        eval_time: 32m
        exp_alerts:
          - exp_labels:
              alertname: "CrsyncDeploymentNotSatisfied"
              area: "kaas"
              cancel_if_cluster_status_creating: "true"
              cancel_if_cluster_status_deleting: "true"
              cancel_if_cluster_status_updating: "true"
              cancel_if_outside_working_hours: "true"
              cluster_id: "operations"
              cluster_type: "workload_cluster"
              deployment: "crsync-giantswarm-azurecr-io"
              installation: "gazelle"
              namespace: "crsync"
              severity: "page"
              team: "honeybadger"
              topic: "releng"
            exp_annotations:
              description: "CrSync deployment crsync-giantswarm-azurecr-io is not satisfied in gazelle / operations at the crsync namespace."
              opsrecipe: "deployment-not-satisfied/"

  # CrsyncTooManyTagsMissing (rule uses `for: 1h`).
  #
  # quay.io reports 100 tags while the only mirror reports 95, so the
  # difference is above 0 from minute 0 through minute 60.
  - interval: 1m
    input_series:
      - series: 'crsync_sync_tags_total{registry="quay.io", cluster_id="example", repository="giantswarm/example"}'
        values: "100x60"
      - series: 'crsync_sync_tags_total{registry="docker.io", cluster_id="example", repository="giantswarm/example"}'
        values: "95x60"
    alert_rule_test:
      # One minute short of the hold duration: still pending, nothing fires.
      - alertname: CrsyncTooManyTagsMissing
        eval_time: 59m
      # The gap has persisted for the full hour: the alert must fire.
      - alertname: CrsyncTooManyTagsMissing
        eval_time: 60m
        exp_alerts:
          - exp_labels:
              alertname: "CrsyncTooManyTagsMissing"
              area: "kaas"
              cancel_if_outside_working_hours: "true"
              cluster_id: "example"
              registry: "quay.io"
              repository: "giantswarm/example"
              severity: "page"
              team: "honeybadger"
              topic: "releng"
            exp_annotations:
              description: "Too many tags are not synchronised to registry mirrors."
              opsrecipe: "crsync-too-many-tags-missing/"

0 comments on commit 78ff922

Please sign in to comment.