From ba689b85cd60c046db4cbfad178f1da4804a005a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Vila=C3=A7a?= Date: Thu, 7 Sep 2023 18:59:19 +0100 Subject: [PATCH] Reduce VirtualMachineCRCErrors noise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: João Vilaça --- internal/operands/metrics/resources.go | 4 ++-- tests/monitoring_test.go | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/internal/operands/metrics/resources.go b/internal/operands/metrics/resources.go index 26423e4fb..ffb54befb 100644 --- a/internal/operands/metrics/resources.go +++ b/internal/operands/metrics/resources.go @@ -166,9 +166,9 @@ func getAlertRules() ([]promv1.Rule, error) { }, { Alert: "VirtualMachineCRCErrors", - Expr: intstr.FromString("kubevirt_ssp_vm_rbd_volume{volume_mode=\"Block\", rxbounce_enabled=\"false\"} > 0"), + Expr: intstr.FromString("(count(kubevirt_ssp_vm_rbd_volume{volume_mode=\"Block\", rxbounce_enabled=\"false\"} > 0) or vector(0)) > 0"), Annotations: map[string]string{ - "description": "VirtualMachine {{ $labels.namespace }}/{{ $labels.name }} may report OSD errors", + "description": "{{ $value }} Virtual Machines are in risk of causing CRC errors and major service outages", "summary": "When running VMs using ODF storage with 'rbd' mounter or 'rbd.csi.ceph.com provisioner', it will report bad crc/signature errors and cluster performance will be severely degraded if krbd:rxbounce is not set.", "runbook_url": fmt.Sprintf(runbookURLTemplate, "VirtualMachineCRCErrors"), }, diff --git a/tests/monitoring_test.go b/tests/monitoring_test.go index 8abc4e580..df14e52bb 100644 --- a/tests/monitoring_test.go +++ b/tests/monitoring_test.go @@ -283,7 +283,7 @@ var _ = Describe("Prometheus Alerts", func() { alertShouldNotBeActive("VirtualMachineCRCErrors") }) - It("[test_id:TODO] Should fire VirtualMachineCRCErrors is disabled", func() { + It("[test_id:TODO] Should fire VirtualMachineCRCErrors when rxbounce is disabled", func() { vmName := createResources(true, false) waitForSeriesToBeDetected(fmt.Sprintf("kubevirt_ssp_vm_rbd_volume{name='%s', rxbounce_enabled='false'}", vmName)) waitForAlertToActivate("VirtualMachineCRCErrors")