diff --git a/internal/operands/metrics/resources.go b/internal/operands/metrics/resources.go index 26423e4fb..ffb54befb 100644 --- a/internal/operands/metrics/resources.go +++ b/internal/operands/metrics/resources.go @@ -166,9 +166,9 @@ func getAlertRules() ([]promv1.Rule, error) { }, { Alert: "VirtualMachineCRCErrors", - Expr: intstr.FromString("kubevirt_ssp_vm_rbd_volume{volume_mode=\"Block\", rxbounce_enabled=\"false\"} > 0"), + Expr: intstr.FromString("(count(kubevirt_ssp_vm_rbd_volume{volume_mode=\"Block\", rxbounce_enabled=\"false\"} > 0) or vector(0)) > 0"), Annotations: map[string]string{ - "description": "VirtualMachine {{ $labels.namespace }}/{{ $labels.name }} may report OSD errors", + "description": "{{ $value }} Virtual Machines are at risk of causing CRC errors and major service outages", "summary": "When running VMs using ODF storage with 'rbd' mounter or 'rbd.csi.ceph.com provisioner', it will report bad crc/signature errors and cluster performance will be severely degraded if krbd:rxbounce is not set.", "runbook_url": fmt.Sprintf(runbookURLTemplate, "VirtualMachineCRCErrors"), }, diff --git a/tests/monitoring_test.go b/tests/monitoring_test.go index 8abc4e580..df14e52bb 100644 --- a/tests/monitoring_test.go +++ b/tests/monitoring_test.go @@ -283,7 +283,7 @@ var _ = Describe("Prometheus Alerts", func() { alertShouldNotBeActive("VirtualMachineCRCErrors") }) - It("[test_id:TODO] Should fire VirtualMachineCRCErrors is disabled", func() { + It("[test_id:TODO] Should fire VirtualMachineCRCErrors when rxbounce is disabled", func() { vmName := createResources(true, false) waitForSeriesToBeDetected(fmt.Sprintf("kubevirt_ssp_vm_rbd_volume{name='%s', rxbounce_enabled='false'}", vmName)) waitForAlertToActivate("VirtualMachineCRCErrors")