From c474ca93cf8693f176c131acfd7579dd223c70c3 Mon Sep 17 00:00:00 2001
From: technowhizz <7688823+technowhizz@users.noreply.github.com>
Date: Fri, 13 Oct 2023 18:35:02 +0100
Subject: [PATCH] Add DWPD alerts

---
Review notes (this section is patch commentary, not part of the commit message):
* Adds two alerts on NVMe drive writes per day (DWPD): DWPDTooHigh uses a
  168h window divided by 7, DWPDTooHighWarning uses a single 24h window.
  Both fire when the computed ratio exceeds 1.
* Both `summary` annotations are terminated with a closing double quote;
  without it the double-quoted scalar is unterminated and the rules file
  does not parse as YAML.
* 512000 is presumably the number of bytes per NVMe data unit
  (1000 x 512-byte blocks) - TODO confirm against the exporter that
  produces nvme_data_units_written_total.
* The indentation inside the hunk below was reconstructed - TODO confirm it
  matches the existing rules in smart.rules before applying.
* The blob hash on the `index` line predates the quote fix, so a 3-way
  apply (`git am -3`) may not find the post-image; a plain apply is
  unaffected.

 etc/kayobe/kolla/config/prometheus/smart.rules | 18 +++++++++++++++++-
 1 file changed, 17 insertions(+), 1 deletion(-)

diff --git a/etc/kayobe/kolla/config/prometheus/smart.rules b/etc/kayobe/kolla/config/prometheus/smart.rules
index aea36bdf8..5eb97e7a3 100644
--- a/etc/kayobe/kolla/config/prometheus/smart.rules
+++ b/etc/kayobe/kolla/config/prometheus/smart.rules
@@ -13,4 +13,20 @@ groups:
       summary: "SMART monitor reports bad disk on (instance {{ $labels.instance }})"
       description: "{{ $labels.instance }} is reporting unhealthy for the disk at {{ $labels.disk }}. Disk serial number is: {{ $labels.serial_number }}"
 
-{% endraw %}
\ No newline at end of file
+  - alert: DWPDTooHigh
+    expr: (delta(nvme_data_units_written_total[168h])*512000 / nvme_physical_size_bytes)/7 > 1
+    labels:
+      severity: alert
+    annotations:
+      summary: "High 7-Day Average DWPD for {{ $labels.instance }}"
+      description: "The 7-Day average for Disk Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD"
+
+  - alert: DWPDTooHighWarning
+    expr: delta(nvme_data_units_written_total[24h])*512000 / nvme_physical_size_bytes > 1
+    labels:
+      severity: warning
+    annotations:
+      summary: "High DWPD for {{ $labels.instance }}"
+      description: "The 24-Hour average for Disk Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD"
+
+{% endraw %}