diff --git a/etc/kayobe/kolla/config/prometheus/smart.rules b/etc/kayobe/kolla/config/prometheus/smart.rules
index aea36bdf8..5eb97e7a3 100644
--- a/etc/kayobe/kolla/config/prometheus/smart.rules
+++ b/etc/kayobe/kolla/config/prometheus/smart.rules
@@ -13,4 +13,24 @@ groups:
       summary: "SMART monitor reports bad disk on (instance {{ $labels.instance }})"
       description: "{{ $labels.instance }} is reporting unhealthy for the disk at {{ $labels.disk }}. Disk serial number is: {{ $labels.serial_number }}"
 
-{% endraw %}
\ No newline at end of file
+  # Fires when the 7-day average of drive writes per day (DWPD) exceeds 1.
+  # The * 512000 factor assumes the counter is reported in NVMe data units of
+  # 1000 x 512 bytes -- TODO confirm against the exporter in use.
+  - alert: DWPDTooHigh
+    expr: (increase(nvme_data_units_written_total[168h])*512000 / nvme_physical_size_bytes)/7 > 1
+    labels:
+      severity: alert
+    annotations:
+      summary: "High 7-Day Average DWPD for {{ $labels.instance }}"
+      description: "The 7-Day average for Disk Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD"
+
+  # Early warning: the same written-bytes / capacity ratio over the last 24 hours only.
+  - alert: DWPDTooHighWarning
+    expr: increase(nvme_data_units_written_total[24h])*512000 / nvme_physical_size_bytes > 1
+    labels:
+      severity: warning
+    annotations:
+      summary: "High DWPD for {{ $labels.instance }}"
+      description: "The 24-Hour average for Disk Writes Per Day for disk {{ $labels.device }} on {{ $labels.instance }} exceeds 1 DWPD"
+
+{% endraw %}