diff --git a/metrics/deploy/prometheus-ocs-rules.yaml b/metrics/deploy/prometheus-ocs-rules.yaml index 2bbfe59209..0a5d11745e 100644 --- a/metrics/deploy/prometheus-ocs-rules.yaml +++ b/metrics/deploy/prometheus-ocs-rules.yaml @@ -282,7 +282,7 @@ spec: message: High MDS cache usage for the daemon {{ $labels.ceph_daemon }}. severity_level: error expr: | - ceph_mds_mem_rss / on(ceph_daemon) group_left(job)(label_replace(kube_pod_container_resource_requests{container="mds", resource="memory"}, "ceph_daemon", "mds.$1", "pod", "rook-ceph-mds-(.*)-(.*)") * .5) > .95 + (ceph_mds_mem_rss * 1000) / on(ceph_daemon) group_left(job)(label_replace(kube_pod_container_resource_requests{container="mds", resource="memory"}, "ceph_daemon", "mds.$1", "pod", "rook-ceph-mds-(.*)-(.*)") * .5) > .95 for: 5m labels: severity: critical diff --git a/metrics/mixin/alerts/perf.libsonnet b/metrics/mixin/alerts/perf.libsonnet index ae385de236..0cc0ea4b18 100644 --- a/metrics/mixin/alerts/perf.libsonnet +++ b/metrics/mixin/alerts/perf.libsonnet @@ -7,7 +7,7 @@ { alert: 'MDSCacheUsageHigh', expr: ||| - ceph_mds_mem_rss / on(ceph_daemon) group_left(job)(label_replace(kube_pod_container_resource_requests{container="mds", resource="memory"}, "ceph_daemon", "mds.$1", "pod", "rook-ceph-mds-(.*)-(.*)") * .5) > .95 + (ceph_mds_mem_rss * 1000) / on(ceph_daemon) group_left(job)(label_replace(kube_pod_container_resource_requests{container="mds", resource="memory"}, "ceph_daemon", "mds.$1", "pod", "rook-ceph-mds-(.*)-(.*)") * .5) > .95 ||| % $._config, 'for': $._config.mdsCacheUsageAlertTime, labels: {