From 8f672912f570d6bdf4bc3666e89dd8f89fc5bf10 Mon Sep 17 00:00:00 2001 From: Arun Kumar Mohan Date: Thu, 14 Dec 2023 22:45:50 +0530 Subject: [PATCH] Changed alerts queries to support multicluster mode Needed changes are made for alerts queries to support multicluster mode. Signed-off-by: Arun Kumar Mohan --- .../deploy/prometheus-ocs-rules-external.yaml | 8 +++---- metrics/deploy/prometheus-ocs-rules.yaml | 22 +++++++++---------- metrics/mixin/alerts/blocklist.libsonnet | 2 +- metrics/mixin/alerts/mirroring.libsonnet | 12 +++++----- metrics/mixin/alerts/obc.libsonnet | 8 +++---- 5 files changed, 26 insertions(+), 26 deletions(-) diff --git a/metrics/deploy/prometheus-ocs-rules-external.yaml b/metrics/deploy/prometheus-ocs-rules-external.yaml index 6d633e12e7..c4f85c496a 100644 --- a/metrics/deploy/prometheus-ocs-rules-external.yaml +++ b/metrics/deploy/prometheus-ocs-rules-external.yaml @@ -73,7 +73,7 @@ spec: severity_level: warning storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) > 0.80 + (ocs_objectbucketclaim_info * on (namespace, objectbucket, managedBy) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) > 0.80 for: 10s labels: severity: warning @@ -84,7 +84,7 @@ spec: severity_level: warning storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) > 0.80 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) > 0.80 for: 10s labels: severity: warning @@ -95,7 +95,7 @@ spec: severity_level: error storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) >= 1 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) >= 1 for: 0s labels: severity: critical @@ -106,7 +106,7 @@ spec: severity_level: error storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) >= 1 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) >= 1 for: 0s labels: severity: critical diff --git a/metrics/deploy/prometheus-ocs-rules.yaml b/metrics/deploy/prometheus-ocs-rules.yaml index e01386b2a2..2bbfe59209 100644 --- a/metrics/deploy/prometheus-ocs-rules.yaml +++ b/metrics/deploy/prometheus-ocs-rules.yaml @@ -73,7 +73,7 @@ spec: severity_level: error storage_type: ceph expr: | - ((count by(namespace) (ocs_mirror_daemon_count{job="ocs-metrics-exporter"} == 0)) * on(namespace) group_left() (count by(namespace) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"} == 1))) > 0 + ((count by(namespace, managedBy) (ocs_mirror_daemon_count{job="ocs-metrics-exporter"} == 0)) * on(namespace, managedBy) group_left() (count by(namespace, managedBy) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"} == 1))) > 0 for: 1m labels: severity: critical @@ -85,7 +85,7 @@ spec: severity_level: warning storage_type: ceph expr: | - (ocs_pool_mirroring_image_health{job="ocs-metrics-exporter"} * on (namespace) group_left() (max by(namespace) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"}))) == 1 + (ocs_pool_mirroring_image_health{job="ocs-metrics-exporter"} * on (namespace, managedBy) group_left() (max by(namespace, managedBy) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"}))) == 1 for: 1m labels: mirroring_image_status: unknown @@ -98,7 +98,7 @@ spec: severity_level: warning storage_type: ceph expr: | - (ocs_pool_mirroring_image_health{job="ocs-metrics-exporter"} * on (namespace) group_left() (max by(namespace) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"}))) == 2 + (ocs_pool_mirroring_image_health{job="ocs-metrics-exporter"} * on (namespace, managedBy) group_left() (max by(namespace, managedBy) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"}))) == 2 for: 1m labels: mirroring_image_status: warning @@ -111,7 +111,7 @@ spec: severity_level: error storage_type: ceph expr: | - (ocs_pool_mirroring_image_health{job="ocs-metrics-exporter"} * on (namespace) group_left() (max by(namespace) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"}))) == 3 + (ocs_pool_mirroring_image_health{job="ocs-metrics-exporter"} * on (namespace, managedBy) group_left() (max by(namespace, managedBy) (ocs_pool_mirroring_status{job="ocs-metrics-exporter"}))) == 3 for: 10s labels: mirroring_image_status: error @@ -124,7 +124,7 @@ spec: severity_level: error storage_type: ceph expr: | - ocs_rbd_mirror_image_state{job="ocs-metrics-exporter"} * on(image,pool_name) group_left(name,namespace) ocs_rbd_pv_metadata{job="ocs-metrics-exporter"} == 1 + ocs_rbd_mirror_image_state{job="ocs-metrics-exporter"} * on(image,pool_name) group_left(name,namespace,managedBy) ocs_rbd_pv_metadata{job="ocs-metrics-exporter"} == 1 for: 1m labels: severity: critical @@ -136,7 +136,7 @@ spec: severity_level: warning storage_type: ceph expr: | - ocs_rbd_mirror_image_state{job="ocs-metrics-exporter"} * on(image,pool_name) group_left(name,namespace) ocs_rbd_pv_metadata{job="ocs-metrics-exporter"} == 0 + ocs_rbd_mirror_image_state{job="ocs-metrics-exporter"} * on(image,pool_name) group_left(name,namespace,managedBy) ocs_rbd_pv_metadata{job="ocs-metrics-exporter"} == 0 for: 1m labels: severity: warning @@ -150,7 +150,7 @@ spec: severity_level: warning storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) > 0.80 + (ocs_objectbucketclaim_info * on (namespace, objectbucket, managedBy) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) > 0.80 for: 10s labels: severity: warning @@ -161,7 +161,7 @@ spec: severity_level: warning storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) > 0.80 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) > 0.80 for: 10s labels: severity: warning @@ -172,7 +172,7 @@ spec: severity_level: error storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) >= 1 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) >= 1 for: 0s labels: severity: critical @@ -183,7 +183,7 @@ spec: severity_level: error storage_type: RGW expr: | - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) >= 1 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) >= 1 for: 0s labels: severity: critical @@ -216,7 +216,7 @@ spec: ) and on(node) ( kube_pod_container_status_waiting_reason{reason="CreateContainerError"} - * on(pod, namespace) group_left(node) + * on(pod, namespace, managedBy) group_left(node) kube_pod_info ) > 0 for: 10s diff --git a/metrics/mixin/alerts/blocklist.libsonnet b/metrics/mixin/alerts/blocklist.libsonnet index e6cbd1336f..899e0120cf 100644 --- a/metrics/mixin/alerts/blocklist.libsonnet +++ b/metrics/mixin/alerts/blocklist.libsonnet @@ -12,7 +12,7 @@ ) and on(node) ( kube_pod_container_status_waiting_reason{reason="CreateContainerError"} - * on(pod, namespace) group_left(node) + * on(pod, namespace, managedBy) group_left(node) kube_pod_info ) > 0 ||| % $._config, diff --git a/metrics/mixin/alerts/mirroring.libsonnet b/metrics/mixin/alerts/mirroring.libsonnet index 352246f68c..a0d36ba05f 100644 --- a/metrics/mixin/alerts/mirroring.libsonnet +++ b/metrics/mixin/alerts/mirroring.libsonnet @@ -7,7 +7,7 @@ { alert: 'OdfMirrorDaemonStatus', expr: ||| - ((count by(namespace) (ocs_mirror_daemon_count{%(ocsExporterSelector)s} == 0)) * on(namespace) group_left() (count by(namespace) (ocs_pool_mirroring_status{%(ocsExporterSelector)s} == 1))) > 0 + ((count by(namespace, managedBy) (ocs_mirror_daemon_count{%(ocsExporterSelector)s} == 0)) * on(namespace, managedBy) group_left() (count by(namespace, managedBy) (ocs_pool_mirroring_status{%(ocsExporterSelector)s} == 1))) > 0 ||| % $._config, 'for': $._config.odfMirrorDaemonStatusAlertTime, labels: { @@ -24,7 +24,7 @@ { alert: 'OdfPoolMirroringImageHealth', expr: ||| - (ocs_pool_mirroring_image_health{%(ocsExporterSelector)s} * on (namespace) group_left() (max by(namespace) (ocs_pool_mirroring_status{%(ocsExporterSelector)s}))) == 1 + (ocs_pool_mirroring_image_health{%(ocsExporterSelector)s} * on (namespace, managedBy) group_left() (max by(namespace, managedBy) (ocs_pool_mirroring_status{%(ocsExporterSelector)s}))) == 1 ||| % $._config, 'for': $._config.odfPoolMirroringImageHealthWarningAlertTime, labels: { @@ -42,7 +42,7 @@ { alert: 'OdfPoolMirroringImageHealth', expr: ||| - (ocs_pool_mirroring_image_health{%(ocsExporterSelector)s} * on (namespace) group_left() (max by(namespace) (ocs_pool_mirroring_status{%(ocsExporterSelector)s}))) == 2 + (ocs_pool_mirroring_image_health{%(ocsExporterSelector)s} * on (namespace, managedBy) group_left() (max by(namespace, managedBy) (ocs_pool_mirroring_status{%(ocsExporterSelector)s}))) == 2 ||| % $._config, 'for': $._config.odfPoolMirroringImageHealthWarningAlertTime, labels: { @@ -60,7 +60,7 @@ { alert: 'OdfPoolMirroringImageHealth', expr: ||| - (ocs_pool_mirroring_image_health{%(ocsExporterSelector)s} * on (namespace) group_left() (max by(namespace) (ocs_pool_mirroring_status{%(ocsExporterSelector)s}))) == 3 + (ocs_pool_mirroring_image_health{%(ocsExporterSelector)s} * on (namespace, managedBy) group_left() (max by(namespace, managedBy) (ocs_pool_mirroring_status{%(ocsExporterSelector)s}))) == 3 ||| % $._config, 'for': $._config.odfPoolMirroringImageHealthCriticalAlertTime, labels: { @@ -78,7 +78,7 @@ { alert: 'ODFPersistentVolumeMirrorStatus', expr: ||| - ocs_rbd_mirror_image_state{%(ocsExporterSelector)s} * on(image,pool_name) group_left(name,namespace) ocs_rbd_pv_metadata{%(ocsExporterSelector)s} == 1 + ocs_rbd_mirror_image_state{%(ocsExporterSelector)s} * on(image,pool_name) group_left(name,namespace,managedBy) ocs_rbd_pv_metadata{%(ocsExporterSelector)s} == 1 ||| % $._config, 'for': $._config.odfPoolMirroringImageHealthWarningAlertTime, labels: { @@ -95,7 +95,7 @@ { alert: 'ODFPersistentVolumeMirrorStatus', expr: ||| - ocs_rbd_mirror_image_state{%(ocsExporterSelector)s} * on(image,pool_name) group_left(name,namespace) ocs_rbd_pv_metadata{%(ocsExporterSelector)s} == 0 + ocs_rbd_mirror_image_state{%(ocsExporterSelector)s} * on(image,pool_name) group_left(name,namespace,managedBy) ocs_rbd_pv_metadata{%(ocsExporterSelector)s} == 0 ||| % $._config, 'for': $._config.odfPoolMirroringImageHealthWarningAlertTime, labels: { diff --git a/metrics/mixin/alerts/obc.libsonnet b/metrics/mixin/alerts/obc.libsonnet index 78a722f01c..53c38aef28 100644 --- a/metrics/mixin/alerts/obc.libsonnet +++ b/metrics/mixin/alerts/obc.libsonnet @@ -7,7 +7,7 @@ { alert: 'ObcQuotaBytesAlert', expr: ||| - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) > 0.80 + (ocs_objectbucketclaim_info * on (namespace, objectbucket, managedBy) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) > 0.80 ||| % $._config, 'for': $._config.odfObcQuotaAlertTime, labels: { @@ -24,7 +24,7 @@ { alert: 'ObcQuotaObjectsAlert', expr: ||| - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) > 0.80 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) > 0.80 ||| % $._config, 'for': $._config.odfObcQuotaAlertTime, labels: { @@ -40,7 +40,7 @@ { alert: 'ObcQuotaBytesExhausedAlert', expr: ||| - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) >= 1 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_used_bytes/ocs_objectbucket_max_bytes)) >= 1 ||| % $._config, 'for': $._config.odfObcQuotaCriticalAlertTime, labels: { @@ -56,7 +56,7 @@ { alert: 'ObcQuotaObjectsExhausedAlert', expr: ||| - (ocs_objectbucketclaim_info * on (namespace, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) >= 1 + (ocs_objectbucketclaim_info * on (namespace, managedBy, objectbucket) group_left() (ocs_objectbucket_objects_total/ocs_objectbucket_max_objects)) >= 1 ||| % $._config, 'for': $._config.odfObcQuotaCriticalAlertTime, labels: {