Skip to content

Commit

Permalink
Alert the user when a Ceph metadata server's CPU usage exceeds the threshold.
Browse files Browse the repository at this point in the history
Signed-off-by: Manish <[email protected]>
  • Loading branch information
manishym committed Dec 11, 2023
1 parent f2da20a commit b1edd0a
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 0 deletions.
16 changes: 16 additions & 0 deletions metrics/mixin/alerts/perf.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,22 @@
severity_level: 'error',
},
},

// Alert rule: MDSCPUUsageHigh.
// Raises a warning when a pod matched by $._config.mdsSelector has a CPU usage
// to CPU limit ratio above 0.67.
{
alert: 'MDSCPUUsageHigh',
// The text block below is formatted with $._config, so each %(mdsSelector)s
// placeholder is replaced by the configured label matcher. The division pairs
// the usage and limit series on the `pod` label only, and the result is
// compared against the fixed ratio 0.67.
expr: |||
pod:container_cpu_usage:sum{%(mdsSelector)s}/ on(pod) kube_pod_resource_limit{resource='cpu',%(mdsSelector)s} > 0.67
||| % $._config,
// 'for' is quoted because `for` is a Jsonnet keyword. Its value is read from
// the mixin config rather than hard-coded here.
'for': $._config.mds_cpu_usage_high_threshold_duration,
labels: {
severity: 'warning',
},
annotations: {
// {{ $labels.pod }} is left as a literal template in the strings below; it is
// not expanded by Jsonnet.
message: 'Ceph metadata server pod ({{ $labels.pod }}) has high cpu usage',
description: 'Ceph metadata server pod ({{ $labels.pod }}) has high cpu usage.\nPlease consider increasing the number of active metadata servers,\nit can be done by increasing the number of activeMetadataServers parameter in the StorageCluster CR.',
// Same value as labels.severity above.
severity_level: 'warning',
},
},
],
},
],
Expand Down
2 changes: 2 additions & 0 deletions metrics/mixin/config.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
blockedRBDClientAlertTime: '10s',
ocsStorageClusterKMSConnectionAlert: '5s',
mdsCacheUsageAlertTime: '5m',
// Label matcher substituted into the MDSCPUUsageHigh expression via
// %(mdsSelector)s; matches pods whose name starts with "rook-ceph-mds".
mdsSelector: 'pod=~"rook-ceph-mds.*"',
// Value used as the 'for' duration of the MDSCPUUsageHigh alert.
// NOTE(review): this key is snake_case while the neighbouring keys are
// camelCase — consider renaming it together with its use in the alert rule.
mds_cpu_usage_high_threshold_duration: '6h',

// Constants
objectStorageType: 'RGW',
Expand Down

0 comments on commit b1edd0a

Please sign in to comment.