diff --git a/controllers/storagecluster/cephcluster.go b/controllers/storagecluster/cephcluster.go index 1cfaad2717..8c165b717a 100644 --- a/controllers/storagecluster/cephcluster.go +++ b/controllers/storagecluster/cephcluster.go @@ -1162,13 +1162,10 @@ func createPrometheusRules(r *StorageClusterReconciler, sc *ocsv1.StorageCluster applyLabels(getCephClusterMonitoringLabels(*sc), &prometheusRule.ObjectMeta) replaceTokens := []replaceToken{ - { - recordOrAlertName: "CephMgrIsAbsent", - wordToReplace: "openshift-storage", - replaceWith: sc.Namespace, - }, + createReplaceToken("", "CephMgrIsAbsent", "openshift-storage", sc.Namespace, ExprRuleSection, ""), } + const DescriptionKey = "description" // if nearFullRatio/backfillFullRatio/fullRatio are specified on the StorageCLuster CR, replace the values in the prometheus rule accordingly specifiedNearFullRatio := sc.Spec.ManagedResources.CephCluster.NearFullRatio specifiedBackfillFullRatio := sc.Spec.ManagedResources.CephCluster.BackfillFullRatio @@ -1176,18 +1173,29 @@ func createPrometheusRules(r *StorageClusterReconciler, sc *ocsv1.StorageCluster if specifiedNearFullRatio != nil { replaceTokens = append(replaceTokens, - createReplaceToken("", "", "75%", fmt.Sprintf("%.2f%%", *specifiedNearFullRatio*100)), - createReplaceToken("", "", "0.75", fmt.Sprintf("%f", *specifiedNearFullRatio))) + createReplaceToken("cluster-utilization-alert.rules", "CephClusterNearFull", + "75%", fmt.Sprintf("%0.0f%%", *specifiedNearFullRatio*100), AnnotationRuleSection, DescriptionKey), + createReplaceToken("cluster-utilization-alert.rules", "CephClusterNearFull", + "0.75", fmt.Sprintf("%0.2f", *specifiedNearFullRatio), ExprRuleSection, "")) } if specifiedBackfillFullRatio != nil { replaceTokens = append(replaceTokens, - createReplaceToken("", "", "80%", fmt.Sprintf("%.2f%%", *specifiedBackfillFullRatio*100)), - createReplaceToken("", "", "0.80", fmt.Sprintf("%f", *specifiedBackfillFullRatio))) + createReplaceToken("cluster-utilization-alert.rules", "CephClusterCriticallyFull", + "80%", fmt.Sprintf("%0.0f%%", *specifiedBackfillFullRatio*100), AnnotationRuleSection, DescriptionKey), + createReplaceToken("cluster-utilization-alert.rules", "CephClusterCriticallyFull", + "0.80", fmt.Sprintf("%0.2f", *specifiedBackfillFullRatio), ExprRuleSection, "")) } if specifiedFullRatio != nil { replaceTokens = append(replaceTokens, - createReplaceToken("", "", "85%", fmt.Sprintf("%.2f%%", *specifiedFullRatio*100)), - createReplaceToken("", "", "0.85", fmt.Sprintf("%f", *specifiedFullRatio))) + createReplaceToken("cluster-utilization-alert.rules", "CephClusterReadOnly", + "85%", fmt.Sprintf("%0.0f%%", *specifiedFullRatio*100), AnnotationRuleSection, DescriptionKey), + createReplaceToken("cluster-utilization-alert.rules", "CephClusterReadOnly", + "0.85", fmt.Sprintf("%0.2f", *specifiedFullRatio), ExprRuleSection, ""), + // if 'FullRatio' is specified, make changes to other alerts as well where their description has read-only ratio / value specified + createReplaceToken("cluster-utilization-alert.rules", "CephClusterNearFull", + "85%", fmt.Sprintf("%0.0f%%", *specifiedFullRatio*100), AnnotationRuleSection, DescriptionKey), + createReplaceToken("cluster-utilization-alert.rules", "CephClusterCriticallyFull", + "85%", fmt.Sprintf("%0.0f%%", *specifiedFullRatio*100), AnnotationRuleSection, DescriptionKey)) } // nothing to replace in external mode @@ -1215,19 +1223,36 @@ func applyLabels(labels map[string]string, t *metav1.ObjectMeta) { } } +// RuleSection determines which section in the Rule struct the change has to go in +type RuleSection int + +const ( + ExprRuleSection RuleSection = iota + AnnotationRuleSection + LabelRuleSection +) + type replaceToken struct { groupName string recordOrAlertName string wordToReplace string replaceWith string + // wordInSection represents + // which field, in the rule struct, this change has to go + wordInSection RuleSection + // sectionKey contains the key value, if the above RuleSection is a map (like Annotations or Labels) + sectionKey string } -func createReplaceToken(groupName, recordOrAlertName, wordToReplace, replaceWith string) replaceToken { +func createReplaceToken(groupName, recordOrAlertName, wordToReplace, replaceWith string, + wordInSection RuleSection, sectionKey string) replaceToken { return replaceToken{ groupName: groupName, recordOrAlertName: recordOrAlertName, wordToReplace: wordToReplace, replaceWith: replaceWith, + wordInSection: wordInSection, + sectionKey: sectionKey, } } @@ -1258,19 +1283,23 @@ func changePromRule(promRule *monitoringv1.PrometheusRule, tokens []replaceToken rule := &group.Rules[ruleIdx] // If recordOrAlertName is specified, ensure it matches; otherwise, apply to all rules if token.recordOrAlertName == "" || rule.Record == token.recordOrAlertName || rule.Alert == token.recordOrAlertName { - // Update the annotations in the rule - if rule.Annotations != nil { - // Update description if it exists - if description, exists := rule.Annotations["description"]; exists { - newDescription := strings.Replace(description, token.wordToReplace, token.replaceWith, -1) - rule.Annotations["description"] = newDescription - } - } - // Update the expression field in the rule - exprStr := rule.Expr.String() - if exprStr != "" { - newExpr := strings.Replace(exprStr, token.wordToReplace, token.replaceWith, -1) + switch token.wordInSection { + case ExprRuleSection: + // Update the expression field in the rule + newExpr := strings.ReplaceAll(rule.Expr.String(), token.wordToReplace, token.replaceWith) rule.Expr = intstr.Parse(newExpr) + case AnnotationRuleSection: + if _, ok := rule.Annotations[token.sectionKey]; !ok { + continue + } + rule.Annotations[token.sectionKey] = strings.ReplaceAll( + rule.Annotations[token.sectionKey], token.wordToReplace, token.replaceWith) + case LabelRuleSection: + if _, ok := rule.Labels[token.sectionKey]; !ok { + continue + } + rule.Labels[token.sectionKey] = strings.ReplaceAll( + rule.Labels[token.sectionKey], token.wordToReplace, token.replaceWith) } } } diff --git a/controllers/storagecluster/cephcluster_test.go b/controllers/storagecluster/cephcluster_test.go index 3833cb9913..4c9d579b01 100644 --- a/controllers/storagecluster/cephcluster_test.go +++ b/controllers/storagecluster/cephcluster_test.go @@ -1040,16 +1040,18 @@ func TestParsePrometheusRules(t *testing.T) { assert.Equal(t, 1, len(prometheusRules.Spec.Groups)) } -func TestChangePrometheusExprFunc(t *testing.T) { +func TestChangePromRuleFunc(t *testing.T) { prometheusRule, err := parsePrometheusRule(localPrometheusRules) + const DescriptionKey, SeverityKey = "description", "severity" assert.NilError(t, err) var changeTokens = []replaceToken{ - {recordOrAlertName: "CephMgrIsAbsent", wordToReplace: "openshift-storage", replaceWith: "new-namespace"}, + createReplaceToken("", "CephMgrIsAbsent", "openshift-storage", "new-namespace", ExprRuleSection, ""), // when alert or record name is not specified, - // the change should affect all the expressions which has the 'wordInExpr' - {recordOrAlertName: "", wordToReplace: "ceph_pool_stored_raw", replaceWith: "new_ceph_pool_stored_raw"}, - {recordOrAlertName: "", wordToReplace: "0.75", replaceWith: "0.775"}, - {recordOrAlertName: "", wordToReplace: "85%", replaceWith: "92.50%"}, + // the change should affect all the alerts' specified 'RuleSection' which has the 'wordToReplace' string + createReplaceToken("", "", "ceph_pool_stored_raw", "new_ceph_pool_stored_raw", ExprRuleSection, ""), + createReplaceToken("", "", "0.75", "0.775", ExprRuleSection, ""), + createReplaceToken("", "", "85%", "92.50%", AnnotationRuleSection, DescriptionKey), + createReplaceToken("", "", "critical", "warning", LabelRuleSection, SeverityKey), } changePromRule(prometheusRule, changeTokens) @@ -1068,13 +1070,25 @@ func TestChangePrometheusExprFunc(t *testing.T) { for _, eachChange := range recordOrAlertNameAndReplacedWord { alertName := eachChange[0] changeStr := eachChange[1] + // one of the replace token is to change all 'critical' severity rules to 'warning' + if severityVal, ok := rule.Labels[SeverityKey]; ok { + assert.Assert(t, severityVal != "critical", "There should not be any 'critical' severity rules", "Rule", rule) + } if rule.Alert != alertName { continue } - assert.Assert(t, - strings.Contains(rule.Expr.String(), changeStr) || - (rule.Annotations != nil && strings.Contains(rule.Annotations["description"], changeStr)), - fmt.Sprintf("Expected '%s' to be found in either Expr or Annotations for alert %s", changeStr, alertName)) + // below is a small hack to determine whether we have to check 'Annotation' or 'Expr' + // if it contains a %-age sign, then check the annotation description + if strings.Contains(changeStr, "%") { + assert.Assert(t, + strings.Contains(rule.Annotations[DescriptionKey], changeStr), + "Annotation description doesn't mach", "Current Annotation Description", + rule.Annotations[DescriptionKey], "Expected Change", changeStr) + } else { // check the expression + assert.Assert(t, + strings.Contains(rule.Expr.String(), changeStr), "Expression doesn't mach", + "Current Expression", rule.Expr.String(), "Expected Change", changeStr) + } } } }