From b3163fa84d003550d436492f1fcb0e57d89137ac Mon Sep 17 00:00:00 2001
From: Muhammad Talal Anwar
Date: Thu, 26 Sep 2024 07:34:49 +0200
Subject: [PATCH] fix: clean up of deleted rule group

---
 controllers/absence_prometheusrule.go         | 31 ++++++-------------
 controllers/alert_rule.go                     |  6 ++--
 controllers/alert_rule_test.go                |  2 ++
 e2e/controller_test.go                        | 22 +++++++++++--
 ..._openstack_absent_metrics_alert_rules.yaml | 17 ++++++++++
 .../resmgmt_openstack_limes_api.yaml          | 14 +++++++++
 6 files changed, 65 insertions(+), 27 deletions(-)

diff --git a/controllers/absence_prometheusrule.go b/controllers/absence_prometheusrule.go
index a7d3849b..24eba034 100644
--- a/controllers/absence_prometheusrule.go
+++ b/controllers/absence_prometheusrule.go
@@ -20,6 +20,7 @@ import (
 	"fmt"
 	"reflect"
 	"sort"
+	"strings"
 	"time"
 
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
@@ -302,8 +303,8 @@ func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context,
 
 	// Step 5: if it's an existing AbsencePrometheusRule then update otherwise create a new resource.
 	if existingAbsencePrometheusRule {
-		existingRuleGroups := absencePromRule.Spec.Groups
-		result := mergeAbsenceRuleGroups(existingRuleGroups, absenceRuleGroups)
+		existingRuleGroups := unmodifiedAbsencePromRule.Spec.Groups
+		result := mergeAbsenceRuleGroups(promRuleName, existingRuleGroups, absenceRuleGroups)
 		if reflect.DeepEqual(unmodifiedAbsencePromRule.GetLabels(), absencePromRule.GetLabels()) &&
 			reflect.DeepEqual(existingRuleGroups, result) {
 			return nil
@@ -318,27 +319,13 @@ func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context,
 // mergeAbsenceRuleGroups merges existing and newly generated AbsenceRuleGroups. If the
 // same AbsenceRuleGroup exists in both 'existing' and 'new' then the newer one will be
 // used.
-func mergeAbsenceRuleGroups(existingRuleGroups, newRuleGroups []monitoringv1.RuleGroup) []monitoringv1.RuleGroup {
+func mergeAbsenceRuleGroups(promRuleName string, existingRuleGroups, newRuleGroups []monitoringv1.RuleGroup) []monitoringv1.RuleGroup {
 	var result []monitoringv1.RuleGroup
-	added := make(map[string]bool)
-
-OuterLoop:
-	for _, oldG := range existingRuleGroups {
-		for _, newG := range newRuleGroups {
-			if oldG.Name == newG.Name {
-				// Add the new updated RuleGroup.
-				result = append(result, newG)
-				added[newG.Name] = true
-				continue OuterLoop
-			}
-		}
-		// This RuleGroup should be carried over as is.
-		result = append(result, oldG)
-	}
-
-	// Add the pending rule groups.
-	for _, g := range newRuleGroups {
-		if !added[g.Name] {
+	// Add the absence rule groups for the PrometheusRule that we are currently dealing with.
+	result = append(result, newRuleGroups...)
+	// Carry over the absence rule groups for other PrometheusRule(s) as is.
+	for _, g := range existingRuleGroups {
+		if !strings.HasPrefix(g.Name, promRuleName) {
 			result = append(result, g)
 		}
 	}
diff --git a/controllers/alert_rule.go b/controllers/alert_rule.go
index 1c346379..affd8854 100644
--- a/controllers/alert_rule.go
+++ b/controllers/alert_rule.go
@@ -107,9 +107,9 @@ func (mex *metricNameExtractor) Visit(node parser.Node, path []parser.Node) (par
 	return mex, nil
 }
 
-// absenceRuleGroupName returns the name of the RuleGroup that holds absence alert rules
+// AbsenceRuleGroupName returns the name of the RuleGroup that holds absence alert rules
 // for a specific RuleGroup in a specific PrometheusRule.
-func absenceRuleGroupName(promRule, ruleGroup string) string {
+func AbsenceRuleGroupName(promRule, ruleGroup string) string {
 	return fmt.Sprintf("%s/%s", promRule, ruleGroup)
 }
 
@@ -161,7 +161,7 @@ func ParseRuleGroups(logger logr.Logger, in []monitoringv1.RuleGroup, promRuleNa
 		})
 
 		out = append(out, monitoringv1.RuleGroup{
-			Name:  absenceRuleGroupName(promRuleName, g.Name),
+			Name:  AbsenceRuleGroupName(promRuleName, g.Name),
 			Rules: absenceAlertRules,
 		})
 	}
diff --git a/controllers/alert_rule_test.go b/controllers/alert_rule_test.go
index aeb2b899..0eb5ceb1 100644
--- a/controllers/alert_rule_test.go
+++ b/controllers/alert_rule_test.go
@@ -37,6 +37,8 @@ var _ = Describe("Alert Rule", func() {
 			Expect(err).ToNot(HaveOccurred())
 			Expect(actual).To(HaveLen(len(expected)))
 
+			// We only check the alert name, expression, and labels. Annotations are hard-coded and
+			// don't need to be checked here in unit tests; they are already checked in e2e tests.
 			for i, wanted := range expected {
 				got := actual[i]
 				Expect(got.Alert).To(Equal(wanted.Alert))
diff --git a/e2e/controller_test.go b/e2e/controller_test.go
index c8a124b1..d505af05 100644
--- a/e2e/controller_test.go
+++ b/e2e/controller_test.go
@@ -193,6 +193,25 @@ var _ = Describe("Controller", Ordered, func() {
 		})
 	})
 
+	Context("when a rule group is deleted from a PrometheusRule", func() {
+		It("should delete the corresponding rule group from the AbsencePromRule "+osAbsencePRName+" in "+resmgmtNs+" namespace", func() {
+			// We will remove one of the two rule groups.
+			prName := "openstack-limes-api.alerts"
+			pr := getPromRule(newObjKey(resmgmtNs, prName))
+			ruleGroupName := pr.Spec.Groups[0].Name
+			pr.Spec.Groups = pr.Spec.Groups[1:]
+			Expect(k8sClient.Update(ctx, &pr)).To(Succeed())
+
+			waitForControllerToProcess()
+			actual := getPromRule(newObjKey(resmgmtNs, osAbsencePRName))
+			groups := make([]string, 0, len(actual.Spec.Groups))
+			for _, v := range actual.Spec.Groups {
+				groups = append(groups, v.Name)
+			}
+			Expect(groups).ToNot(ContainElement(controllers.AbsenceRuleGroupName(prName, ruleGroupName)))
+		})
+	})
+
 	Context("when a PrometheusRule has no alert rules", func() {
 		It("should delete "+osAbsencePRName+" in "+resmgmtNs+" namespace", func() {
 			// We will remove all alert rules from a PromRule which should result in
@@ -204,8 +223,7 @@ var _ = Describe("Controller", Ordered, func() {
 			Expect(k8sClient.Update(ctx, &pr)).To(Succeed())
 
 			waitForControllerToProcess()
-			expectToNotFindPromRule(newObjKey("resmgmt", osAbsencePRName))
-
+			expectToNotFindPromRule(newObjKey(resmgmtNs, osAbsencePRName))
 		})
 	})
 })
diff --git a/e2e/fixtures/resmgmt_openstack_absent_metrics_alert_rules.yaml b/e2e/fixtures/resmgmt_openstack_absent_metrics_alert_rules.yaml
index fe0e6489..e4573012 100644
--- a/e2e/fixtures/resmgmt_openstack_absent_metrics_alert_rules.yaml
+++ b/e2e/fixtures/resmgmt_openstack_absent_metrics_alert_rules.yaml
@@ -27,6 +27,23 @@ spec:
               operator playbook>.
             summary: missing limes_foo
 
+    - name: openstack-limes-api.alerts/api2.alerts
+      rules:
+        - alert: AbsentContainersLimesBar
+          expr: absent(limes_bar)
+          for: 10m
+          labels:
+            context: absent-metrics
+            support_group: containers
+            service: limes
+            severity: info
+          annotations:
+            description:
+              The metric 'limes_bar' is missing. 'OpenstackLimesBar'
+              alert using it may not fire as intended. See .
+            summary: missing limes_bar
+
     - name: openstack-limes-roleassign.alerts/roleassignment.alerts
       rules:
         - alert: AbsentContainersLimesOpenstackAssignmentsPerRole
diff --git a/e2e/fixtures/start-data/resmgmt_openstack_limes_api.yaml b/e2e/fixtures/start-data/resmgmt_openstack_limes_api.yaml
index 068c6ad8..08354c92 100644
--- a/e2e/fixtures/start-data/resmgmt_openstack_limes_api.yaml
+++ b/e2e/fixtures/start-data/resmgmt_openstack_limes_api.yaml
@@ -22,3 +22,17 @@ spec:
           annotations:
             summary: "Server errors on {{ $labels.kubernetes_name }}"
             description: "{{ $labels.kubernetes_name }} is producing HTTP responses with 5xx status codes."
+
+    - name: api2.alerts
+      rules:
+        - alert: OpenstackLimesBar
+          expr: limes_bar > 0
+          for: 5m
+          labels:
+            context: api
+            severity: info
+            support_group: containers
+            service: limes
+          annotations:
+            summary: "Server errors on {{ $labels.kubernetes_name }}"
+            description: "{{ $labels.kubernetes_name }} is producing HTTP responses with 5xx status codes."
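
Reviewer note (illustration only, not part of the patch): with this change, mergeAbsenceRuleGroups keeps every freshly generated absence rule group and carries over an existing group only if its name does not start with the current PrometheusRule's name, so absence groups whose source rule group was deleted no longer linger in the AbsencePrometheusRule. A minimal, self-contained Go sketch of that behavior follows; the group names are modeled on the e2e fixtures and the merge helper is a stand-in for the patched function, not the operator's actual code.

package main

import (
	"fmt"
	"strings"
)

// ruleGroup stands in for monitoringv1.RuleGroup; only the name matters for the merge.
type ruleGroup struct{ Name string }

// merge mirrors the patched mergeAbsenceRuleGroups: newly generated groups always win,
// and existing groups that belong to the same PrometheusRule (same name prefix) are
// dropped, which removes absence groups whose source rule group was deleted.
func merge(promRuleName string, existing, generated []ruleGroup) []ruleGroup {
	result := append([]ruleGroup{}, generated...)
	for _, g := range existing {
		if !strings.HasPrefix(g.Name, promRuleName) {
			result = append(result, g)
		}
	}
	return result
}

func main() {
	existing := []ruleGroup{
		{Name: "openstack-limes-api.alerts/api.alerts"},
		{Name: "openstack-limes-api.alerts/api2.alerts"},
		{Name: "openstack-limes-roleassign.alerts/roleassignment.alerts"},
	}
	// api2.alerts was deleted from the source PrometheusRule, so no absence group is
	// generated for it anymore.
	generated := []ruleGroup{{Name: "openstack-limes-api.alerts/api.alerts"}}

	for _, g := range merge("openstack-limes-api.alerts", existing, generated) {
		fmt.Println(g.Name)
	}
	// Prints:
	// openstack-limes-api.alerts/api.alerts
	// openstack-limes-roleassign.alerts/roleassignment.alerts
}

The new e2e test asserts exactly this outcome: after a rule group is deleted from openstack-limes-api.alerts, the AbsencePrometheusRule no longer contains the group named by AbsenceRuleGroupName(prName, ruleGroupName).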