Skip to content

Commit

Permalink
controller: clean up absent alerts for disabled resources
Browse files Browse the repository at this point in the history
  • Loading branch information
talal committed Nov 17, 2020
1 parent c9a7d9e commit 7014fb3
Show file tree
Hide file tree
Showing 4 changed files with 81 additions and 15 deletions.
9 changes: 8 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.7.1] - 2020-11-17

### Fixed

- Clean up of PrometheusRule resources for which the operator is disabled.

## [0.7.0] - 2020-09-28

### Fixed
Expand Down Expand Up @@ -69,7 +75,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- Initial release.

[unreleased]: https://github.com/sapcc/absent-metrics-operator/compare/v0.7.0...HEAD
[unreleased]: https://github.com/sapcc/absent-metrics-operator/compare/v0.7.1...HEAD
[0.7.1]: https://github.com/sapcc/absent-metrics-operator/compare/v0.7.0...v0.7.1
[0.7.0]: https://github.com/sapcc/absent-metrics-operator/compare/v0.6.0...v0.7.0
[0.6.0]: https://github.com/sapcc/absent-metrics-operator/compare/v0.5.2...v0.6.0
[0.5.2]: https://github.com/sapcc/absent-metrics-operator/compare/v0.5.1...v0.5.2
Expand Down
29 changes: 17 additions & 12 deletions internal/controller/controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,22 +196,18 @@ func (c *Controller) enqueuePromRule(obj interface{}) {
}

// Do not enqueue object to workqueue if it is managed (read: created) by
// the operator itself or if the annotation for disabling the operator is
// present.
// the operator itself.
l := obj.(*monitoringv1.PrometheusRule).GetLabels()
if mustParseBool(l[labelOperatorManagedBy]) {
return
}
if mustParseBool(l[labelOperatorDisable]) {
c.logger.Debug("msg", "operator disabled, skipping", "key", key)
return
}

c.workqueue.Add(key)
}

// enqueueAllPrometheusRules adds all PrometheusRules to the workqueue.
func (c *Controller) enqueueAllPrometheusRules() {
// EnqueueAllPrometheusRules adds all PrometheusRules to the workqueue.
// This method is exported because it is used in the unit tests to force reconciliation.
func (c *Controller) EnqueueAllPrometheusRules() {
// promRules is a map of namespace to map[promRuleName]bool.
promRules := make(map[string]map[string]bool)
// absentPromRules is a map of namespace to a slice of absentPrometheusRule.
Expand Down Expand Up @@ -240,8 +236,8 @@ func (c *Controller) enqueueAllPrometheusRules() {
}
}

// Add the PrometheusRules which no longer exist but their corresponding
// absent alerts have been added to an absentPrometheusRule.
// Enqueue the PrometheusRules which no longer exist but their corresponding
// absent alerts still exist in an absentPrometheusRule.
// The syncHandler() will perform the appropriate cleanup for them.
for ns, promRuleNames := range promRules {
// Check all absentPrometheusRules for this namespace for alerts that
Expand Down Expand Up @@ -279,7 +275,7 @@ func (c *Controller) runWorker() {
case <-done:
return
case <-reconcileT.C:
c.enqueueAllPrometheusRules()
c.EnqueueAllPrometheusRules()
}
}
}()
Expand Down Expand Up @@ -342,7 +338,16 @@ func (c *Controller) syncHandler(key string) error {
promRule, err := c.promRuleLister.PrometheusRules(namespace).Get(name)
switch {
case err == nil:
err = c.updateAbsentAlerts(namespace, name, promRule)
if l := promRule.GetLabels(); mustParseBool(l[labelOperatorDisable]) {
c.logger.Debug("msg", "operator disabled for this PrometheusRule, cleaning up any corresponding absent alerts", "key", key)
err = c.cleanUpOrphanedAbsentAlertsNamespace(name, namespace)
if err == nil {
c.metrics.SuccessfulPrometheusRuleReconcileTime.DeleteLabelValues(namespace, name)
return nil
}
} else {
err = c.updateAbsentAlerts(namespace, name, promRule)
}
case apierrors.IsNotFound(err):
// The resource may no longer exist, in which case we clean up any
// orphaned absent alerts.
Expand Down
57 changes: 56 additions & 1 deletion test/controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -224,8 +224,63 @@ var _ = Describe("Controller", func() {
Expect(apierrors.IsNotFound(err)).To(Equal(true))
})
})
})

Describe("Operator disable", func() {
objKey := client.ObjectKey{Namespace: "swift", Name: "openstack-swift.alerts"}
prObjKey := client.ObjectKey{Namespace: "swift", Name: fixtures.OSAbsentPromRuleName}

Context("with disabling operator for a specific alert rule", func() {
It("should delete the corresponding absent alert rule from "+fixtures.OSAbsentPromRuleName+" in swift namespace", func() {
// Add the no_alert_on_absence label to a specific alert rule.
// This should result in the deletion of the corresponding
// absent alert rule.
var expected monitoringv1.PrometheusRule
err := k8sClient.Get(ctx, prObjKey, &expected)
Expect(err).ToNot(HaveOccurred())
// Remove first rule of first group
expected.Spec.Groups[0].Rules = expected.Spec.Groups[0].Rules[1:]

var pr monitoringv1.PrometheusRule
err = k8sClient.Get(ctx, objKey, &pr)
Expect(err).ToNot(HaveOccurred())
// Add no_alert_on_absence label to first rule of first group.
pr.Spec.Groups[0].Rules[0].Labels["no_alert_on_absence"] = "true"
err = k8sClient.Update(ctx, &pr)
Expect(err).ToNot(HaveOccurred())

waitForControllerToProcess()
var actual monitoringv1.PrometheusRule
err = k8sClient.Get(ctx, prObjKey, &actual)
Expect(err).ToNot(HaveOccurred())
Expect(actual.Spec).To(Equal(expected.Spec))
})
})

Context("with disabling operator for a PrometheusRule", func() {
It("should delete the "+fixtures.OSAbsentPromRuleName+" in swift namespace", func() {
// Add the disable label to the original PrometheusRule
// resource. This should result in the deletion of the
// corresponding AbsentPrometheusRule.
var pr monitoringv1.PrometheusRule
err := k8sClient.Get(ctx, objKey, &pr)
Expect(err).ToNot(HaveOccurred())
pr.Labels["absent-metrics-operator/disable"] = "true"
// pr.Spec.Groups = []monitoringv1.RuleGroup{}
err = k8sClient.Update(ctx, &pr)
Expect(err).ToNot(HaveOccurred())

c.EnqueueAllPrometheusRules() // Force reconciliation
waitForControllerToProcess()
err = k8sClient.Get(ctx, prObjKey, &pr)
Expect(err).To(HaveOccurred())
Expect(apierrors.IsNotFound(err)).To(Equal(true))
})
})
})

It("should cleanup orphaned gauge metrics", func() {
Describe("AbsentPrometheusRule orphaned gauge metrics", func() {
It("should be cleaned up", func() {
handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
method := http.MethodGet
path := "/metrics"
Expand Down
1 change: 0 additions & 1 deletion test/fixtures/metrics.prom
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# HELP absent_metrics_operator_successful_reconcile_time The time at which a specific PrometheusRule was successfully reconciled by the operator.
# TYPE absent_metrics_operator_successful_reconcile_time gauge
absent_metrics_operator_successful_reconcile_time{prometheusrule_name="openstack-limes-api.alerts",prometheusrule_namespace="resmgmt"} 1
absent_metrics_operator_successful_reconcile_time{prometheusrule_name="openstack-swift.alerts",prometheusrule_namespace="swift"} 1

0 comments on commit 7014fb3

Please sign in to comment.