diff --git a/CHANGELOG.md b/CHANGELOG.md index dad80956..cac93d1b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +## [0.7.1] - 2020-11-17 + +### Fixed + +- Clean up of PrometheusRule resources for which the operator is disabled. + ## [0.7.0] - 2020-09-28 ### Fixed @@ -69,7 +75,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Initial release. -[unreleased]: https://github.com/sapcc/absent-metrics-operator/compare/v0.7.0...HEAD +[unreleased]: https://github.com/sapcc/absent-metrics-operator/compare/v0.7.1...HEAD +[0.7.1]: https://github.com/sapcc/absent-metrics-operator/compare/v0.7.0...v0.7.1 [0.7.0]: https://github.com/sapcc/absent-metrics-operator/compare/v0.6.0...v0.7.0 [0.6.0]: https://github.com/sapcc/absent-metrics-operator/compare/v0.5.2...v0.6.0 [0.5.2]: https://github.com/sapcc/absent-metrics-operator/compare/v0.5.1...v0.5.2 diff --git a/internal/controller/controller.go b/internal/controller/controller.go index 52551069..71cb8f26 100644 --- a/internal/controller/controller.go +++ b/internal/controller/controller.go @@ -196,22 +196,18 @@ func (c *Controller) enqueuePromRule(obj interface{}) { } // Do not enqueue object to workqueue if it is managed (read: created) by - // the operator itself or if the annotation for disabling the operator is - // present. + // the operator itself. l := obj.(*monitoringv1.PrometheusRule).GetLabels() if mustParseBool(l[labelOperatorManagedBy]) { return } - if mustParseBool(l[labelOperatorDisable]) { - c.logger.Debug("msg", "operator disabled, skipping", "key", key) - return - } c.workqueue.Add(key) } -// enqueueAllPrometheusRules adds all PrometheusRules to the workqueue. -func (c *Controller) enqueueAllPrometheusRules() { +// EnqueueAllPrometheusRules adds all PrometheusRules to the workqueue. +// This method is exported because it is used in the unit tests to force reconciliation. +func (c *Controller) EnqueueAllPrometheusRules() { // promRules is a map of namespace to map[promRuleName]bool. promRules := make(map[string]map[string]bool) // absentPromRules is a map of namespace to a slice of absentPrometheusRule. @@ -240,8 +236,8 @@ func (c *Controller) enqueueAllPrometheusRules() { } } - // Add the PrometheusRules which no longer exist but their corresponding - // absent alerts have been added to an absentPrometheusRule. + // Enqueue the PrometheusRules which no longer exist but their corresponding + // absent alerts still exist in an absentPrometheusRule. // The syncHandler() will perform the appropriate cleanup for them. for ns, promRuleNames := range promRules { // Check all absentPrometheusRules for this namespace for alerts that @@ -279,7 +275,7 @@ func (c *Controller) runWorker() { case <-done: return case <-reconcileT.C: - c.enqueueAllPrometheusRules() + c.EnqueueAllPrometheusRules() } } }() @@ -342,7 +338,16 @@ func (c *Controller) syncHandler(key string) error { promRule, err := c.promRuleLister.PrometheusRules(namespace).Get(name) switch { case err == nil: - err = c.updateAbsentAlerts(namespace, name, promRule) + if l := promRule.GetLabels(); mustParseBool(l[labelOperatorDisable]) { + c.logger.Debug("msg", "operator disabled for this PrometheusRule, cleaning up any corresponding absent alerts", "key", key) + err = c.cleanUpOrphanedAbsentAlertsNamespace(name, namespace) + if err == nil { + c.metrics.SuccessfulPrometheusRuleReconcileTime.DeleteLabelValues(namespace, name) + return nil + } + } else { + err = c.updateAbsentAlerts(namespace, name, promRule) + } case apierrors.IsNotFound(err): // The resource may no longer exist, in which case we clean up any // orphaned absent alerts. diff --git a/test/controller_test.go b/test/controller_test.go index 8bedff5d..9695de48 100644 --- a/test/controller_test.go +++ b/test/controller_test.go @@ -224,8 +224,63 @@ var _ = Describe("Controller", func() { Expect(apierrors.IsNotFound(err)).To(Equal(true)) }) }) + }) + + Describe("Operator disable", func() { + objKey := client.ObjectKey{Namespace: "swift", Name: "openstack-swift.alerts"} + prObjKey := client.ObjectKey{Namespace: "swift", Name: fixtures.OSAbsentPromRuleName} + + Context("with disabling operator for a specific alert rule", func() { + It("should delete the corresponding absent alert rule from "+fixtures.OSAbsentPromRuleName+" in swift namespace", func() { + // Add the no_alert_on_absence label to a specific alert rule. + // This should result in the deletion of the corresponding + // absent alert rule. + var expected monitoringv1.PrometheusRule + err := k8sClient.Get(ctx, prObjKey, &expected) + Expect(err).ToNot(HaveOccurred()) + // Remove first rule of first group + expected.Spec.Groups[0].Rules = expected.Spec.Groups[0].Rules[1:] + + var pr monitoringv1.PrometheusRule + err = k8sClient.Get(ctx, objKey, &pr) + Expect(err).ToNot(HaveOccurred()) + // Add no_alert_on_absence label to first rule of first group. + pr.Spec.Groups[0].Rules[0].Labels["no_alert_on_absence"] = "true" + err = k8sClient.Update(ctx, &pr) + Expect(err).ToNot(HaveOccurred()) + + waitForControllerToProcess() + var actual monitoringv1.PrometheusRule + err = k8sClient.Get(ctx, prObjKey, &actual) + Expect(err).ToNot(HaveOccurred()) + Expect(actual.Spec).To(Equal(expected.Spec)) + }) + }) + + Context("with disabling operator for a PrometheusRule", func() { + It("should delete the "+fixtures.OSAbsentPromRuleName+" in swift namespace", func() { + // Add the disable label to the original PrometheusRule + // resource. This should result in the deletion of the + // corresponding AbsentPrometheusRule. + var pr monitoringv1.PrometheusRule + err := k8sClient.Get(ctx, objKey, &pr) + Expect(err).ToNot(HaveOccurred()) + pr.Labels["absent-metrics-operator/disable"] = "true" + // pr.Spec.Groups = []monitoringv1.RuleGroup{} + err = k8sClient.Update(ctx, &pr) + Expect(err).ToNot(HaveOccurred()) + + c.EnqueueAllPrometheusRules() // Force reconciliation + waitForControllerToProcess() + err = k8sClient.Get(ctx, prObjKey, &pr) + Expect(err).To(HaveOccurred()) + Expect(apierrors.IsNotFound(err)).To(Equal(true)) + }) + }) + }) - It("should cleanup orphaned gauge metrics", func() { + Describe("AbsentPrometheusRule orphaned gauge metrics", func() { + It("should be cleaned up", func() { handler := promhttp.HandlerFor(reg, promhttp.HandlerOpts{}) method := http.MethodGet path := "/metrics" diff --git a/test/fixtures/metrics.prom b/test/fixtures/metrics.prom index 8ce193ef..9da81768 100644 --- a/test/fixtures/metrics.prom +++ b/test/fixtures/metrics.prom @@ -1,4 +1,3 @@ # HELP absent_metrics_operator_successful_reconcile_time The time at which a specific PrometheusRule was successfully reconciled by the operator. # TYPE absent_metrics_operator_successful_reconcile_time gauge absent_metrics_operator_successful_reconcile_time{prometheusrule_name="openstack-limes-api.alerts",prometheusrule_namespace="resmgmt"} 1 -absent_metrics_operator_successful_reconcile_time{prometheusrule_name="openstack-swift.alerts",prometheusrule_namespace="swift"} 1