diff --git a/controllers/absence_prometheusrule.go b/controllers/absence_prometheusrule.go
index a7d3849b..fae54ee2 100644
--- a/controllers/absence_prometheusrule.go
+++ b/controllers/absence_prometheusrule.go
@@ -21,26 +21,67 @@ import (
 	"reflect"
 	"sort"
 	"time"
-
+	"text/template"
 	monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/types"
 	"sigs.k8s.io/controller-runtime/pkg/client"
+	"bytes"
+	"encoding/json"
 )
 
 const absencePromRuleNameSuffix = "-absent-metric-alert-rules"
 
-// AbsencePrometheusRuleName returns the name of an AbsencePrometheusRule resource that
-// holds the absence alert rules concerning a specific Prometheus server (e.g. openstack, kubernetes, etc.).
-func AbsencePrometheusRuleName(promServer string) string {
-	return fmt.Sprintf("%s%s", promServer, absencePromRuleNameSuffix)
+// AbsencePrometheusRuleName returns the name of the AbsencePrometheusRule resource that
+// holds the absence alert rules generated for prometheusRule.
+//
+// The name is rendered from prometheusRuleString, a text/template string that is executed
+// against the JSON representation of prometheusRule, for example
+// "{{ .metadata.labels.prometheus }}-absent-metrics". If the template cannot be parsed or
+// executed, or if it references a field that prometheusRule does not have, the error is
+// printed and the fallback name "default-absent-metrics" is returned.
+func AbsencePrometheusRuleName(prometheusRule monitoringv1.PrometheusRule, prometheusRuleString string) string {
+	name, err := renderAbsencePrometheusRuleName(prometheusRule, prometheusRuleString)
+	if err != nil {
+		fmt.Println(err.Error())
+		return "default-absent-metrics"
+	}
+	return name
+}
+
+// renderAbsencePrometheusRuleName executes the name template against prometheusRule and
+// returns the first error it encounters instead of panicking or discarding it.
+func renderAbsencePrometheusRuleName(prometheusRule monitoringv1.PrometheusRule, prometheusRuleString string) (string, error) {
+	// missingkey=error makes a reference to an absent field (e.g. a PrometheusRule without
+	// the "prometheus" label) fail instead of rendering "<no value>" into the name.
+	t, err := template.New("PrometheusRuleTemplate").Option("missingkey=error").Parse(prometheusRuleString)
+	if err != nil {
+		return "", fmt.Errorf("parsing name template %q: %w", prometheusRuleString, err)
+	}
+
+	// Round-trip through JSON so that the template addresses fields by their JSON names
+	// (e.g. .metadata.labels) instead of by Go struct field names.
+	b, err := json.Marshal(prometheusRule)
+	if err != nil {
+		return "", fmt.Errorf("encoding PrometheusRule: %w", err)
+	}
+	m := make(map[string]interface{})
+	if err := json.Unmarshal(b, &m); err != nil {
+		return "", fmt.Errorf("decoding PrometheusRule: %w", err)
+	}
+
+	var buf bytes.Buffer
+	if err := t.Execute(&buf, m); err != nil {
+		return "", fmt.Errorf("executing name template: %w", err)
+	}
+	return buf.String(), nil
 }
 
-func (r *PrometheusRuleReconciler) newAbsencePrometheusRule(namespace, promServer string) *monitoringv1.PrometheusRule {
+func (r *PrometheusRuleReconciler) newAbsencePrometheusRule(namespace, name string, promServer string) *monitoringv1.PrometheusRule {
 	return &monitoringv1.PrometheusRule{
 		ObjectMeta: metav1.ObjectMeta{
-			Name:      AbsencePrometheusRuleName(promServer),
+			Name:      name,
 			Namespace: namespace,
 			Labels: map[string]string{
 				// Add a label that identifies that this PrometheusRule resource is
@@ -55,11 +72,12 @@ func (r *PrometheusRuleReconciler) 
newAbsencePrometheusRule(namespace, promServe func (r *PrometheusRuleReconciler) getExistingAbsencePrometheusRule( ctx context.Context, - namespace, promServer string, + namespace, prometheusRuleString string, + rule monitoringv1.PrometheusRule, ) (*monitoringv1.PrometheusRule, error) { var absencePromRule monitoringv1.PrometheusRule - nsName := types.NamespacedName{Namespace: namespace, Name: AbsencePrometheusRuleName(promServer)} + nsName := types.NamespacedName{Namespace: namespace, Name: AbsencePrometheusRuleName(rule, prometheusRuleString)} if err := r.Get(ctx, nsName, &absencePromRule); err != nil { return nil, err } @@ -134,13 +152,20 @@ func (r *PrometheusRuleReconciler) cleanUpOrphanedAbsenceAlertRules( ctx context.Context, promRule types.NamespacedName, promServer string, + prometheusRuleString string, ) error { + var promRuleObj monitoringv1.PrometheusRule + if err := r.Get(ctx, promRule, &promRuleObj); err != nil { + return err + } + + // Step 1: find the corresponding AbsencePrometheusRule that needs to be cleaned up. var aPRToClean *monitoringv1.PrometheusRule if promServer != "" { var err error - if aPRToClean, err = r.getExistingAbsencePrometheusRule(ctx, promRule.Namespace, promServer); err != nil { + if aPRToClean, err = r.getExistingAbsencePrometheusRule(ctx, promRule.Namespace, prometheusRuleString, promRuleObj); err != nil { return err } } else { @@ -204,9 +229,6 @@ func (r *PrometheusRuleReconciler) cleanUpAbsencePrometheusRule(ctx context.Cont // concerning Prometheus server. 
var listOpts client.ListOptions client.InNamespace(absencePromRule.GetNamespace()).ApplyToList(&listOpts) - client.MatchingLabels{ - labelPrometheusServer: absencePromRule.Labels[labelPrometheusServer], - }.ApplyToList(&listOpts) var promRules monitoringv1.PrometheusRuleList if err := r.List(ctx, &promRules, &listOpts); err != nil { return err @@ -242,7 +264,7 @@ func (r *PrometheusRuleReconciler) cleanUpAbsencePrometheusRule(ctx context.Cont // updateAbsenceAlertRules generates absence alert rules for the given PrometheusRule and // adds them to the corresponding AbsencePrometheusRule. -func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context, promRule *monitoringv1.PrometheusRule) error { +func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context, promRule *monitoringv1.PrometheusRule, prometheusRuleString string) error { promRuleName := promRule.GetName() namespace := promRule.GetNamespace() log := r.Log.WithValues("name", promRuleName, "namespace", namespace) @@ -252,17 +274,19 @@ func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context, promServer, ok := promRuleLabels["prometheus"] if !ok { // Normally this shouldn't happen but just in case that it does. - return errors.New("no 'prometheus' label found") + promServer = "default-prometheus" + // return errors.New("no 'prometheus' label found") } // Step 2: get the corresponding AbsencePrometheusRule if it exists. 
existingAbsencePrometheusRule := false - absencePromRule, err := r.getExistingAbsencePrometheusRule(ctx, namespace, promServer) + absencePromRule, err := r.getExistingAbsencePrometheusRule(ctx, namespace, prometheusRuleString, *promRule) switch { case err == nil: existingAbsencePrometheusRule = true case apierrors.IsNotFound(err): - absencePromRule = r.newAbsencePrometheusRule(namespace, promServer) + name := AbsencePrometheusRuleName(*promRule, prometheusRuleString) + absencePromRule = r.newAbsencePrometheusRule(namespace, name, promServer) default: // This could have been caused by a temporary network failure, or any // other transient reason. @@ -295,7 +319,7 @@ func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context, if len(absenceRuleGroups) == 0 { if existingAbsencePrometheusRule { key := types.NamespacedName{Namespace: namespace, Name: promRuleName} - return r.cleanUpOrphanedAbsenceAlertRules(ctx, key, promServer) + return r.cleanUpOrphanedAbsenceAlertRules(ctx, key, promServer, prometheusRuleString) } return nil } diff --git a/controllers/prometheusrule_controller.go b/controllers/prometheusrule_controller.go index e94e9ed0..ef3264e7 100644 --- a/controllers/prometheusrule_controller.go +++ b/controllers/prometheusrule_controller.go @@ -50,6 +50,7 @@ type PrometheusRuleReconciler struct { // KeepLabel is a map of labels that will be retained from the original alert rule and // passed on to its corresponding absent alert rule. 
KeepLabel KeepLabel + PrometheusRuleString string } //+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules,verbs=get;list;watch;create;update;patch;delete @@ -67,10 +68,10 @@ func (r *PrometheusRuleReconciler) Reconcile(ctx context.Context, req ctrl.Reque err := r.Get(ctx, req.NamespacedName, &promRule) switch { case err == nil: - err = r.reconcileObject(ctx, req.NamespacedName, &promRule) + err = r.reconcileObject(ctx, req.NamespacedName, &promRule, r.PrometheusRuleString) case apierrors.IsNotFound(err): // Could not find object on the API server, maybe it has been deleted? - return r.handleObjectNotFound(ctx, req.NamespacedName) + return r.handleObjectNotFound(ctx, req.NamespacedName, r.PrometheusRuleString) default: // Handle err down below. } @@ -103,7 +104,7 @@ func (r *PrometheusRuleReconciler) SetupWithManager(mgr ctrl.Manager) error { // handleObjectNotFound is a helper function for Reconcile(). It exists separately so that // we can exit on error without making the `switch` in Reconcile() complex. -func (r *PrometheusRuleReconciler) handleObjectNotFound(ctx context.Context, key types.NamespacedName) (ctrl.Result, error) { +func (r *PrometheusRuleReconciler) handleObjectNotFound(ctx context.Context, key types.NamespacedName, prometheusRuleString string) (ctrl.Result, error) { log := r.Log.WithValues("name", key.Name, "namespace", key.Namespace) // Step 1: check if the object is a PrometheusRule or an AbsencePrometheusRule. @@ -124,7 +125,7 @@ func (r *PrometheusRuleReconciler) handleObjectNotFound(ctx context.Context, key // we wait until the next time when all AbsencePrometheusRules are requeued for // processing (after the requeueInterval is elapsed). 
log.V(logLevelDebug).Info("PrometheusRule no longer exists") - err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, "") + err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, "", prometheusRuleString) if err != nil { if !apierrors.IsNotFound(err) && !errors.Is(err, errCorrespondingAbsencePromRuleNotExists) { log.Error(err, "could not clean up orphaned absence alert rules") @@ -142,6 +143,7 @@ func (r *PrometheusRuleReconciler) reconcileObject( ctx context.Context, key types.NamespacedName, obj *monitoringv1.PrometheusRule, + prometheusRuleString string, ) error { log := r.Log.WithValues("name", key.Name, "namespace", key.Namespace) @@ -176,7 +178,7 @@ func (r *PrometheusRuleReconciler) reconcileObject( // elapsed). if parseBool(l[labelOperatorDisable]) { log.V(logLevelDebug).Info("operator disabled for this PrometheusRule") - err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, l[labelPrometheusServer]) + err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, l[labelPrometheusServer], prometheusRuleString) if err != nil { if !apierrors.IsNotFound(err) && !errors.Is(err, errCorrespondingAbsencePromRuleNotExists) { log.Error(err, "could not clean up orphaned absence alert rules") @@ -189,7 +191,7 @@ func (r *PrometheusRuleReconciler) reconcileObject( } // Step 3: Generate the corresponding absence alert rules for this resource. 
- err := r.updateAbsenceAlertRules(ctx, obj) + err := r.updateAbsenceAlertRules(ctx, obj, prometheusRuleString) if err == nil { setReconcileGauge(key) log.V(logLevelDebug).Info("successfully reconciled PrometheusRule") diff --git a/main.go b/main.go index 45165528..98ad621f 100644 --- a/main.go +++ b/main.go @@ -59,6 +59,7 @@ func main() { probeAddr string enableLeaderElection bool keepLabel labelsMap + prometheusRuleString string ) bininfo.HandleVersionArgument() @@ -66,6 +67,7 @@ func main() { // Port `9659` has been allocated for absent metrics operator: https://github.com/prometheus/prometheus/wiki/Default-port-allocations flag.StringVar(&metricsAddr, "metrics-bind-address", ":9659", "The address the metric endpoint binds to.") flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.") + flag.StringVar(&prometheusRuleString, "rule", "{{ .metadata.labels.prometheus }}-absent-metrics", "Create new prometheusRules form this template string.") flag.BoolVar(&enableLeaderElection, "leader-elect", false, "Enable leader election for controller manager. "+ "Enabling this will ensure there is only one active controller manager.") @@ -111,6 +113,7 @@ func main() { Scheme: mgr.GetScheme(), Log: ctrl.Log.WithName("controller").WithName("prometheusrule"), KeepLabel: controllers.KeepLabel(keepLabel), + PrometheusRuleString: prometheusRuleString, }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "PrometheusRule") os.Exit(1)