Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Parameterize absent pr name #153

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 40 additions & 16 deletions controllers/absence_prometheusrule.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,26 +21,43 @@ import (
"reflect"
"sort"
"time"

"text/template"
monitoringv1 "github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring/v1"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/types"
"sigs.k8s.io/controller-runtime/pkg/client"
"bytes"
"encoding/json"
)

const absencePromRuleNameSuffix = "-absent-metric-alert-rules"


// AbsencePrometheusRuleName returns the name of an AbsencePrometheusRule resource that
// holds the absence alert rules concerning a specific Prometheus server (e.g. openstack, kubernetes, etc.).
func AbsencePrometheusRuleName(promServer string) string {
return fmt.Sprintf("%s%s", promServer, absencePromRuleNameSuffix)
func AbsencePrometheusRuleName(prometheusRule monitoringv1.PrometheusRule, prometheusRuleString string) string {

t := template.Must(template.New("PrometheusRuleTemplate").Parse(prometheusRuleString))
b, err := json.Marshal(prometheusRule)

m := make(map[string]interface{})
err = json.Unmarshal(b, &m)

buf := &bytes.Buffer{}
err = t.Execute(buf, m)
if err != nil {
fmt.Println(err.Error())
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please use the standard logging methods used elsewhere in the program

return "default-absent-metrics"
}

return buf.String()
}

func (r *PrometheusRuleReconciler) newAbsencePrometheusRule(namespace, promServer string) *monitoringv1.PrometheusRule {
func (r *PrometheusRuleReconciler) newAbsencePrometheusRule(namespace, name string, promServer string) *monitoringv1.PrometheusRule {
return &monitoringv1.PrometheusRule{
ObjectMeta: metav1.ObjectMeta{
Name: AbsencePrometheusRuleName(promServer),
Name: name,
Namespace: namespace,
Labels: map[string]string{
// Add a label that identifies that this PrometheusRule resource is
Expand All @@ -55,11 +72,12 @@ func (r *PrometheusRuleReconciler) newAbsencePrometheusRule(namespace, promServe

func (r *PrometheusRuleReconciler) getExistingAbsencePrometheusRule(
ctx context.Context,
namespace, promServer string,
namespace, prometheusRuleString string,
rule monitoringv1.PrometheusRule,
) (*monitoringv1.PrometheusRule, error) {

var absencePromRule monitoringv1.PrometheusRule
nsName := types.NamespacedName{Namespace: namespace, Name: AbsencePrometheusRuleName(promServer)}
nsName := types.NamespacedName{Namespace: namespace, Name: AbsencePrometheusRuleName(rule, prometheusRuleString)}
if err := r.Get(ctx, nsName, &absencePromRule); err != nil {
return nil, err
}
Expand Down Expand Up @@ -134,13 +152,20 @@ func (r *PrometheusRuleReconciler) cleanUpOrphanedAbsenceAlertRules(
ctx context.Context,
promRule types.NamespacedName,
promServer string,
prometheusRuleString string,
) error {

var promRuleObj monitoringv1.PrometheusRule
if err := r.Get(ctx, promRule, &promRuleObj); err != nil {
return err
}


// Step 1: find the corresponding AbsencePrometheusRule that needs to be cleaned up.
var aPRToClean *monitoringv1.PrometheusRule
if promServer != "" {
var err error
if aPRToClean, err = r.getExistingAbsencePrometheusRule(ctx, promRule.Namespace, promServer); err != nil {
if aPRToClean, err = r.getExistingAbsencePrometheusRule(ctx, promRule.Namespace, prometheusRuleString, promRuleObj); err != nil {
return err
}
} else {
Expand Down Expand Up @@ -204,9 +229,6 @@ func (r *PrometheusRuleReconciler) cleanUpAbsencePrometheusRule(ctx context.Cont
// concerning Prometheus server.
var listOpts client.ListOptions
client.InNamespace(absencePromRule.GetNamespace()).ApplyToList(&listOpts)
client.MatchingLabels{
labelPrometheusServer: absencePromRule.Labels[labelPrometheusServer],
}.ApplyToList(&listOpts)
var promRules monitoringv1.PrometheusRuleList
if err := r.List(ctx, &promRules, &listOpts); err != nil {
return err
Expand Down Expand Up @@ -242,7 +264,7 @@ func (r *PrometheusRuleReconciler) cleanUpAbsencePrometheusRule(ctx context.Cont

// updateAbsenceAlertRules generates absence alert rules for the given PrometheusRule and
// adds them to the corresponding AbsencePrometheusRule.
func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context, promRule *monitoringv1.PrometheusRule) error {
func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context, promRule *monitoringv1.PrometheusRule, prometheusRuleString string) error {
promRuleName := promRule.GetName()
namespace := promRule.GetNamespace()
log := r.Log.WithValues("name", promRuleName, "namespace", namespace)
Expand All @@ -252,19 +274,21 @@ func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context,
promServer, ok := promRuleLabels["prometheus"]
if !ok {
// Normally this shouldn't happen but just in case that it does.
return errors.New("no 'prometheus' label found")
promServer = "default-prometheus"
// return errors.New("no 'prometheus' label found")
Comment on lines +277 to +278
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This looks like some leftover debugging code

}

// Step 2: get the corresponding AbsencePrometheusRule if it exists. We do this in
// advance so that we can get suitable defaults for tier and service labels in the
// next step.
existingAbsencePrometheusRule := false
absencePromRule, err := r.getExistingAbsencePrometheusRule(ctx, namespace, promServer)
absencePromRule, err := r.getExistingAbsencePrometheusRule(ctx, namespace, prometheusRuleString, *promRule)
switch {
case err == nil:
existingAbsencePrometheusRule = true
case apierrors.IsNotFound(err):
absencePromRule = r.newAbsencePrometheusRule(namespace, promServer)
name := AbsencePrometheusRuleName(*promRule, prometheusRuleString)
absencePromRule = r.newAbsencePrometheusRule(namespace, name, promServer)
default:
// This could have been caused by a temporary network failure, or any
// other transient reason.
Expand Down Expand Up @@ -310,7 +334,7 @@ func (r *PrometheusRuleReconciler) updateAbsenceAlertRules(ctx context.Context,
if len(absenceRuleGroups) == 0 {
if existingAbsencePrometheusRule {
key := types.NamespacedName{Namespace: namespace, Name: promRuleName}
return r.cleanUpOrphanedAbsenceAlertRules(ctx, key, promServer)
return r.cleanUpOrphanedAbsenceAlertRules(ctx, key, promServer, prometheusRuleString)
}
return nil
}
Expand Down
14 changes: 8 additions & 6 deletions controllers/prometheusrule_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ type PrometheusRuleReconciler struct {
// KeepLabel is a map of labels that will be retained from the original alert rule and
// passed on to its corresponding absent alert rule.
KeepLabel KeepLabel
PrometheusRuleString string
}

//+kubebuilder:rbac:groups=monitoring.coreos.com,resources=prometheusrules,verbs=get;list;watch;create;update;patch;delete
Expand All @@ -67,10 +68,10 @@ func (r *PrometheusRuleReconciler) Reconcile(ctx context.Context, req ctrl.Reque
err := r.Get(ctx, req.NamespacedName, &promRule)
switch {
case err == nil:
err = r.reconcileObject(ctx, req.NamespacedName, &promRule)
err = r.reconcileObject(ctx, req.NamespacedName, &promRule, r.PrometheusRuleString)
case apierrors.IsNotFound(err):
// Could not find object on the API server, maybe it has been deleted?
return r.handleObjectNotFound(ctx, req.NamespacedName)
return r.handleObjectNotFound(ctx, req.NamespacedName, r.PrometheusRuleString)
default:
// Handle err down below.
}
Expand Down Expand Up @@ -103,7 +104,7 @@ func (r *PrometheusRuleReconciler) SetupWithManager(mgr ctrl.Manager) error {

// handleObjectNotFound is a helper function for Reconcile(). It exists separately so that
// we can exit on error without making the `switch` in Reconcile() complex.
func (r *PrometheusRuleReconciler) handleObjectNotFound(ctx context.Context, key types.NamespacedName) (ctrl.Result, error) {
func (r *PrometheusRuleReconciler) handleObjectNotFound(ctx context.Context, key types.NamespacedName, prometheusRuleString string) (ctrl.Result, error) {
log := r.Log.WithValues("name", key.Name, "namespace", key.Namespace)

// Step 1: check if the object is a PrometheusRule or an AbsencePrometheusRule.
Expand All @@ -124,7 +125,7 @@ func (r *PrometheusRuleReconciler) handleObjectNotFound(ctx context.Context, key
// we wait until the next time when all AbsencePrometheusRules are requeued for
// processing (after the requeueInterval is elapsed).
log.V(logLevelDebug).Info("PrometheusRule no longer exists")
err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, "")
err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, "", prometheusRuleString)
if err != nil {
if !apierrors.IsNotFound(err) && !errors.Is(err, errCorrespondingAbsencePromRuleNotExists) {
log.Error(err, "could not clean up orphaned absence alert rules")
Expand All @@ -142,6 +143,7 @@ func (r *PrometheusRuleReconciler) reconcileObject(
ctx context.Context,
key types.NamespacedName,
obj *monitoringv1.PrometheusRule,
prometheusRuleString string,
) error {

log := r.Log.WithValues("name", key.Name, "namespace", key.Namespace)
Expand Down Expand Up @@ -176,7 +178,7 @@ func (r *PrometheusRuleReconciler) reconcileObject(
// elapsed).
if parseBool(l[labelOperatorDisable]) {
log.V(logLevelDebug).Info("operator disabled for this PrometheusRule")
err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, l[labelPrometheusServer])
err := r.cleanUpOrphanedAbsenceAlertRules(ctx, key, l[labelPrometheusServer], prometheusRuleString)
if err != nil {
if !apierrors.IsNotFound(err) && !errors.Is(err, errCorrespondingAbsencePromRuleNotExists) {
log.Error(err, "could not clean up orphaned absence alert rules")
Expand All @@ -189,7 +191,7 @@ func (r *PrometheusRuleReconciler) reconcileObject(
}

// Step 3: Generate the corresponding absence alert rules for this resource.
err := r.updateAbsenceAlertRules(ctx, obj)
err := r.updateAbsenceAlertRules(ctx, obj, prometheusRuleString)
if err == nil {
setReconcileGauge(key)
log.V(logLevelDebug).Info("successfully reconciled PrometheusRule")
Expand Down
3 changes: 3 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,13 @@ func main() {
probeAddr string
enableLeaderElection bool
keepLabel labelsMap
prometheusRuleString string
)
flag.BoolVar(&debug, "debug", false, "Alias for '-zap-devel' flag.")
// Port `9659` has been allocated for absent metrics operator: https://github.com/prometheus/prometheus/wiki/Default-port-allocations
flag.StringVar(&metricsAddr, "metrics-bind-address", ":9659", "The address the metric endpoint binds to.")
flag.StringVar(&probeAddr, "health-probe-bind-address", ":8081", "The address the probe endpoint binds to.")
flag.StringVar(&prometheusRuleString, "rule", "{{ .metadata.labels.prometheus }}-absent-metrics", "Create new prometheusRules form this template string.")
flag.BoolVar(&enableLeaderElection, "leader-elect", false,
"Enable leader election for controller manager. "+
"Enabling this will ensure there is only one active controller manager.")
Expand Down Expand Up @@ -110,6 +112,7 @@ func main() {
Scheme: mgr.GetScheme(),
Log: ctrl.Log.WithName("controller").WithName("prometheusrule"),
KeepLabel: controllers.KeepLabel(keepLabel),
PrometheusRuleString: prometheusRuleString,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PrometheusRule")
os.Exit(1)
Expand Down
Loading