diff --git a/CHANGELOG.md b/CHANGELOG.md index 875d234d..5ab378b6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add Alertmanager controller + ## [0.10.1] - 2024-12-12 ### Fixed diff --git a/helm/observability-operator/templates/deployment.yaml b/helm/observability-operator/templates/deployment.yaml index 3b8a0a91..99739760 100644 --- a/helm/observability-operator/templates/deployment.yaml +++ b/helm/observability-operator/templates/deployment.yaml @@ -31,12 +31,15 @@ spec: - --management-cluster-pipeline={{ $.Values.managementCluster.pipeline }} - --management-cluster-region={{ $.Values.managementCluster.region }} # Monitoring configuration + - --alertmanager-enabled={{ $.Values.alerting.enabled }} + - --alertmanager-secret-name={{ include "alertmanager-secret.name" . }} - --alertmanager-url={{ $.Values.alerting.alertmanagerURL }} - --monitoring-enabled={{ $.Values.monitoring.enabled }} - --monitoring-agent={{ $.Values.monitoring.agent }} - --monitoring-sharding-scale-up-series-count={{ $.Values.monitoring.sharding.scaleUpSeriesCount }} - --monitoring-sharding-scale-down-percentage={{ $.Values.monitoring.sharding.scaleDownPercentage }} - --monitoring-wal-truncate-frequency={{ $.Values.monitoring.wal.truncateFrequency }} + - --operator-namespace={{ include "resource.default.namespace" . 
}} {{- if .Values.monitoring.prometheusVersion }} - --prometheus-version={{ $.Values.monitoring.prometheusVersion }} {{- end }} diff --git a/helm/observability-operator/values.yaml b/helm/observability-operator/values.yaml index 23e398b9..981a52ce 100644 --- a/helm/observability-operator/values.yaml +++ b/helm/observability-operator/values.yaml @@ -16,6 +16,7 @@ managementCluster: region: region alerting: + enabled: false alertmanagerURL: "" grafanaAddress: "" proxyURL: "" diff --git a/internal/controller/alertmanager_controller.go b/internal/controller/alertmanager_controller.go new file mode 100644 index 00000000..8799353b --- /dev/null +++ b/internal/controller/alertmanager_controller.go @@ -0,0 +1,95 @@ +package controller + +import ( + "context" + + v1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/types" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/builder" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/handler" + "sigs.k8s.io/controller-runtime/pkg/log" + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/pkg/errors" + + "github.com/giantswarm/observability-operator/internal/controller/predicates" + "github.com/giantswarm/observability-operator/pkg/alertmanager" + "github.com/giantswarm/observability-operator/pkg/config" +) + +// AlertmanagerReconciler reconciles the Alertmanager secret created by the observability-operator Helm chart +// and configures the Alertmanager instance with the configuration stored in the secret. +// This controller does not make use of finalizers as the configuration is not removed from Alertmanager when the secret is deleted. +type AlertmanagerReconciler struct { + client client.Client + + alertmanagerService alertmanager.Service +} + +// SetupAlertmanagerReconciler adds a controller into mgr that reconciles the Alertmanager secret. 
+func SetupAlertmanagerReconciler(mgr ctrl.Manager, conf config.Config) error { + r := &AlertmanagerReconciler{ + client: mgr.GetClient(), + alertmanagerService: alertmanager.New(conf), + } + + // Filter only the Alertmanager secret created by the observability-operator Helm chart + secretPredicate := predicates.NewAlertmanagerSecretPredicate(conf) + + // Filter only the Mimir Alertmanager pod + podPredicate := predicates.NewAlertmanagerPodPredicate() + + // Requeue the Alertmanager secret when the Mimir Alertmanager pod changes + p := podEventHandler(conf) + + // Setup the controller + return ctrl.NewControllerManagedBy(mgr). + For(&v1.Secret{}, builder.WithPredicates(secretPredicate)). + Watches(&v1.Pod{}, p, builder.WithPredicates(podPredicate)). + Complete(r) +} + +// podEventHandler returns an event handler that enqueues requests for the Alertmanager secret only. +// For now there is only one Alertmanager secret to be reconciled. +func podEventHandler(conf config.Config) handler.EventHandler { + return handler.EnqueueRequestsFromMapFunc(func(ctx context.Context, obj client.Object) []ctrl.Request { + return []reconcile.Request{ + { + NamespacedName: types.NamespacedName{ + Name: conf.Monitoring.AlertmanagerSecretName, + Namespace: conf.OperatorNamespace, + }, + }, + } + }) +} + +// Reconcile main logic +func (r AlertmanagerReconciler) Reconcile(ctx context.Context, req reconcile.Request) (ctrl.Result, error) { + logger := log.FromContext(ctx) + + logger.Info("Started reconciling") + + // Retrieve the secret being reconciled + secret := &v1.Secret{} + if err := r.client.Get(ctx, req.NamespacedName, secret); err != nil { + return ctrl.Result{}, errors.WithStack(err) + } + + if !secret.DeletionTimestamp.IsZero() { + // Nothing to do if the secret is being deleted + // Configuration is not removed from Alertmanager when the secret is deleted. 
+ return ctrl.Result{}, nil + } + + err := r.alertmanagerService.Configure(ctx, secret) + if err != nil { + return ctrl.Result{}, errors.WithStack(err) + } + + logger.Info("Finished reconciling") + + return ctrl.Result{}, nil +} diff --git a/internal/controller/predicates/alertmanager_predicates.go b/internal/controller/predicates/alertmanager_predicates.go new file mode 100644 index 00000000..9347c095 --- /dev/null +++ b/internal/controller/predicates/alertmanager_predicates.go @@ -0,0 +1,78 @@ +package predicates + +import ( + v1 "k8s.io/api/core/v1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/predicate" + + "github.com/giantswarm/observability-operator/pkg/config" +) + +// NewAlertmanagerSecretPredicate returns a predicate that filters only the Alertmanager secret created by the observability-operator Helm chart. +func NewAlertmanagerSecretPredicate(conf config.Config) predicate.Predicate { + filter := func(object client.Object) bool { + if object == nil { + return false + } + + secret, ok := object.(*v1.Secret) + if !ok { + return false + } + + if !secret.DeletionTimestamp.IsZero() { + return false + } + + labels := secret.GetLabels() + + ok = secret.GetName() == conf.Monitoring.AlertmanagerSecretName && + secret.GetNamespace() == conf.OperatorNamespace && + labels != nil && + labels["app.kubernetes.io/name"] == "observability-operator" + + return ok + } + + p := predicate.NewPredicateFuncs(filter) + + return p +} + +const ( + mimirNamespace = "mimir" + mimirInstance = "mimir" + mimirAlertmanagerComponent = "alertmanager" +) + +// NewAlertmanagerPodPredicate returns a predicate that filters only the Mimir Alertmanager pod. 
+func NewAlertmanagerPodPredicate() predicate.Predicate { + filter := func(object client.Object) bool { + if object == nil { + return false + } + + pod, ok := object.(*v1.Pod) + if !ok { + return false + } + + if !pod.DeletionTimestamp.IsZero() { + return false + } + + labels := pod.GetLabels() + + ok = pod.GetNamespace() == mimirNamespace && + labels != nil && + labels["app.kubernetes.io/component"] == mimirAlertmanagerComponent && + labels["app.kubernetes.io/instance"] == mimirInstance && + isPodReady(pod) + + return ok + } + + p := predicate.NewPredicateFuncs(filter) + + return p +} diff --git a/main.go b/main.go index 3e27f8a9..57fb9adb 100644 --- a/main.go +++ b/main.go @@ -74,6 +74,8 @@ func main() { "If set the metrics endpoint is served securely") flag.BoolVar(&conf.EnableHTTP2, "enable-http2", false, "If set, HTTP/2 will be enabled for the metrics and webhook servers") + flag.StringVar(&conf.OperatorNamespace, "operator-namespace", "", + "The namespace where the observability-operator is running.") // Management cluster configuration flags. flag.StringVar(&conf.ManagementCluster.BaseDomain, "management-cluster-base-domain", "", @@ -90,6 +92,10 @@ func main() { "The region of the management cluster.") // Monitoring configuration flags. 
+ flag.BoolVar(&conf.Monitoring.AlertmanagerEnabled, "alertmanager-enabled", false, + "Enable Alertmanager controller.") + flag.StringVar(&conf.Monitoring.AlertmanagerSecretName, "alertmanager-secret-name", "", + "The name of the secret containing the Alertmanager configuration.") flag.StringVar(&conf.Monitoring.AlertmanagerURL, "alertmanager-url", "", "The URL of the Alertmanager API.") flag.StringVar(&conf.Monitoring.MonitoringAgent, "monitoring-agent", commonmonitoring.MonitoringAgentAlloy, @@ -184,6 +190,15 @@ func main() { setupLog.Error(err, "unable to setup controller", "controller", "GrafanaOrganizationReconciler") os.Exit(1) } + + if conf.Monitoring.AlertmanagerEnabled { + // Setup controller for Alertmanager + err = controller.SetupAlertmanagerReconciler(mgr, conf) + if err != nil { + setupLog.Error(err, "unable to setup controller", "controller", "AlertmanagerReconciler") + os.Exit(1) + } + } //+kubebuilder:scaffold:builder if err := mgr.AddHealthzCheck("healthz", healthz.Ping); err != nil { diff --git a/pkg/config/config.go b/pkg/config/config.go index 03702834..68925239 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -11,6 +11,7 @@ type Config struct { ProbeAddr string SecureMetrics bool EnableHTTP2 bool + OperatorNamespace string ManagementCluster common.ManagementCluster diff --git a/pkg/monitoring/config.go b/pkg/monitoring/config.go index b90c6653..ec06309d 100644 --- a/pkg/monitoring/config.go +++ b/pkg/monitoring/config.go @@ -15,7 +15,9 @@ const MonitoringLabel = "giantswarm.io/monitoring" type Config struct { Enabled bool - AlertmanagerURL string + AlertmanagerSecretName string + AlertmanagerURL string + AlertmanagerEnabled bool MonitoringAgent string DefaultShardingStrategy sharding.Strategy