Skip to content

Commit

Permalink
Add support to disable monitoring at the installation or cluster level
Browse files Browse the repository at this point in the history
Signed-off-by: QuentinBisson <[email protected]>
  • Loading branch information
QuentinBisson committed Jul 9, 2024
1 parent 48afbc4 commit 5710357
Show file tree
Hide file tree
Showing 9 changed files with 340 additions and 35 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Changed

- Disable prometheus agents if monitoring is disabled at the installation or cluster level.

## [0.2.0] - 2024-06-25

### Added
Expand Down
5 changes: 4 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ module github.com/giantswarm/observability-operator
go 1.22.0

require (
github.com/giantswarm/apiextensions-application v0.6.2
github.com/go-logr/logr v1.4.2
github.com/onsi/ginkgo/v2 v2.19.0
github.com/onsi/gomega v1.33.1
Expand All @@ -20,6 +21,8 @@ require (
sigs.k8s.io/yaml v1.4.0
)

require github.com/giantswarm/k8smetadata v0.24.0 // indirect

require (
github.com/beorn7/perks v1.0.1 // indirect
github.com/blang/semver/v4 v4.0.0 // indirect
Expand Down Expand Up @@ -68,7 +71,7 @@ require (
gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect
google.golang.org/protobuf v1.34.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v2 v2.4.0
gopkg.in/yaml.v3 v3.0.1 // indirect
k8s.io/apiextensions-apiserver v0.29.5 // indirect
k8s.io/component-base v0.29.5 // indirect
Expand Down
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@ github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0
github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ=
github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA=
github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM=
github.com/giantswarm/apiextensions-application v0.6.2 h1:XL86OrpprWl5Wp38EUvUXt3ztTo25+V63oDVlFwDpNg=
github.com/giantswarm/apiextensions-application v0.6.2/go.mod h1:8ylqSmDSzFblCppRQTFo8v9s/F6MX6RTusVVoDDfWso=
github.com/giantswarm/k8smetadata v0.24.0 h1:mAIgH4W06qx8X5rV9QEtJhCJLn8DMXfTfNVZi5ROp4c=
github.com/giantswarm/k8smetadata v0.24.0/go.mod h1:QiQAyaZnwco1U0lENLF0Kp4bSN4dIPwIlHWEvUo3ES8=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ=
Expand Down
104 changes: 81 additions & 23 deletions internal/controller/cluster_monitoring_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log"
"sigs.k8s.io/controller-runtime/pkg/reconcile"

"github.com/giantswarm/observability-operator/pkg/bundle"
"github.com/giantswarm/observability-operator/pkg/common"
"github.com/giantswarm/observability-operator/pkg/monitoring"
"github.com/giantswarm/observability-operator/pkg/monitoring/heartbeat"
Expand All @@ -48,6 +49,8 @@ type ClusterMonitoringReconciler struct {
heartbeat.HeartbeatRepository
// MimirService is the service for managing mimir configuration.
mimir.MimirService
// BundleConfigurationService is the service for configuring the observability bundle.
*bundle.BundleConfigurationService
// MonitoringConfig is the configuration for the monitoring package.
MonitoringConfig monitoring.Config
}
Expand Down Expand Up @@ -88,23 +91,27 @@ func (r *ClusterMonitoringReconciler) Reconcile(ctx context.Context, req ctrl.Re
ctx = log.IntoContext(ctx, logger)

if !r.MonitoringConfig.Enabled {
logger.Info("Monitoring is disabled at the installation level")
return ctrl.Result{}, nil
logger.Info("Monitoring is disabled at the installation level.")
}

if !r.MonitoringConfig.IsMonitored(cluster) {
logger.Info("Monitoring is disabled for this cluster.")
}

// Handle deletion reconciliation loop.
if !cluster.ObjectMeta.DeletionTimestamp.IsZero() {
logger.Info("Handling deletion for Cluster")
logger.Info("Handling deletion for cluster")
return r.reconcileDelete(ctx, cluster)
}

logger.Info("Reconciling Cluster")
logger.Info("Reconciling cluster")
// Handle normal reconciliation loop.
return r.reconcile(ctx, cluster)
}

// reconcile handles cluster reconciliation.
func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) {
var err error
logger := log.FromContext(ctx)

// Add finalizer first if not set to avoid the race condition between init and delete.
Expand All @@ -126,24 +133,49 @@ func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *cl
return ctrl.Result{}, nil
}

// Management cluster specific configuration
if cluster.Name == r.ManagementCluster.Name {
err := r.HeartbeatRepository.CreateOrUpdate(ctx)
// If monitoring is enabled at the installation level, configure the monitoring stack; otherwise, tear it down.
if r.MonitoringConfig.Enabled {
err = r.HeartbeatRepository.CreateOrUpdate(ctx)
if err != nil {
logger.Error(err, "failed to create or update heartbeat")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}

err = r.MimirService.ConfigureMimir(ctx)
if err != nil {
logger.Error(err, "failed to configure mimir")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}
} else {
err = r.tearDown(ctx)
if err != nil {
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}
}
}

// Cluster specific configuration
if r.MonitoringConfig.IsMonitored(cluster) {
// Create or update PrometheusAgent remote write configuration.
err = r.PrometheusAgentService.ReconcileRemoteWriteConfiguration(ctx, cluster)
if err != nil {
logger.Error(err, "failed to create or update heartbeat")
logger.Error(err, "failed to create or update prometheus agent remote write config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}

err = r.MimirService.ConfigureMimir(ctx)
} else {
err := r.PrometheusAgentService.DeleteRemoteWriteConfiguration(ctx, cluster)
if err != nil {
logger.Error(err, "failed to configure mimir")
logger.Error(err, "failed to delete prometheus agent remote write config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}
}

// Create or update PrometheusAgent remote write configuration.
err := r.PrometheusAgentService.ReconcileRemoteWriteConfiguration(ctx, cluster)
// We always configure the bundle, even if monitoring is disabled for the cluster.
err = r.BundleConfigurationService.Configure(ctx, cluster)
if err != nil {
logger.Error(err, "failed to create or update prometheus agent remote write config")
logger.Error(err, "failed to configure the observability-bundle")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}

Expand All @@ -153,27 +185,33 @@ func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *cl
// reconcileDelete handles cluster deletion.
func (r *ClusterMonitoringReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) {
logger := log.FromContext(ctx)

// We do not need to delete anything if there is no finalizer on the cluster
if controllerutil.ContainsFinalizer(cluster, monitoring.MonitoringFinalizer) {
if cluster.Name == r.ManagementCluster.Name {
err := r.HeartbeatRepository.Delete(ctx)
// We always remove the bundle configuration, even if monitoring is disabled for the cluster.
err := r.BundleConfigurationService.RemoveConfiguration(ctx, cluster)
if err != nil {
logger.Error(err, "failed to remove the observability-bundle configuration")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}

// Cluster specific configuration
if r.MonitoringConfig.IsMonitored(cluster) {
err := r.PrometheusAgentService.DeleteRemoteWriteConfiguration(ctx, cluster)
if err != nil {
logger.Error(err, "failed to delete heartbeat")
logger.Error(err, "failed to delete prometheus agent remote write config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}
}

err = r.MimirService.DeleteMimirSecrets(ctx)
// Management cluster specific configuration
if cluster.Name == r.ManagementCluster.Name {
err := r.tearDown(ctx)
if err != nil {
logger.Error(err, "failed to delete mimir ingress secret")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}
}

err := r.PrometheusAgentService.DeleteRemoteWriteConfiguration(ctx, cluster)
if err != nil {
logger.Error(err, "failed to delete prometheus agent remote write config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err)
}

// We get the latest state of the object to avoid race conditions.
// Finalizer handling needs to come last.
// We use a patch rather than an update to avoid conflicts when multiple controllers are removing their finalizer from the ClusterCR
Expand All @@ -183,6 +221,7 @@ func (r *ClusterMonitoringReconciler) reconcileDelete(ctx context.Context, clust
if err != nil {
return ctrl.Result{}, errors.WithStack(err)
}

controllerutil.RemoveFinalizer(cluster, monitoring.MonitoringFinalizer)
if err := patchHelper.Patch(ctx, cluster); err != nil {
logger.Error(err, "failed to remove finalizer, requeuing", "finalizer", monitoring.MonitoringFinalizer)
Expand All @@ -192,3 +231,22 @@ func (r *ClusterMonitoringReconciler) reconcileDelete(ctx context.Context, clust
}
return ctrl.Result{}, nil
}

// tearDown removes the monitoring components that are specific to the
// management cluster: the heartbeat is deleted first, then the mimir secrets.
// It stops at the first failing step, logs that failure, and hands the error
// back to the caller unchanged; nil is returned once both deletions succeed.
func (r *ClusterMonitoringReconciler) tearDown(ctx context.Context) error {
	ctxLogger := log.FromContext(ctx)

	// Each step keeps its error scoped to its own guard clause.
	if err := r.HeartbeatRepository.Delete(ctx); err != nil {
		ctxLogger.Error(err, "failed to delete heartbeat")
		return err
	}

	if err := r.MimirService.DeleteMimirSecrets(ctx); err != nil {
		ctxLogger.Error(err, "failed to delete mimir ingress secret")
		return err
	}

	return nil
}
16 changes: 10 additions & 6 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
// to ensure that exec-entrypoint and run can make use of them.
_ "k8s.io/client-go/plugin/pkg/client/auth"

appv1 "github.com/giantswarm/apiextensions-application/api/v1alpha1"
"k8s.io/apimachinery/pkg/runtime"
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
Expand All @@ -38,6 +39,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/webhook"

"github.com/giantswarm/observability-operator/internal/controller"
"github.com/giantswarm/observability-operator/pkg/bundle"
"github.com/giantswarm/observability-operator/pkg/common"
"github.com/giantswarm/observability-operator/pkg/common/organization"
"github.com/giantswarm/observability-operator/pkg/common/password"
Expand Down Expand Up @@ -79,6 +81,7 @@ const (
// init registers every API group this operator works with into the shared
// scheme before main runs. Each registration is wrapped in utilruntime.Must,
// so a failing AddToScheme call is not silently ignored.
func init() {
// Built-in Kubernetes types from client-go.
utilruntime.Must(clientgoscheme.AddToScheme(scheme))
// Cluster types (clusterv1) — presumably the Cluster API group; confirm against the import block.
utilruntime.Must(clusterv1.AddToScheme(scheme))
// Giant Swarm application types (apiextensions-application v1alpha1).
utilruntime.Must(appv1.AddToScheme(scheme))

// Tooling marker: keep the line below exactly as-is and leave the registrations above it.
//+kubebuilder:scaffold:scheme
}
Expand Down Expand Up @@ -221,12 +224,13 @@ func main() {
}

if err = (&controller.ClusterMonitoringReconciler{
Client: mgr.GetClient(),
ManagementCluster: managementCluster,
HeartbeatRepository: heartbeatRepository,
PrometheusAgentService: prometheusAgentService,
MimirService: mimirService,
MonitoringConfig: monitoringConfig,
Client: mgr.GetClient(),
ManagementCluster: managementCluster,
HeartbeatRepository: heartbeatRepository,
PrometheusAgentService: prometheusAgentService,
MimirService: mimirService,
MonitoringConfig: monitoringConfig,
BundleConfigurationService: bundle.NewBundleConfigurationService(mgr.GetClient(), monitoringConfig),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Cluster")
os.Exit(1)
Expand Down
Loading

0 comments on commit 5710357

Please sign in to comment.