diff --git a/CHANGELOG.md b/CHANGELOG.md index 960ef7b2..ca180a31 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Changed + +- Delete monitoring resources if monitoring is disabled at the installation or cluster level using the giantswarm.io/monitoring label. + ## [0.2.0] - 2024-06-25 ### Added diff --git a/go.mod b/go.mod index b3f3f977..0b15f6aa 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module github.com/giantswarm/observability-operator go 1.22.0 require ( + github.com/giantswarm/apiextensions-application v0.6.2 github.com/go-logr/logr v1.4.2 github.com/onsi/ginkgo/v2 v2.19.0 github.com/onsi/gomega v1.33.1 @@ -28,6 +29,7 @@ require ( github.com/emicklei/go-restful/v3 v3.12.0 // indirect github.com/evanphx/json-patch/v5 v5.9.0 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/giantswarm/k8smetadata v0.24.0 // indirect github.com/go-logr/zapr v1.3.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect @@ -42,7 +44,7 @@ require ( github.com/google/gofuzz v1.2.0 // indirect github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 // indirect github.com/google/uuid v1.6.0 // indirect - github.com/hashicorp/go-cleanhttp v0.5.0 // indirect + github.com/hashicorp/go-cleanhttp v0.5.2 // indirect github.com/hashicorp/go-retryablehttp v0.5.1 // indirect github.com/imdario/mergo v0.3.16 // indirect github.com/josharian/intern v1.0.0 // indirect @@ -68,7 +70,7 @@ require ( gomodules.xyz/jsonpatch/v2 v2.4.0 // indirect google.golang.org/protobuf v1.34.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect - gopkg.in/yaml.v2 v2.4.0 // indirect + gopkg.in/yaml.v2 v2.4.0 gopkg.in/yaml.v3 v3.0.1 // indirect k8s.io/apiextensions-apiserver v0.29.5 // indirect k8s.io/component-base v0.29.5 // indirect @@ -78,3 +80,8 @@ require ( sigs.k8s.io/json v0.0.0-20221116044647-bc3834ca7abd // indirect sigs.k8s.io/structured-merge-diff/v4 v4.4.1 // indirect ) + +replace ( + github.com/hashicorp/go-cleanhttp => github.com/hashicorp/go-cleanhttp v0.5.2 + github.com/hashicorp/go-retryablehttp => github.com/hashicorp/go-retryablehttp v0.7.7 +) diff --git a/go.sum b/go.sum index cb2fd1ac..a8bee1c2 100644 --- a/go.sum +++ b/go.sum @@ -32,8 +32,15 @@ github.com/evanphx/json-patch v5.7.0+incompatible h1:vgGkfT/9f8zE6tvSCe74nfpAVDQ github.com/evanphx/json-patch v5.7.0+incompatible/go.mod h1:50XU6AFN0ol/bzJsmQLiYLvXMP4fmwYFNcr97nuDLSk= github.com/evanphx/json-patch/v5 v5.9.0 h1:kcBlZQbplgElYIlo/n1hJbls2z/1awpXxpRi0/FOJfg= github.com/evanphx/json-patch/v5 v5.9.0/go.mod h1:VNkHZ/282BpEyt/tObQO8s5CMPmYYq14uClGH4abBuQ= +github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= +github.com/fatih/color v1.16.0 h1:zmkK9Ngbjj+K0yRhTVONQh1p/HknKYSlNT+vZCzyokM= +github.com/fatih/color v1.16.0/go.mod h1:fL2Sau1YI5c0pdGEVCbKQbLXB6edEj1ZgiY4NijnWvE= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/giantswarm/apiextensions-application v0.6.2 h1:XL86OrpprWl5Wp38EUvUXt3ztTo25+V63oDVlFwDpNg= +github.com/giantswarm/apiextensions-application v0.6.2/go.mod h1:8ylqSmDSzFblCppRQTFo8v9s/F6MX6RTusVVoDDfWso= +github.com/giantswarm/k8smetadata v0.24.0 h1:mAIgH4W06qx8X5rV9QEtJhCJLn8DMXfTfNVZi5ROp4c= +github.com/giantswarm/k8smetadata v0.24.0/go.mod h1:QiQAyaZnwco1U0lENLF0Kp4bSN4dIPwIlHWEvUo3ES8= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/zapr v1.3.0 h1:XGdV8XW8zdwFiwOA2Dryh1gj2KRQyOOoNmBy4EplIcQ= @@ -68,10 +75,12 @@ github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6 h1:k7nVchz72niMH6YLQN github.com/google/pprof v0.0.0-20240424215950-a892ee059fd6/go.mod h1:kf6iHlnVGwgKolg33glAes7Yg/8iWP8ukqeldJSO7jw= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= -github.com/hashicorp/go-cleanhttp v0.5.0 h1:wvCrVc9TjDls6+YGAF2hAifE1E5U1+b4tH6KdvN3Gig= -github.com/hashicorp/go-cleanhttp v0.5.0/go.mod h1:JpRdi6/HCYpAwUzNwuwqhbovhLtngrth3wmdIIUrZ80= -github.com/hashicorp/go-retryablehttp v0.5.1 h1:Vsx5XKPqPs3M6sM4U4GWyUqFS8aBiL9U5gkgvpkg4SE= -github.com/hashicorp/go-retryablehttp v0.5.1/go.mod h1:9B5zBasrRhHXnJnui7y6sL7es7NDiJgTc6Er0maI1Xs= +github.com/hashicorp/go-cleanhttp v0.5.2 h1:035FKYIWjmULyFRBKPs8TBQoi0x6d9G4xc9neXJWAZQ= +github.com/hashicorp/go-cleanhttp v0.5.2/go.mod h1:kO/YDlP8L1346E6Sodw+PrpBSV4/SoxCXGY6BqNFT48= +github.com/hashicorp/go-hclog v1.6.3 h1:Qr2kF+eVWjTiYmU7Y31tYlP1h0q/X3Nl3tPGdaB11/k= +github.com/hashicorp/go-hclog v1.6.3/go.mod h1:W4Qnvbt70Wk/zYJryRzDRU/4r0kIg0PVHBcfoyhpF5M= +github.com/hashicorp/go-retryablehttp v0.7.7 h1:C8hUCYzor8PIfXHa4UrZkU4VvK8o9ISHxT2Q8+VepXU= +github.com/hashicorp/go-retryablehttp v0.7.7/go.mod h1:pkQpWZeYWskR+D1tR2O5OcBFOxfA7DoAO6xtkuQnHTk= github.com/huandu/xstrings v1.3.3 h1:/Gcsuc1x8JVbJ9/rlye4xZnVAbEkGauT8lbebqcQws4= github.com/huandu/xstrings v1.3.3/go.mod h1:y5/lhBue+AyNmUVz9RLU9xbLR0o4KIIExikq4ovT0aE= github.com/imdario/mergo v0.3.16 h1:wwQJbIsHYGMUyLSPrEq1CT16AhnhNJQ51+4fdHUnCl4= @@ -91,6 +100,15 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/mailru/easyjson v0.7.7 h1:UGYAvKxe3sBsEDzO8ZeWOSlIQfWFlxbzLZe7hwFURr0= github.com/mailru/easyjson v0.7.7/go.mod h1:xzfreul335JAWq5oZzymOObrkdz5UnU4kGfJJLY9Nlc= +github.com/mattn/go-colorable v0.1.9/go.mod h1:u6P/XSegPjTcexA+o6vUJrdnUu04hMope9wVRipJSqc= +github.com/mattn/go-colorable v0.1.12/go.mod h1:u5H1YNBxpqRaxsYJYSkiCWKzEfiAb1Gb520KVy5xxl4= +github.com/mattn/go-colorable v0.1.13 h1:fFA4WZxdEF4tXPZVKMLwD8oUnCTTo08duU7wxecdEvA= +github.com/mattn/go-colorable v0.1.13/go.mod h1:7S9/ev0klgBDR4GtXTXX8a3vIGJpMovkB8vQcUbaXHg= +github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= +github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= +github.com/mattn/go-isatty v0.0.16/go.mod h1:kYGgaQfpe5nmfYZH+SKPsOc2e4SrIfOl2e/yFXSvRLM= +github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= +github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= github.com/mitchellh/copystructure v1.2.0 h1:vpKXTN4ewci03Vljg/q9QvCGUDttBOGBIa15WveJJGw= github.com/mitchellh/copystructure v1.2.0/go.mod h1:qLl+cE2AmVv+CoeAwDPye/v+N2HKCj9FbZEVFJRxO9s= github.com/mitchellh/reflectwalk v1.0.2 h1:G2LzWKi524PWgd3mLHV8Y5k7s6XUvT0Gef6zxSIeXaQ= @@ -149,6 +167,7 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= +github.com/stretchr/testify v1.7.2/go.mod h1:R6va5+xMeoiuVRoj+gSkQ7d3FALtqAAGI1FQKckRals= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= @@ -187,9 +206,18 @@ golang.org/x/sync v0.7.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20190422165155-953cdadca894/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.14.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= diff --git a/helm/observability-operator/templates/rbac.yaml b/helm/observability-operator/templates/rbac.yaml index b6d04c5f..3725e35f 100644 --- a/helm/observability-operator/templates/rbac.yaml +++ b/helm/observability-operator/templates/rbac.yaml @@ -39,6 +39,16 @@ rules: - events verbs: - create + # Needed to be able to configure the observability bundle app + - apiGroups: + - application.giantswarm.io + resources: + - apps + verbs: + - get + - list + - watch + - update --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/internal/controller/cluster_monitoring_controller.go b/internal/controller/cluster_monitoring_controller.go index c0873033..731dd77c 100644 --- a/internal/controller/cluster_monitoring_controller.go +++ b/internal/controller/cluster_monitoring_controller.go @@ -30,6 +30,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/log" "sigs.k8s.io/controller-runtime/pkg/reconcile" + "github.com/giantswarm/observability-operator/pkg/bundle" "github.com/giantswarm/observability-operator/pkg/common" "github.com/giantswarm/observability-operator/pkg/monitoring" "github.com/giantswarm/observability-operator/pkg/monitoring/heartbeat" @@ -48,6 +49,8 @@ type ClusterMonitoringReconciler struct { heartbeat.HeartbeatRepository // MimirService is the service for managing mimir configuration. mimir.MimirService + // BundleConfigurationService is the service for configuring the observability bundle. + *bundle.BundleConfigurationService // MonitoringConfig is the configuration for the monitoring package. MonitoringConfig monitoring.Config } @@ -88,23 +91,27 @@ func (r *ClusterMonitoringReconciler) Reconcile(ctx context.Context, req ctrl.Re ctx = log.IntoContext(ctx, logger) if !r.MonitoringConfig.Enabled { - logger.Info("Monitoring is disabled at the installation level") - return ctrl.Result{}, nil + logger.Info("monitoring is disabled at the installation level.") + } + + if !r.MonitoringConfig.IsMonitored(cluster) { + logger.Info("monitoring is disabled for this cluster.") } // Handle deletion reconciliation loop. if !cluster.ObjectMeta.DeletionTimestamp.IsZero() { - logger.Info("Handling deletion for Cluster") + logger.Info("handling deletion for cluster") return r.reconcileDelete(ctx, cluster) } - logger.Info("Reconciling Cluster") + logger.Info("reconciling cluster") // Handle normal reconciliation loop. return r.reconcile(ctx, cluster) } // reconcile handles cluster reconciliation. func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) { + var err error logger := log.FromContext(ctx) // Add finalizer first if not set to avoid the race condition between init and delete. @@ -126,54 +133,86 @@ func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *cl return ctrl.Result{}, nil } + // Management cluster specific configuration if cluster.Name == r.ManagementCluster.Name { - err := r.HeartbeatRepository.CreateOrUpdate(ctx) - if err != nil { - logger.Error(err, "failed to create or update heartbeat") - return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) - } + // If monitoring is enabled as the installation level, configure the monitoring stack, otherwise, tear it down. + if r.MonitoringConfig.Enabled { + err = r.HeartbeatRepository.CreateOrUpdate(ctx) + if err != nil { + logger.Error(err, "failed to create or update heartbeat") + return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) + } - err = r.MimirService.ConfigureMimir(ctx) - if err != nil { - logger.Error(err, "failed to configure mimir") - return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) + err = r.MimirService.ConfigureMimir(ctx) + if err != nil { + logger.Error(err, "failed to configure mimir") + return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) + } + } else { + err = r.tearDown(ctx) + if err != nil { + return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) + } } } - // Create or update PrometheusAgent remote write configuration. - err := r.PrometheusAgentService.ReconcileRemoteWriteConfiguration(ctx, cluster) + // We always configure the bundle, even if monitoring is disabled for the cluster. + err = r.BundleConfigurationService.Configure(ctx, cluster) if err != nil { - logger.Error(err, "failed to create or update prometheus agent remote write config") + logger.Error(err, "failed to configure the observability-bundle") return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) } + // Cluster specific configuration + if r.MonitoringConfig.IsMonitored(cluster) { + // Create or update PrometheusAgent remote write configuration. + err = r.PrometheusAgentService.ReconcileRemoteWriteConfiguration(ctx, cluster) + if err != nil { + logger.Error(err, "failed to create or update prometheus agent remote write config") + return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) + } + } else { + // clean up any existing prometheus agent configuration + err := r.PrometheusAgentService.DeleteRemoteWriteConfiguration(ctx, cluster) + if err != nil { + logger.Error(err, "failed to delete prometheus agent remote write config") + return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) + } + } + return ctrl.Result{}, nil } // reconcileDelete handles cluster deletion. func (r *ClusterMonitoringReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (reconcile.Result, error) { logger := log.FromContext(ctx) + + // We do not need to delete anything if there is no finalizer on the cluster if controllerutil.ContainsFinalizer(cluster, monitoring.MonitoringFinalizer) { - if cluster.Name == r.ManagementCluster.Name { - err := r.HeartbeatRepository.Delete(ctx) + // We always remove the bundle configure, even if monitoring is disabled for the cluster. + err := r.BundleConfigurationService.RemoveConfiguration(ctx, cluster) + if err != nil { + logger.Error(err, "failed to remove the observability-bundle configuration") + return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) + } + + // Cluster specific configuration + if r.MonitoringConfig.IsMonitored(cluster) { + err := r.PrometheusAgentService.DeleteRemoteWriteConfiguration(ctx, cluster) if err != nil { - logger.Error(err, "failed to delete heartbeat") + logger.Error(err, "failed to delete prometheus agent remote write config") return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) } + } - err = r.MimirService.DeleteMimirSecrets(ctx) + // Management cluster specific configuration + if cluster.Name == r.ManagementCluster.Name { + err := r.tearDown(ctx) if err != nil { - logger.Error(err, "failed to delete mimir ingress secret") return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) } } - err := r.PrometheusAgentService.DeleteRemoteWriteConfiguration(ctx, cluster) - if err != nil { - logger.Error(err, "failed to delete prometheus agent remote write config") - return ctrl.Result{RequeueAfter: 5 * time.Minute}, errors.WithStack(err) - } - // We get the latest state of the object to avoid race conditions. // Finalizer handling needs to come last. // We use a patch rather than an update to avoid conflicts when multiple controllers are removing their finalizer from the ClusterCR @@ -183,6 +222,7 @@ func (r *ClusterMonitoringReconciler) reconcileDelete(ctx context.Context, clust if err != nil { return ctrl.Result{}, errors.WithStack(err) } + controllerutil.RemoveFinalizer(cluster, monitoring.MonitoringFinalizer) if err := patchHelper.Patch(ctx, cluster); err != nil { logger.Error(err, "failed to remove finalizer, requeuing", "finalizer", monitoring.MonitoringFinalizer) @@ -192,3 +232,22 @@ func (r *ClusterMonitoringReconciler) reconcileDelete(ctx context.Context, clust } return ctrl.Result{}, nil } + +// tearDown tears down the monitoring stack management cluster specific components like the hearbeat, mimir secrets and so on. +func (r *ClusterMonitoringReconciler) tearDown(ctx context.Context) error { + logger := log.FromContext(ctx) + + err := r.HeartbeatRepository.Delete(ctx) + if err != nil { + logger.Error(err, "failed to delete heartbeat") + return err + } + + err = r.MimirService.DeleteMimirSecrets(ctx) + if err != nil { + logger.Error(err, "failed to delete mimir ingress secret") + return err + } + + return nil +} diff --git a/main.go b/main.go index b46647cf..4834daef 100644 --- a/main.go +++ b/main.go @@ -26,6 +26,7 @@ import ( // to ensure that exec-entrypoint and run can make use of them. _ "k8s.io/client-go/plugin/pkg/client/auth" + appv1 "github.com/giantswarm/apiextensions-application/api/v1alpha1" "k8s.io/apimachinery/pkg/runtime" utilruntime "k8s.io/apimachinery/pkg/util/runtime" clientgoscheme "k8s.io/client-go/kubernetes/scheme" @@ -38,6 +39,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/webhook" "github.com/giantswarm/observability-operator/internal/controller" + "github.com/giantswarm/observability-operator/pkg/bundle" "github.com/giantswarm/observability-operator/pkg/common" "github.com/giantswarm/observability-operator/pkg/common/organization" "github.com/giantswarm/observability-operator/pkg/common/password" @@ -79,6 +81,7 @@ const ( func init() { utilruntime.Must(clientgoscheme.AddToScheme(scheme)) utilruntime.Must(clusterv1.AddToScheme(scheme)) + utilruntime.Must(appv1.AddToScheme(scheme)) //+kubebuilder:scaffold:scheme } @@ -221,12 +224,13 @@ func main() { } if err = (&controller.ClusterMonitoringReconciler{ - Client: mgr.GetClient(), - ManagementCluster: managementCluster, - HeartbeatRepository: heartbeatRepository, - PrometheusAgentService: prometheusAgentService, - MimirService: mimirService, - MonitoringConfig: monitoringConfig, + Client: mgr.GetClient(), + ManagementCluster: managementCluster, + HeartbeatRepository: heartbeatRepository, + PrometheusAgentService: prometheusAgentService, + MimirService: mimirService, + MonitoringConfig: monitoringConfig, + BundleConfigurationService: bundle.NewBundleConfigurationService(mgr.GetClient(), monitoringConfig), }).SetupWithManager(mgr); err != nil { setupLog.Error(err, "unable to create controller", "controller", "Cluster") os.Exit(1) diff --git a/pkg/bundle/service.go b/pkg/bundle/service.go new file mode 100644 index 00000000..fae271a8 --- /dev/null +++ b/pkg/bundle/service.go @@ -0,0 +1,192 @@ +package bundle + +import ( + "context" + "fmt" + "reflect" + "slices" + + appv1 "github.com/giantswarm/apiextensions-application/api/v1alpha1" + "github.com/pkg/errors" + "gopkg.in/yaml.v2" + v1 "k8s.io/api/core/v1" + apimachineryerrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/log" + + "github.com/giantswarm/observability-operator/pkg/monitoring" +) + +type BundleConfigurationService struct { + client client.Client + config monitoring.Config +} + +func NewBundleConfigurationService(client client.Client, config monitoring.Config) *BundleConfigurationService { + return &BundleConfigurationService{ + client: client, + config: config, + } +} + +func getConfigMapObjectKey(cluster *clusterv1.Cluster) types.NamespacedName { + return types.NamespacedName{ + Name: fmt.Sprintf("%s-observability-platform-configuration", cluster.Name), + Namespace: cluster.Namespace, + } +} + +// Configure configures the observability-bundle application. +// the observabilitybundle application to enable logging agents. +func (s BundleConfigurationService) Configure(ctx context.Context, cluster *clusterv1.Cluster) error { + logger := log.FromContext(ctx) + logger.Info("configuring observability-bundle") + + bundleConfiguration := bundleConfiguration{ + Apps: map[string]app{ + "prometheusAgent": { + Enabled: s.config.IsMonitored(cluster), + }, + }, + } + + logger.Info("creating or updating observability-bundle configmap") + err := s.createOrUpdateObservabilityBundleConfigMap(ctx, cluster, bundleConfiguration) + if err != nil { + return errors.WithStack(err) + } + + logger.Info("configure observability-bundle app") + err = s.configureObservabilityBundleApp(ctx, cluster) + if err != nil { + return errors.WithStack(err) + } + + logger.Info("observability-bundle is configured successfully") + + return nil +} + +func (s BundleConfigurationService) createOrUpdateObservabilityBundleConfigMap( + ctx context.Context, cluster *clusterv1.Cluster, configuration bundleConfiguration) error { + + logger := log.FromContext(ctx) + + values, err := yaml.Marshal(configuration) + if err != nil { + return errors.WithStack(err) + } + + configMapObjectKey := getConfigMapObjectKey(cluster) + desired := v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapObjectKey.Name, + Namespace: configMapObjectKey.Namespace, + Labels: map[string]string{ + "app.kubernetes.io/name": "observability-bundle", + "app.kubernetes.io/managed-by": "observability-operator", + "app.kubernetes.io/part-of": "observability-platform", + }, + }, + Data: map[string]string{"values": string(values)}, + } + + var current v1.ConfigMap + err = s.client.Get(ctx, configMapObjectKey, ¤t) + if err != nil { + if apimachineryerrors.IsNotFound(err) { + err = s.client.Create(ctx, &desired) + if err != nil { + return errors.WithStack(err) + } + logger.Info("observability-bundle configuration created") + } else { + return errors.WithStack(err) + } + } + + if !reflect.DeepEqual(current.Data, desired.Data) || + !reflect.DeepEqual(current.ObjectMeta.Labels, desired.ObjectMeta.Labels) { + err := s.client.Update(ctx, &desired) + if err != nil { + return errors.WithStack(err) + } + logger.Info("observability-bundle configuration updated") + } + + logger.Info("observability-bundle configuration up to date") + return nil +} + +func (s BundleConfigurationService) configureObservabilityBundleApp( + ctx context.Context, cluster *clusterv1.Cluster) error { + + configMapObjectKey := getConfigMapObjectKey(cluster) + + // Get observability bundle app metadata. + appObjectKey := types.NamespacedName{ + Name: fmt.Sprintf("%s-observability-bundle", cluster.Name), + Namespace: cluster.Namespace, + } + + var current appv1.App + err := s.client.Get(ctx, appObjectKey, ¤t) + if err != nil { + return errors.WithStack(err) + } + + desired := current.DeepCopy() + + desiredExtraConfig := appv1.AppExtraConfig{ + Kind: "configMap", + Name: configMapObjectKey.Name, + Namespace: configMapObjectKey.Namespace, + Priority: 25, + } + + foundIndex := slices.IndexFunc(current.Spec.ExtraConfigs, func(extraConfig appv1.AppExtraConfig) bool { + // We skip priority in case we want to change it + return extraConfig.Kind == desiredExtraConfig.Kind && + extraConfig.Name == desiredExtraConfig.Name && + extraConfig.Namespace == desiredExtraConfig.Namespace + }) + + if foundIndex == -1 { + desired.Spec.ExtraConfigs = append(desired.Spec.ExtraConfigs, desiredExtraConfig) + } else { + desired.Spec.ExtraConfigs[foundIndex] = desiredExtraConfig + } + + if !reflect.DeepEqual(current, *desired) { + err := s.client.Update(ctx, desired) + if err != nil { + return errors.WithStack(err) + } + } + + return nil +} + +func (s BundleConfigurationService) RemoveConfiguration(ctx context.Context, cluster *clusterv1.Cluster) error { + logger := log.FromContext(ctx) + + logger.Info("deleting observability-bundle configuration") + + configMapObjectKey := getConfigMapObjectKey(cluster) + var current = v1.ConfigMap{ + ObjectMeta: metav1.ObjectMeta{ + Name: configMapObjectKey.Name, + Namespace: configMapObjectKey.Namespace, + }, + } + if err := s.client.Delete(ctx, ¤t); client.IgnoreNotFound(err) != nil { + return errors.WithStack(err) + } + + logger.Info("observability-bundle configuration has been deleted successfully") + + return nil +} diff --git a/pkg/bundle/types.go b/pkg/bundle/types.go new file mode 100644 index 00000000..c6125ce4 --- /dev/null +++ b/pkg/bundle/types.go @@ -0,0 +1,9 @@ +package bundle + +type bundleConfiguration struct { + Apps map[string]app `yaml:"apps" json:"apps"` +} + +type app struct { + Enabled bool `yaml:"enabled" json:"enabled"` +} diff --git a/pkg/monitoring/config.go b/pkg/monitoring/config.go index 77ad1477..69c0e803 100644 --- a/pkg/monitoring/config.go +++ b/pkg/monitoring/config.go @@ -1,6 +1,14 @@ package monitoring -import "github.com/giantswarm/observability-operator/pkg/monitoring/prometheusagent/sharding" +import ( + "strconv" + + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + + "github.com/giantswarm/observability-operator/pkg/monitoring/prometheusagent/sharding" +) + +const MonitoringLabel = "giantswarm.io/monitoring" // Config represents the configuration used by the monitoring package. type Config struct { @@ -9,3 +17,26 @@ type Config struct { // TODO(atlas): validate prometheus version using SemVer PrometheusVersion string } + +// Monitoring should be enabled when all conditions are met: +// - global monitoring flag is enabled +// - monitoring label is not set or is set to true on the cluster object +func (c Config) IsMonitored(cluster *clusterv1.Cluster) bool { + if !c.Enabled { + return false + } + + // Check if label is set on the cluster object + labels := cluster.GetLabels() + monitoringLabelValue, ok := labels[MonitoringLabel] + if !ok { + // If it's not set, monitoring is enabled by default + return true + } + + monitoringEnabled, err := strconv.ParseBool(monitoringLabelValue) + if err != nil { + return true + } + return monitoringEnabled +} diff --git a/pkg/monitoring/prometheusagent/sharding/sharding.go b/pkg/monitoring/prometheusagent/sharding/sharding.go index 9b0eae3a..ab3e623c 100644 --- a/pkg/monitoring/prometheusagent/sharding/sharding.go +++ b/pkg/monitoring/prometheusagent/sharding/sharding.go @@ -26,14 +26,14 @@ func (s Strategy) Merge(newStrategy *Strategy) Strategy { } // We want to start with 1 prometheus-agent for each 1M time series with a scale down 20% threshold. -func (pass Strategy) ComputeShards(currentShardCount int, timeSeries float64) int { - shardScaleDownThreshold := pass.ScaleDownPercentage * pass.ScaleUpSeriesCount - desiredShardCount := int(math.Ceil(timeSeries / pass.ScaleUpSeriesCount)) +func (s Strategy) ComputeShards(currentShardCount int, timeSeries float64) int { + shardScaleDownThreshold := s.ScaleDownPercentage * s.ScaleUpSeriesCount + desiredShardCount := int(math.Ceil(timeSeries / s.ScaleUpSeriesCount)) // Compute Scale Down if currentShardCount > desiredShardCount { // Check if the remainder of (timeSeries mod ScaleupSeriesCount) is bigger than the scale down threshold. - if math.Mod(timeSeries, pass.ScaleUpSeriesCount) > pass.ScaleUpSeriesCount-shardScaleDownThreshold { + if math.Mod(timeSeries, s.ScaleUpSeriesCount) > s.ScaleUpSeriesCount-shardScaleDownThreshold { desiredShardCount = currentShardCount } }