Skip to content

Commit

Permalink
Wire Alloy service into the controller (#81)
Browse files Browse the repository at this point in the history
* Wire Alloy service into the controller, and add the logic to decide between Prometheus agent and Alloy as monitoring agent

* bump required observability-bundle version to 1.6.0
  • Loading branch information
TheoBrigitte authored Aug 20, 2024
1 parent 6db40e8 commit c860163
Show file tree
Hide file tree
Showing 6 changed files with 67 additions and 6 deletions.
1 change: 1 addition & 0 deletions helm/observability-operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ spec:
- --management-cluster-name={{ $.Values.managementCluster.name }}
- --management-cluster-pipeline={{ $.Values.managementCluster.pipeline }}
- --management-cluster-region={{ $.Values.managementCluster.region }}
- --monitoring-agent={{ $.Values.monitoring.agent }}
- --monitoring-enabled={{ $.Values.monitoring.enabled }}
- --monitoring-sharding-scale-up-series-count={{ $.Values.monitoring.sharding.scaleUpSeriesCount }}
- --monitoring-sharding-scale-down-percentage={{ $.Values.monitoring.sharding.scaleDownPercentage }}
Expand Down
1 change: 1 addition & 0 deletions helm/observability-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ managementCluster:
region: region

monitoring:
agent: prometheus-agent
enabled: false
opsgenieApiKey: ""
prometheusVersion: ""
Expand Down
53 changes: 48 additions & 5 deletions internal/controller/cluster_monitoring_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (
"context"
"time"

"github.com/blang/semver"
"github.com/pkg/errors"
apierrors "k8s.io/apimachinery/pkg/api/errors"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
Expand All @@ -32,19 +33,27 @@ import (

"github.com/giantswarm/observability-operator/pkg/bundle"
"github.com/giantswarm/observability-operator/pkg/common"
commonmonitoring "github.com/giantswarm/observability-operator/pkg/common/monitoring"
"github.com/giantswarm/observability-operator/pkg/monitoring"
"github.com/giantswarm/observability-operator/pkg/monitoring/alloy"
"github.com/giantswarm/observability-operator/pkg/monitoring/heartbeat"
"github.com/giantswarm/observability-operator/pkg/monitoring/mimir"
"github.com/giantswarm/observability-operator/pkg/monitoring/prometheusagent"
)

var (
// observabilityBundleVersionSupportAlloyMetrics is the observability-bundle
// version threshold used by the reconciler: when a cluster's bundle version is
// lower than this value and a monitoring agent other than prometheus-agent is
// configured, the reconciler falls back to prometheus-agent.
observabilityBundleVersionSupportAlloyMetrics = semver.MustParse("1.6.0")
)

// ClusterMonitoringReconciler reconciles a Cluster object
type ClusterMonitoringReconciler struct {
// Client is the controller client.
client.Client
common.ManagementCluster
// PrometheusAgentService is the service for managing PrometheusAgent resources.
prometheusagent.PrometheusAgentService
// AlloyService is the service which manages Alloy monitoring agent configuration.
AlloyService alloy.Service
// HeartbeatRepository is the repository for managing heartbeats.
heartbeat.HeartbeatRepository
// MimirService is the service for managing mimir configuration.
Expand Down Expand Up @@ -157,6 +166,21 @@ func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *cl
}
}

// Enforce prometheus-agent as monitoring agent when observability-bundle version < 1.6.0
monitoringAgent := r.MonitoringConfig.MonitoringAgent
observabilityBundleVersion, err := commonmonitoring.GetObservabilityBundleAppVersion(cluster, r.Client, ctx)
if err != nil {
logger.Error(err, "failed to configure get observability-bundle version")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
}
if observabilityBundleVersion.LT(observabilityBundleVersionSupportAlloyMetrics) && monitoringAgent != commonmonitoring.MonitoringAgentPrometheus {
logger.Info("Monitoring agent is not supported by observability bundle, using prometheus-agent instead.", "observability-bundle-version", observabilityBundleVersion, "monitoring-agent", monitoringAgent)
monitoringAgent = commonmonitoring.MonitoringAgentPrometheus
}
r.MonitoringConfig.MonitoringAgent = monitoringAgent
r.BundleConfigurationService.SetMonitoringAgent(monitoringAgent)
r.AlloyService.SetMonitoringAgent(monitoringAgent)

// We always configure the bundle, even if monitoring is disabled for the cluster.
err = r.BundleConfigurationService.Configure(ctx, cluster)
if err != nil {
Expand All @@ -166,11 +190,23 @@ func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *cl

// Cluster specific configuration
if r.MonitoringConfig.IsMonitored(cluster) {
// Create or update PrometheusAgent remote write configuration.
err = r.PrometheusAgentService.ReconcileRemoteWriteConfiguration(ctx, cluster)
if err != nil {
logger.Error(err, "failed to create or update prometheus agent remote write config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
switch r.MonitoringConfig.MonitoringAgent {
case commonmonitoring.MonitoringAgentPrometheus:
// Create or update PrometheusAgent remote write configuration.
err = r.PrometheusAgentService.ReconcileRemoteWriteConfiguration(ctx, cluster)
if err != nil {
logger.Error(err, "failed to create or update prometheus agent remote write config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
}
case commonmonitoring.MonitoringAgentAlloy:
// Create or update Alloy monitoring configuration.
err = r.AlloyService.ReconcileCreate(ctx, cluster)
if err != nil {
logger.Error(err, "failed to create or update alloy monitoring config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
}
default:
return ctrl.Result{}, errors.Errorf("unsupported monitoring agent %q", r.MonitoringConfig.MonitoringAgent)
}
} else {
// clean up any existing prometheus agent configuration
Expand All @@ -179,6 +215,13 @@ func (r *ClusterMonitoringReconciler) reconcile(ctx context.Context, cluster *cl
logger.Error(err, "failed to delete prometheus agent remote write config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
}

// clean up any existing alloy monitoring configuration
err = r.AlloyService.ReconcileDelete(ctx, cluster)
if err != nil {
logger.Error(err, "failed to delete alloy monitoring config")
return ctrl.Result{RequeueAfter: 5 * time.Minute}, nil
}
}

return ctrl.Result{}, nil
Expand Down
10 changes: 10 additions & 0 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ import (
"github.com/giantswarm/observability-operator/pkg/common/organization"
"github.com/giantswarm/observability-operator/pkg/common/password"
"github.com/giantswarm/observability-operator/pkg/monitoring"
"github.com/giantswarm/observability-operator/pkg/monitoring/alloy"
"github.com/giantswarm/observability-operator/pkg/monitoring/heartbeat"
"github.com/giantswarm/observability-operator/pkg/monitoring/mimir"
"github.com/giantswarm/observability-operator/pkg/monitoring/prometheusagent"
Expand Down Expand Up @@ -222,6 +223,14 @@ func main() {
MonitoringConfig: monitoringConfig,
}

alloyService := alloy.Service{
Client: mgr.GetClient(),
OrganizationRepository: organizationRepository,
PasswordManager: password.SimpleManager{},
ManagementCluster: managementCluster,
MonitoringConfig: monitoringConfig,
}

mimirService := mimir.MimirService{
Client: mgr.GetClient(),
PasswordManager: password.SimpleManager{},
Expand All @@ -233,6 +242,7 @@ func main() {
ManagementCluster: managementCluster,
HeartbeatRepository: heartbeatRepository,
PrometheusAgentService: prometheusAgentService,
AlloyService: alloyService,
MimirService: mimirService,
MonitoringConfig: monitoringConfig,
BundleConfigurationService: bundle.NewBundleConfigurationService(mgr.GetClient(), monitoringConfig),
Expand Down
5 changes: 5 additions & 0 deletions pkg/bundle/service.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ func NewBundleConfigurationService(client client.Client, config monitoring.Confi
}
}

// SetMonitoringAgent overwrites the monitoring agent stored in the service's
// configuration with monitoringAgent.
//
// The pointer receiver makes the assignment persist on the service the method
// is called on.
// NOTE(review): this mutates state on the service itself; whether reconciles
// for different clusters can run concurrently against the same service is not
// visible from here — confirm before relying on it.
func (s *BundleConfigurationService) SetMonitoringAgent(monitoringAgent string) {
s.config.MonitoringAgent = monitoringAgent
}

func getConfigMapObjectKey(cluster *clusterv1.Cluster) types.NamespacedName {
return types.NamespacedName{
Name: fmt.Sprintf("%s-observability-platform-configuration", cluster.Name),
Expand Down Expand Up @@ -63,6 +67,7 @@ func (s BundleConfigurationService) Configure(ctx context.Context, cluster *clus
Enabled: false,
}
bundleConfiguration.Apps[commonmonitoring.MonitoringAlloyAppName] = app{
AppName: commonmonitoring.AlloyMonitoringAgentAppName,
Enabled: s.config.IsMonitored(cluster),
}
default:
Expand Down
3 changes: 2 additions & 1 deletion pkg/bundle/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,6 @@ type bundleConfiguration struct {
}

type app struct {
Enabled bool `yaml:"enabled" json:"enabled"`
AppName string `yaml:"appName,omitempty" json:"appName,omitempty"`
Enabled bool `yaml:"enabled" json:"enabled"`
}

0 comments on commit c860163

Please sign in to comment.