diff --git a/CHANGELOG.md b/CHANGELOG.md index a5c342c7..452e4072 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Command line arguments to configure the Mimir and Grafana URLs. + ## [0.10.2] - 2024-12-17 ### Added diff --git a/internal/controller/grafanaorganization_controller.go b/internal/controller/grafanaorganization_controller.go index 184ed2c0..10a188de 100644 --- a/internal/controller/grafanaorganization_controller.go +++ b/internal/controller/grafanaorganization_controller.go @@ -54,25 +54,26 @@ type GrafanaOrganizationReconciler struct { GrafanaAPI *grafanaAPI.GrafanaHTTPAPI } -func SetupGrafanaOrganizationReconciler(mgr manager.Manager, environment config.Environment) error { +func SetupGrafanaOrganizationReconciler(mgr manager.Manager, conf config.Config) error { // Generate Grafana client + // Get grafana admin-password and admin-user grafanaAdminCredentials := grafanaclient.AdminCredentials{ - Username: environment.GrafanaAdminUsername, - Password: environment.GrafanaAdminPassword, + Username: conf.Environment.GrafanaAdminUsername, + Password: conf.Environment.GrafanaAdminPassword, } if grafanaAdminCredentials.Username == "" { - return fmt.Errorf("GrafanaAdminUsername not set: %q", environment.GrafanaAdminUsername) + return fmt.Errorf("GrafanaAdminUsername not set: %q", conf.Environment.GrafanaAdminUsername) } if grafanaAdminCredentials.Password == "" { - return fmt.Errorf("GrafanaAdminPassword not set: %q", environment.GrafanaAdminPassword) + return fmt.Errorf("GrafanaAdminPassword not set: %q", conf.Environment.GrafanaAdminPassword) } grafanaTLSConfig := grafanaclient.TLSConfig{ - Cert: environment.GrafanaTLSCertFile, - Key: environment.GrafanaTLSKeyFile, + Cert: conf.Environment.GrafanaTLSCertFile, + Key: conf.Environment.GrafanaTLSKeyFile, } - grafanaAPI, err := grafanaclient.GenerateGrafanaClient(grafanaAdminCredentials, 
grafanaTLSConfig) + grafanaAPI, err := grafanaclient.GenerateGrafanaClient(conf.GrafanaURL, grafanaAdminCredentials, grafanaTLSConfig) if err != nil { return fmt.Errorf("unable to create grafana client: %w", err) } diff --git a/main.go b/main.go index 57fb9adb..e97dfcce 100644 --- a/main.go +++ b/main.go @@ -20,6 +20,7 @@ import ( "crypto/tls" "flag" "fmt" + "net/url" "os" "time" @@ -63,6 +64,9 @@ func init() { } func main() { + var grafanaURL string + var err error + flag.StringVar(&conf.MetricsAddr, "metrics-bind-address", ":8080", "The address the metric endpoint binds to.") flag.StringVar(&conf.ProbeAddr, "health-probe-bind-address", ":8081", @@ -76,6 +80,8 @@ func main() { "If set, HTTP/2 will be enabled for the metrics and webhook servers") flag.StringVar(&conf.OperatorNamespace, "operator-namespace", "", "The namespace where the observability-operator is running.") + flag.StringVar(&grafanaURL, "grafana-url", "http://grafana.monitoring.svc.cluster.local", + "grafana URL") // Management cluster configuration flags. flag.StringVar(&conf.ManagementCluster.BaseDomain, "management-cluster-base-domain", "", @@ -110,6 +116,8 @@ func main() { "The version of Prometheus Agents to deploy.") flag.DurationVar(&conf.Monitoring.WALTruncateFrequency, "monitoring-wal-truncate-frequency", 2*time.Hour, "Configures how frequently the Write-Ahead Log (WAL) truncates segments.") + flag.StringVar(&conf.Monitoring.MetricsQueryURL, "monitoring-metrics-query-url", "http://mimir-gateway.mimir.svc/prometheus", + "URL to query for cluster metrics") opts := zap.Options{ Development: false, } @@ -117,10 +125,16 @@ func main() { opts.BindFlags(flag.CommandLine) flag.Parse() + // parse grafana URL + conf.GrafanaURL, err = url.Parse(grafanaURL) + if err != nil { + panic(fmt.Sprintf("failed to parse grafana url: %v", err)) + } + ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts))) // Load environment variables. 
- _, err := env.UnmarshalFromEnviron(&conf.Environment) + _, err = env.UnmarshalFromEnviron(&conf.Environment) if err != nil { setupLog.Error(err, "failed to unmarshal environment variables") os.Exit(1) @@ -185,7 +199,7 @@ func main() { } // Setup controller for the GrafanaOrganization resource. - err = controller.SetupGrafanaOrganizationReconciler(mgr, conf.Environment) + err = controller.SetupGrafanaOrganizationReconciler(mgr, conf) if err != nil { setupLog.Error(err, "unable to setup controller", "controller", "GrafanaOrganizationReconciler") os.Exit(1) diff --git a/pkg/config/config.go b/pkg/config/config.go index 68925239..31ca0a26 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -1,6 +1,8 @@ package config import ( + "net/url" + "github.com/giantswarm/observability-operator/pkg/common" "github.com/giantswarm/observability-operator/pkg/monitoring" ) @@ -12,6 +14,7 @@ type Config struct { SecureMetrics bool EnableHTTP2 bool OperatorNamespace string + GrafanaURL *url.URL ManagementCluster common.ManagementCluster diff --git a/pkg/grafana/client/client.go b/pkg/grafana/client/client.go index 47c247d7..ea6f426c 100644 --- a/pkg/grafana/client/client.go +++ b/pkg/grafana/client/client.go @@ -7,21 +7,13 @@ import ( grafana "github.com/grafana/grafana-openapi-client-go/client" ) -var grafanaURL *url.URL - -func init() { - var err error - grafanaURL, err = url.Parse("http://grafana.monitoring.svc.cluster.local") - if err != nil { - panic(fmt.Sprintf("failed to parse grafana url: %v", err)) - } -} - const ( clientConfigNumRetries = 3 ) -func GenerateGrafanaClient(adminUserCredentials AdminCredentials, tlsConfig TLSConfig) (*grafana.GrafanaHTTPAPI, error) { +func GenerateGrafanaClient(grafanaURL *url.URL, adminUserCredentials AdminCredentials, tlsConfig TLSConfig) (*grafana.GrafanaHTTPAPI, error) { + var err error + grafanaTLSConfig, err := tlsConfig.toTLSConfig() if err != nil { return nil, fmt.Errorf("failed to build tls config: %w", err) diff --git 
a/pkg/monitoring/alloy/configmap.go b/pkg/monitoring/alloy/configmap.go index 283d46af..48aa1706 100644 --- a/pkg/monitoring/alloy/configmap.go +++ b/pkg/monitoring/alloy/configmap.go @@ -58,7 +58,7 @@ func (a *Service) GenerateAlloyMonitoringConfigMapData(ctx context.Context, curr // Compute the number of shards based on the number of series. query := fmt.Sprintf(`sum(max_over_time((sum(prometheus_remote_write_wal_storage_active_series{cluster_id="%s", component_id="prometheus.remote_write.default", service="%s"})by(pod))[6h:1h]))`, cluster.Name, commonmonitoring.AlloyMonitoringAgentAppName) - headSeries, err := querier.QueryTSDBHeadSeries(ctx, query) + headSeries, err := querier.QueryTSDBHeadSeries(ctx, query, a.MonitoringConfig.MetricsQueryURL) if err != nil { logger.Error(err, "alloy-service - failed to query head series") metrics.MimirQueryErrors.WithLabelValues().Inc() diff --git a/pkg/monitoring/config.go b/pkg/monitoring/config.go index ec06309d..b3053478 100644 --- a/pkg/monitoring/config.go +++ b/pkg/monitoring/config.go @@ -25,6 +25,7 @@ type Config struct { WALTruncateFrequency time.Duration // TODO(atlas): validate prometheus version using SemVer PrometheusVersion string + MetricsQueryURL string } // Monitoring should be enabled when all conditions are met: diff --git a/pkg/monitoring/mimir/querier/querier.go b/pkg/monitoring/mimir/querier/querier.go index e520d4b5..92de2d98 100644 --- a/pkg/monitoring/mimir/querier/querier.go +++ b/pkg/monitoring/mimir/querier/querier.go @@ -18,9 +18,9 @@ var ( ) // QueryTSDBHeadSeries performs an instant query against Mimir. -func QueryTSDBHeadSeries(ctx context.Context, query string) (float64, error) { +func QueryTSDBHeadSeries(ctx context.Context, query string, metricsQueryURL string) (float64, error) { config := api.Config{ - Address: "http://mimir-gateway.mimir.svc/prometheus", + Address: metricsQueryURL, } // Create new client. 
diff --git a/pkg/monitoring/prometheusagent/configmap.go b/pkg/monitoring/prometheusagent/configmap.go index d836b3a5..0389ed18 100644 --- a/pkg/monitoring/prometheusagent/configmap.go +++ b/pkg/monitoring/prometheusagent/configmap.go @@ -46,7 +46,7 @@ func (pas PrometheusAgentService) buildRemoteWriteConfig(ctx context.Context, // Compute the number of shards based on the number of series. query := fmt.Sprintf(`sum(max_over_time((sum(prometheus_agent_active_series{cluster_id="%s"})by(pod))[6h:1h]))`, cluster.Name) - headSeries, err := querier.QueryTSDBHeadSeries(ctx, query) + headSeries, err := querier.QueryTSDBHeadSeries(ctx, query, pas.MonitoringConfig.MetricsQueryURL) if err != nil { logger.Error(err, "failed to query head series") metrics.MimirQueryErrors.WithLabelValues().Inc()