Skip to content

Commit

Permalink
Take over prometheus agent remote write config from PMO.
Browse files Browse the repository at this point in the history
  • Loading branch information
QuentinBisson committed Apr 8, 2024
1 parent 2f6d694 commit 75bf367
Show file tree
Hide file tree
Showing 11 changed files with 314 additions and 58 deletions.
6 changes: 3 additions & 3 deletions go.mod
Original file line number Diff line number Diff line change
@@ -1,17 +1,16 @@
module github.com/giantswarm/observability-operator

go 1.21
go 1.22

require (
github.com/go-logr/logr v1.4.1
github.com/onsi/ginkgo/v2 v2.17.1
github.com/onsi/gomega v1.32.0
github.com/opsgenie/opsgenie-go-sdk-v2 v1.2.22
github.com/pkg/errors v0.9.1
github.com/prometheus-operator/prometheus-operator/pkg/apis/monitoring v0.72.0
github.com/prometheus/client_golang v1.19.0
github.com/prometheus/common v0.51.1
github.com/sirupsen/logrus v1.9.0
github.com/sirupsen/logrus v1.9.3
gopkg.in/yaml.v2 v2.4.0
k8s.io/api v0.29.3
k8s.io/apimachinery v0.29.3
Expand All @@ -27,6 +26,7 @@ require (
github.com/emicklei/go-restful/v3 v3.12.0 // indirect
github.com/evanphx/json-patch/v5 v5.9.0 // indirect
github.com/fsnotify/fsnotify v1.7.0 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/zapr v1.3.0 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.21.0 // indirect
Expand Down
2 changes: 2 additions & 0 deletions helm/observability-operator/templates/deployment.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,9 @@ spec:
image: "{{ .Values.image.registry }}/{{ .Values.image.name }}:{{ default .Chart.Version .Values.image.tag }}"
args:
- --leader-elect
- --management-cluster-base-domain={{ $.Values.managementCluster.baseDomain }}
- --management-cluster-customer={{ $.Values.managementCluster.customer }}
- --management-cluster-insecure-ca={{ $.Values.managementCluster.insecureCA }}
- --management-cluster-name={{ $.Values.managementCluster.name }}
- --management-cluster-pipeline={{ $.Values.managementCluster.pipeline }}
- --management-cluster-region={{ $.Values.managementCluster.region }}
Expand Down
9 changes: 9 additions & 0 deletions helm/observability-operator/values.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,15 @@
"managementCluster": {
"type": "object",
"properties": {
"baseDomain": {
"type": "string"
},
"customer": {
"type": "string"
},
"insecureCA": {
"type": "boolean"
},
"name": {
"type": "string"
},
Expand All @@ -54,6 +60,9 @@
},
"opsgenieApiKey": {
"type": "string"
},
"prometheusVersion": {
"type": "string"
}
}
},
Expand Down
2 changes: 2 additions & 0 deletions helm/observability-operator/values.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,9 @@ image:
tag: ""

managementCluster:
baseDomain: domain
customer: customer
insecureCA: false
name: name
pipeline: pipeline
region: region
Expand Down
42 changes: 26 additions & 16 deletions main.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ import (
"github.com/giantswarm/observability-operator/internal/controller"
"github.com/giantswarm/observability-operator/pkg/common"
"github.com/giantswarm/observability-operator/pkg/common/organization"
"github.com/giantswarm/observability-operator/pkg/common/password"
"github.com/giantswarm/observability-operator/pkg/monitoring/heartbeat"
"github.com/giantswarm/observability-operator/pkg/monitoring/prometheusagent"
//+kubebuilder:scaffold:imports
Expand All @@ -49,17 +50,19 @@ var (
scheme = runtime.NewScheme()
setupLog = ctrl.Log.WithName("setup")

metricsAddr string
enableLeaderElection bool
probeAddr string
secureMetrics bool
enableHTTP2 bool
managementClusterCustomer string
managementClusterName string
managementClusterPipeline string
managementClusterRegion string
monitoringEnabled bool
prometheusVersion string
metricsAddr string
enableLeaderElection bool
probeAddr string
secureMetrics bool
enableHTTP2 bool
managementClusterBaseDomain string
managementClusterCustomer string
managementClusterInsecureCA bool
managementClusterName string
managementClusterPipeline string
managementClusterRegion string
monitoringEnabled bool
prometheusVersion string
)

const (
Expand All @@ -85,8 +88,12 @@ func main() {
"If set the metrics endpoint is served securely")
flag.BoolVar(&enableHTTP2, "enable-http2", false,
"If set, HTTP/2 will be enabled for the metrics and webhook servers")
flag.StringVar(&managementClusterBaseDomain, "management-cluster-base-domain", "",
"The base domain of the management cluster.")
flag.StringVar(&managementClusterCustomer, "management-cluster-customer", "",
"The customer of the management cluster.")
flag.BoolVar(&managementClusterInsecureCA, "management-cluster-insecure-ca", false,
"Flag to indicate if the management cluster has an insecure CA that should be trusted")
flag.StringVar(&managementClusterName, "management-cluster-name", "",
"The name of the management cluster.")
flag.StringVar(&managementClusterPipeline, "management-cluster-pipeline", "",
Expand Down Expand Up @@ -157,10 +164,12 @@ func main() {
record.InitFromRecorder(mgr.GetEventRecorderFor("observability-operator"))

var managementCluster common.ManagementCluster = common.ManagementCluster{
Customer: managementClusterCustomer,
Name: managementClusterName,
Pipeline: managementClusterPipeline,
Region: managementClusterRegion,
BaseDomain: managementClusterBaseDomain,
Customer: managementClusterCustomer,
InsecureCA: managementClusterInsecureCA,
Name: managementClusterName,
Pipeline: managementClusterPipeline,
Region: managementClusterRegion,
}

var opsgenieApiKey = os.Getenv(OpsgenieApiKey)
Expand All @@ -181,11 +190,12 @@ func main() {
prometheusAgentService := prometheusagent.PrometheusAgentService{
Client: mgr.GetClient(),
OrganizationRepository: organizationRepository,
PasswordManager: password.SimpleManager{},
ManagementCluster: managementCluster,
PrometheusVersion: prometheusVersion,
}

if err = (&controller.ClusterMonitoringReconciler{
if err = (&controller.ClusterMonitoringReconciler{
Client: mgr.GetClient(),
ManagementCluster: managementCluster,
HeartbeatRepository: heartbeatRepository,
Expand Down
21 changes: 21 additions & 0 deletions pkg/common/password/manager.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
package password

import (
	"crypto/rand"
	"encoding/hex"
	"fmt"
)

// Manager is the interface for password generators.
type Manager interface {
// GeneratePassword returns a generated password for the requested length,
// or an error if generation fails.
// NOTE(review): how length maps to the size of the returned string depends
// on the implementation; SimpleManager in this file hex-encodes length
// random bytes, which yields 2*length characters.
GeneratePassword(length int) (string, error)
}

// SimpleManager is a stateless password generator backed by crypto/rand.
type SimpleManager struct {
}

// GeneratePassword reads length random bytes from crypto/rand and returns
// them hex-encoded. Because each byte becomes two hexadecimal digits, the
// returned string contains 2*length characters drawn from [0-9a-f].
//
// A length of 0 yields an empty password. A negative length is rejected with
// an error instead of panicking inside make. Any error reported by the random
// source is returned unchanged together with an empty string.
func (m SimpleManager) GeneratePassword(length int) (string, error) {
	// make panics on a negative size; surface that as a regular error since
	// this method already has an error return.
	if length < 0 {
		return "", fmt.Errorf("invalid password length %d: must not be negative", length)
	}

	raw := make([]byte, length)
	if _, err := rand.Read(raw); err != nil {
		return "", err
	}
	return hex.EncodeToString(raw), nil
}
4 changes: 4 additions & 0 deletions pkg/common/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,12 @@ const (
)

type ManagementCluster struct {
// BaseDomain is the base domain of the management cluster.
BaseDomain string
// Customer is the customer name of the management cluster.
Customer string
// InsecureCA is a flag to indicate if the management cluster has an insecure CA that should be trusted.
InsecureCA bool
// Name is the name of the management cluster.
Name string
// Pipeline is the pipeline name of the management cluster.
Expand Down
38 changes: 38 additions & 0 deletions pkg/monitoring/prometheusagent/config.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
package prometheusagent

import (
"context"
"fmt"
"net"

"github.com/pkg/errors"
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"

"github.com/giantswarm/observability-operator/pkg/monitoring/mimir/querier"
"github.com/giantswarm/observability-operator/pkg/monitoring/prometheusagent/shards"
)

// getPrometheusAgentRemoteWriteConfigName builds the remote write config name
// for the given cluster: the cluster name followed by "-remote-write-config".
func getPrometheusAgentRemoteWriteConfigName(cluster *clusterv1.Cluster) string {
	clusterName := cluster.Name
	return fmt.Sprintf("%s-remote-write-config", clusterName)
}

// getServicePriority returns the value of the servicePriorityLabel label set
// on the cluster. When the label is missing or empty, defaultServicePriority
// is returned instead.
func getServicePriority(cluster *clusterv1.Cluster) string {
	// A missing key reads as "", so one emptiness check covers both the
	// "label absent" and the "label present but empty" cases.
	priority := cluster.GetLabels()[servicePriorityLabel]
	if priority == "" {
		return defaultServicePriority
	}
	return priority
}

// getShardsCountForCluster computes the number of shards for the cluster from
// the value returned by querier.QueryTSDBHeadSeries for the cluster name and
// the current shard count.
//
// If the query fails with a DNS error, the shard count is computed from
// defaultShards instead. Any other query error is returned with a stack trace
// attached and a shard count of 0.
func getShardsCountForCluster(ctx context.Context, cluster *clusterv1.Cluster, currentShardCount int) (int, error) {
	headSeries, err := querier.QueryTSDBHeadSeries(ctx, cluster.Name)
	if err == nil {
		return shards.ComputeShards(currentShardCount, headSeries), nil
	}

	// A DNS error is taken to mean Prometheus is not accessible, for instance
	// because it is not running yet on a new cluster; fall back to defaults.
	var dnsError *net.DNSError
	if !errors.As(err, &dnsError) {
		return 0, errors.WithStack(err)
	}
	return shards.ComputeShards(currentShardCount, defaultShards), nil
}
36 changes: 36 additions & 0 deletions pkg/monitoring/prometheusagent/parsing.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
package prometheusagent

import (
"gopkg.in/yaml.v2"

"github.com/pkg/errors"
corev1 "k8s.io/api/core/v1"

"github.com/giantswarm/observability-operator/pkg/monitoring/prometheusagent/remotewrite"
)

// readCurrentShardsFromConfig parses the "values" entry of the config map as
// a YAML remotewrite.RemoteWriteConfig and returns the shard count stored in
// its PrometheusAgentConfig. A YAML parsing failure is returned with a stack
// trace attached and a shard count of 0.
func readCurrentShardsFromConfig(configMap corev1.ConfigMap) (int, error) {
	var parsed remotewrite.RemoteWriteConfig

	rawValues := []byte(configMap.Data["values"])
	if err := yaml.Unmarshal(rawValues, &parsed); err != nil {
		return 0, errors.WithStack(err)
	}

	return parsed.PrometheusAgentConfig.Shards, nil
}

// readRemoteWritePasswordFromSecret parses the "values" entry of the secret
// as a YAML remotewrite.RemoteWriteConfig and returns the password of the
// remote write entry named "prometheus-meta-operator".
//
// An error is returned when the YAML cannot be parsed or when no entry with
// that name exists.
func readRemoteWritePasswordFromSecret(secret corev1.Secret) (string, error) {
	var parsed remotewrite.RemoteWriteConfig
	if err := yaml.Unmarshal(secret.Data["values"], &parsed); err != nil {
		return "", errors.WithStack(err)
	}

	// The first entry with the expected name wins.
	entries := parsed.PrometheusAgentConfig.RemoteWrite
	for i := range entries {
		if entries[i].Name == "prometheus-meta-operator" {
			return entries[i].Password, nil
		}
	}

	return "", errors.New("remote write password not found in secret")
}
11 changes: 11 additions & 0 deletions pkg/monitoring/prometheusagent/secret.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
package prometheusagent

import (
"fmt"

clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// getPrometheusAgentRemoteWriteSecretName builds the remote write secret name
// for the given cluster: the cluster name followed by "-remote-write-secret".
func getPrometheusAgentRemoteWriteSecretName(cluster *clusterv1.Cluster) string {
	clusterName := cluster.Name
	return fmt.Sprintf("%s-remote-write-secret", clusterName)
}
Loading

0 comments on commit 75bf367

Please sign in to comment.