Skip to content

Commit

Permalink
Merge pull request #742 from linxiulei/monitoring
Browse files Browse the repository at this point in the history
Replace golang.org/api/monitoring/v3 with cloud.google.com/go/monitoring/apiv3
  • Loading branch information
CatherineF-dev authored Jul 20, 2024
2 parents 4b6dad4 + bfd349e commit 5a7e5a0
Show file tree
Hide file tree
Showing 927 changed files with 159,028 additions and 35,773 deletions.
50 changes: 33 additions & 17 deletions prometheus-to-sd/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,35 +3,46 @@ module github.com/GoogleCloudPlatform/k8s-stackdriver/prometheus-to-sd
go 1.20

require (
cloud.google.com/go v0.65.0
cloud.google.com/go/compute/metadata v0.4.0
cloud.google.com/go/monitoring v1.20.2
github.com/golang/glog v1.2.1
github.com/prometheus/client_golang v1.11.1
github.com/prometheus/client_model v0.2.0
github.com/prometheus/common v0.26.0
github.com/stretchr/testify v1.9.0
golang.org/x/oauth2 v0.0.0-20220223155221-ee480838109b
google.golang.org/api v0.30.0
golang.org/x/oauth2 v0.21.0
google.golang.org/api v0.188.0
google.golang.org/genproto v0.0.0-20240711142825-46eb208f015d
google.golang.org/genproto/googleapis/api v0.0.0-20240701130421-f6361c86f094
google.golang.org/protobuf v1.34.2
k8s.io/api v0.26.2
k8s.io/apimachinery v0.26.2
k8s.io/client-go v0.26.2
)

require (
cloud.google.com/go/auth v0.7.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.2 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cespare/xxhash/v2 v2.1.1 // indirect
github.com/cespare/xxhash/v2 v2.2.0 // indirect
github.com/davecgh/go-spew v1.1.1 // indirect
github.com/emicklei/go-restful/v3 v3.9.0 // indirect
github.com/go-logr/logr v1.2.3 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.1 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-openapi/jsonpointer v0.19.5 // indirect
github.com/go-openapi/jsonreference v0.20.0 // indirect
github.com/go-openapi/swag v0.19.14 // indirect
github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/protobuf v1.5.2 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/gnostic v0.5.7-v3refs // indirect
github.com/google/go-cmp v0.6.0 // indirect
github.com/google/gofuzz v1.1.0 // indirect
github.com/googleapis/gax-go/v2 v2.0.5 // indirect
github.com/google/s2a-go v0.1.7 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.2 // indirect
github.com/googleapis/gax-go/v2 v2.12.5 // indirect
github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect
github.com/mailru/easyjson v0.7.6 // indirect
Expand All @@ -41,16 +52,21 @@ require (
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/prometheus/procfs v0.6.0 // indirect
go.opencensus.io v0.22.4 // indirect
golang.org/x/net v0.23.0 // indirect
golang.org/x/sys v0.18.0 // indirect
golang.org/x/term v0.18.0 // indirect
golang.org/x/text v0.14.0 // indirect
golang.org/x/time v0.0.0-20220210224613-90d013bbcef8 // indirect
google.golang.org/appengine v1.6.7 // indirect
google.golang.org/genproto v0.0.0-20201019141844-1ed22bb0c154 // indirect
google.golang.org/grpc v1.31.0 // indirect
google.golang.org/protobuf v1.33.0 // indirect
go.opencensus.io v0.24.0 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.49.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.49.0 // indirect
go.opentelemetry.io/otel v1.24.0 // indirect
go.opentelemetry.io/otel/metric v1.24.0 // indirect
go.opentelemetry.io/otel/trace v1.24.0 // indirect
golang.org/x/crypto v0.25.0 // indirect
golang.org/x/net v0.27.0 // indirect
golang.org/x/sync v0.7.0 // indirect
golang.org/x/sys v0.22.0 // indirect
golang.org/x/term v0.22.0 // indirect
golang.org/x/text v0.16.0 // indirect
golang.org/x/time v0.5.0 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20240708141625-4ad9e859172b // indirect
google.golang.org/grpc v1.64.1 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
Expand Down
357 changes: 76 additions & 281 deletions prometheus-to-sd/go.sum

Large diffs are not rendered by default.

45 changes: 15 additions & 30 deletions prometheus-to-sd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,10 @@ import (
"syscall"
"time"

monitoring "cloud.google.com/go/monitoring/apiv3/v2"
"github.com/golang/glog"
"github.com/prometheus/client_golang/prometheus/promhttp"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
v3 "google.golang.org/api/monitoring/v3"
"google.golang.org/api/option"

"github.com/GoogleCloudPlatform/k8s-stackdriver/prometheus-to-sd/config"
"github.com/GoogleCloudPlatform/k8s-stackdriver/prometheus-to-sd/flags"
Expand Down Expand Up @@ -144,32 +143,18 @@ func main() {
glog.Error(http.ListenAndServe(fmt.Sprintf("%s:%d", *debugAddress, *debugPort), expvar.Handler()))
}()

var client *http.Client

var options []option.ClientOption
if *gceTokenURL != "" {
client = oauth2.NewClient(context.Background(), config.NewAltTokenSource(*gceTokenURL, *gceTokenBody))
} else if *projectOverride != "" {
client, err = google.DefaultClient(context.Background(), "https://www.googleapis.com/auth/cloud-platform")
if err != nil {
glog.Fatalf("Error getting default credentials: %v", err)
}
glog.Infof("Created a client with the default credentials")
} else {
ts, err := google.DefaultTokenSource(context.Background(), "https://www.googleapis.com/auth/cloud-platform")
if err != nil {
glog.Fatalf("Error creating default token source: %v", err)
}
client = oauth2.NewClient(context.Background(), ts)
ts := config.NewAltTokenSource(*gceTokenURL, *gceTokenBody)
options = append(options, option.WithTokenSource(ts))
}

stackdriverService, err := v3.New(client)
if *apioverride != "" {
stackdriverService.BasePath = *apioverride
}
ctx := context.Background()
client, err := monitoring.NewMetricClient(ctx, options...)
if err != nil {
glog.Fatalf("Failed to create Stackdriver client: %v", err)
glog.Fatalf("Failed to create client: %v", err)
}
glog.V(4).Infof("Successfully created Stackdriver client")
glog.V(4).Infof("Successfully created gcm client")

if len(sourceConfigs) == 0 {
glog.Fatalf("No sources defined. Please specify at least one --source flag.")
Expand All @@ -183,7 +168,7 @@ func main() {
glog.V(4).Infof("Starting goroutine for %+v", sourceConfig)

// Pass sourceConfig as a parameter to avoid using the last sourceConfig by all goroutines.
go readAndPushDataToStackdriver(stackdriverService, gceConf, sourceConfig, monitoredResourceLabels, *monitoredResourceTypePrefix)
go readAndPushDataToStackdriver(ctx, client, gceConf, sourceConfig, monitoredResourceLabels, *monitoredResourceTypePrefix)
}

// As worker goroutines work forever, block main thread as well.
Expand All @@ -201,7 +186,7 @@ func getSourceConfigs(defaultMetricsPrefix string, gceConfig *config.GceConfig)
return append(staticSourceConfigs, dynamicSourceConfigs...)
}

func readAndPushDataToStackdriver(stackdriverService *v3.Service, gceConf *config.GceConfig, sourceConfig *config.SourceConfig, monitoredResourceLabels map[string]string, prefix string) {
func readAndPushDataToStackdriver(ctx context.Context, client *monitoring.MetricClient, gceConf *config.GceConfig, sourceConfig *config.SourceConfig, monitoredResourceLabels map[string]string, prefix string) {
glog.Infof("Running prometheus-to-sd, monitored target is %s %s://%v:%v", sourceConfig.Component, sourceConfig.Protocol, sourceConfig.Host, sourceConfig.Port)
commonConfig := &config.CommonConfig{
GceConfig: gceConf,
Expand All @@ -211,7 +196,7 @@ func readAndPushDataToStackdriver(stackdriverService *v3.Service, gceConf *confi
MonitoredResourceLabels: monitoredResourceLabels,
MonitoredResourceTypePrefix: prefix,
}
metricDescriptorCache := translator.NewMetricDescriptorCache(stackdriverService, commonConfig)
metricDescriptorCache := translator.NewMetricDescriptorCache(client, commonConfig)
signal := time.After(0)
useWhitelistedMetricsAutodiscovery := *autoWhitelistMetrics && len(sourceConfig.Whitelisted) == 0
timeSeriesBuilder := translator.NewTimeSeriesBuilder(commonConfig, metricDescriptorCache)
Expand All @@ -222,14 +207,14 @@ func readAndPushDataToStackdriver(stackdriverService *v3.Service, gceConf *confi
// road will jump to next iteration of the loop.
select {
case <-exportTicker:
ts, scrapeTimestamp, err := timeSeriesBuilder.Build()
ts, scrapeTimestamp, err := timeSeriesBuilder.Build(ctx)
// Mark cache as stale at the first export attempt after each refresh. Cache is considered refreshed only if after
// previous export there was successful call to Refresh function.
metricDescriptorCache.MarkStale()
if err != nil {
glog.Errorf("Could not build time series for component %v: %v", sourceConfig.Component, err)
} else {
translator.SendToStackdriver(stackdriverService, commonConfig, ts, scrapeTimestamp)
translator.SendToStackdriver(ctx, client, commonConfig, ts, scrapeTimestamp)
}
default:
}
Expand All @@ -238,7 +223,7 @@ func readAndPushDataToStackdriver(stackdriverService *v3.Service, gceConf *confi
select {
case <-signal:
glog.V(4).Infof("Updating metrics cache for component %v", sourceConfig.Component)
metricDescriptorCache.Refresh()
metricDescriptorCache.Refresh(ctx)
if useWhitelistedMetricsAutodiscovery {
sourceConfig.UpdateWhitelistedMetrics(metricDescriptorCache.GetMetricNames())
glog.V(2).Infof("Autodiscovered whitelisted metrics for component %v: %v", sourceConfig.Component, sourceConfig.Whitelisted)
Expand Down
37 changes: 20 additions & 17 deletions prometheus-to-sd/translator/metric_descriptor_cache.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,28 +17,31 @@ limitations under the License.
package translator

import (
"context"

monitoring "cloud.google.com/go/monitoring/apiv3/v2"
"github.com/golang/glog"
dto "github.com/prometheus/client_model/go"
v3 "google.golang.org/api/monitoring/v3"
"google.golang.org/genproto/googleapis/api/metric"

"github.com/GoogleCloudPlatform/k8s-stackdriver/prometheus-to-sd/config"
)

// MetricDescriptorCache is responsible for fetching, creating and updating metric descriptors from the stackdriver.
type MetricDescriptorCache struct {
descriptors map[string]*v3.MetricDescriptor
descriptors map[string]*metric.MetricDescriptor
broken map[string]bool
service *v3.Service
client *monitoring.MetricClient
config *config.CommonConfig
fresh bool
}

// NewMetricDescriptorCache creates empty metric descriptor cache for the given component.
func NewMetricDescriptorCache(service *v3.Service, config *config.CommonConfig) *MetricDescriptorCache {
func NewMetricDescriptorCache(client *monitoring.MetricClient, config *config.CommonConfig) *MetricDescriptorCache {
return &MetricDescriptorCache{
descriptors: make(map[string]*v3.MetricDescriptor),
descriptors: make(map[string]*metric.MetricDescriptor),
broken: make(map[string]bool),
service: service,
client: client,
config: config,
fresh: false,
}
Expand Down Expand Up @@ -93,15 +96,15 @@ func (cache *MetricDescriptorCache) ValidateMetricDescriptors(metrics map[string
}

// UpdateMetricDescriptors iterates over all metricFamilies and updates metricDescriptors in the Stackdriver if required.
func (cache *MetricDescriptorCache) UpdateMetricDescriptors(metrics map[string]*dto.MetricFamily, whitelisted []string) {
func (cache *MetricDescriptorCache) UpdateMetricDescriptors(ctx context.Context, metrics map[string]*dto.MetricFamily, whitelisted []string) {
// Perform cache operation only if cache was recently refreshed. This is done mostly from the optimization point
// of view, we don't want to check all metric descriptors too often, as they should change rarely.
if !cache.fresh {
return
}
for _, metricFamily := range metrics {
if isMetricWhitelisted(metricFamily.GetName(), whitelisted) {
cache.updateMetricDescriptorIfStale(metricFamily)
cache.updateMetricDescriptorIfStale(ctx, metricFamily)
}
}
}
Expand All @@ -121,39 +124,39 @@ func isMetricWhitelisted(metric string, whitelisted []string) bool {

// updateMetricDescriptorIfStale checks if descriptor created from MetricFamily object differs from the existing one
// and updates if needed.
func (cache *MetricDescriptorCache) updateMetricDescriptorIfStale(metricFamily *dto.MetricFamily) {
func (cache *MetricDescriptorCache) updateMetricDescriptorIfStale(ctx context.Context, metricFamily *dto.MetricFamily) {
metricDescriptor, ok := cache.descriptors[metricFamily.GetName()]
updatedMetricDescriptor := MetricFamilyToMetricDescriptor(cache.config, metricFamily, metricDescriptor)
if !ok || descriptorChanged(metricDescriptor, updatedMetricDescriptor) {
if updateMetricDescriptorInStackdriver(cache.service, cache.config.GceConfig, updatedMetricDescriptor) {
if updateMetricDescriptorInStackdriver(ctx, cache.client, cache.config.GceConfig, updatedMetricDescriptor) {
cache.descriptors[metricFamily.GetName()] = updatedMetricDescriptor
} else {
cache.broken[metricFamily.GetName()] = true
}
}
}

func (cache *MetricDescriptorCache) getMetricDescriptor(metric string) *v3.MetricDescriptor {
func (cache *MetricDescriptorCache) getMetricDescriptor(metric string) *metric.MetricDescriptor {
value, ok := cache.descriptors[metric]
if !ok {
glog.V(4).Infof("Metric %s was not found in the cache for component %v", metric, cache.config.SourceConfig.Component)
}
return value
}

func descriptorChanged(original *v3.MetricDescriptor, checked *v3.MetricDescriptor) bool {
func descriptorChanged(original *metric.MetricDescriptor, checked *metric.MetricDescriptor) bool {
return descriptorDescriptionChanged(original, checked) || descriptorLabelSetChanged(original, checked) || descriptorMetricKindChanged(original, checked)
}

func descriptorDescriptionChanged(original *v3.MetricDescriptor, checked *v3.MetricDescriptor) bool {
func descriptorDescriptionChanged(original *metric.MetricDescriptor, checked *metric.MetricDescriptor) bool {
if original.Description != checked.Description {
glog.V(4).Infof("Description is different, %v != %v", original.Description, checked.Description)
return true
}
return false
}

func descriptorLabelSetChanged(original *v3.MetricDescriptor, checked *v3.MetricDescriptor) bool {
func descriptorLabelSetChanged(original *metric.MetricDescriptor, checked *metric.MetricDescriptor) bool {
for _, label := range checked.Labels {
found := false
for _, labelFromOriginal := range original.Labels {
Expand All @@ -170,7 +173,7 @@ func descriptorLabelSetChanged(original *v3.MetricDescriptor, checked *v3.Metric
return false
}

func descriptorMetricKindChanged(original *v3.MetricDescriptor, checked *v3.MetricDescriptor) bool {
func descriptorMetricKindChanged(original *metric.MetricDescriptor, checked *metric.MetricDescriptor) bool {
if original.MetricKind != checked.MetricKind {
glog.V(4).Infof("Metric kind is different, %v != %v", original.MetricKind, checked.MetricKind)
return true
Expand All @@ -180,8 +183,8 @@ func descriptorMetricKindChanged(original *v3.MetricDescriptor, checked *v3.Metr

// Refresh function fetches all metric descriptors of all metrics defined for given component with a defined prefix
// and puts them into cache.
func (cache *MetricDescriptorCache) Refresh() {
metricDescriptors, err := getMetricDescriptors(cache.service, cache.config)
func (cache *MetricDescriptorCache) Refresh(ctx context.Context) {
metricDescriptors, err := getMetricDescriptors(ctx, cache.client, cache.config)
if err == nil {
cache.descriptors = metricDescriptors
cache.broken = make(map[string]bool)
Expand Down
Loading

0 comments on commit 5a7e5a0

Please sign in to comment.