From 83802d8f410a9377e74b23f4b006c4605c633ad1 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey Date: Sun, 10 Nov 2024 20:26:45 +0200 Subject: [PATCH] config: expose metrics configuration. Add configuration bits for controlling which metrics are collected. Enable collection of policy metrics by default. Signed-off-by: Krisztian Litkey --- .../bases/config.nri_balloonspolicies.yaml | 28 ++++++++++- .../bases/config.nri_templatepolicies.yaml | 28 ++++++++++- .../config.nri_topologyawarepolicies.yaml | 28 ++++++++++- .../crds/config.nri_balloonspolicies.yaml | 28 ++++++++++- .../crds/config.nri_templatepolicies.yaml | 28 ++++++++++- .../config.nri_topologyawarepolicies.yaml | 28 ++++++++++- docs/resource-policy/policy/balloons.md | 9 ++-- .../config/v1alpha1/instrumentation/config.go | 8 +++- .../instrumentation/zz_generated.deepcopy.go | 44 ++++++++++++++++++ pkg/apis/config/v1alpha1/metrics/config.go | 28 +++++++++++ .../v1alpha1/metrics/zz_generated.deepcopy.go | 46 +++++++++++++++++++ .../config/v1alpha1/zz_generated.deepcopy.go | 8 ++-- pkg/instrumentation/instrumentation.go | 3 +- pkg/instrumentation/metrics/metrics.go | 13 ++++-- 14 files changed, 302 insertions(+), 25 deletions(-) create mode 100644 pkg/apis/config/v1alpha1/instrumentation/zz_generated.deepcopy.go create mode 100644 pkg/apis/config/v1alpha1/metrics/config.go create mode 100644 pkg/apis/config/v1alpha1/metrics/zz_generated.deepcopy.go diff --git a/config/crd/bases/config.nri_balloonspolicies.yaml b/config/crd/bases/config.nri_balloonspolicies.yaml index 84cf113a1..da3b852c2 100644 --- a/config/crd/bases/config.nri_balloonspolicies.yaml +++ b/config/crd/bases/config.nri_balloonspolicies.yaml @@ -320,12 +320,36 @@ spec: to expose Prometheus metrics among other things. example: :8891 type: string + metrics: + default: + enabled: + - policy + description: Metrics defines which metrics to collect. 
+ properties: + enabled: + description: Enabled enables collection for metrics matched + by glob patterns. + example: + - '*' + items: + type: string + type: array + polled: + description: Polled forces polled collection for metrics matched + by glob patterns. + example: + - computationally-expensive-metrics + items: + type: string + type: array + type: object prometheusExport: description: PrometheusExport enables exporting /metrics for Prometheus. type: boolean reportPeriod: - description: ReportPeriod is the interval between reporting aggregated - metrics. + default: 30s + description: ReportPeriod is the interval between collecting + polled metrics. format: duration type: string samplingRatePerMillion: diff --git a/config/crd/bases/config.nri_templatepolicies.yaml b/config/crd/bases/config.nri_templatepolicies.yaml index 7aa462501..2b1d4db9a 100644 --- a/config/crd/bases/config.nri_templatepolicies.yaml +++ b/config/crd/bases/config.nri_templatepolicies.yaml @@ -92,12 +92,36 @@ spec: to expose Prometheus metrics among other things. example: :8891 type: string + metrics: + default: + enabled: + - policy + description: Metrics defines which metrics to collect. + properties: + enabled: + description: Enabled enables collection for metrics matched + by glob patterns. + example: + - '*' + items: + type: string + type: array + polled: + description: Polled forces polled collection for metrics matched + by glob patterns. + example: + - computationally-expensive-metrics + items: + type: string + type: array + type: object prometheusExport: description: PrometheusExport enables exporting /metrics for Prometheus. type: boolean reportPeriod: - description: ReportPeriod is the interval between reporting aggregated - metrics. + default: 30s + description: ReportPeriod is the interval between collecting + polled metrics. 
format: duration type: string samplingRatePerMillion: diff --git a/config/crd/bases/config.nri_topologyawarepolicies.yaml b/config/crd/bases/config.nri_topologyawarepolicies.yaml index df4944647..af76a8e9a 100644 --- a/config/crd/bases/config.nri_topologyawarepolicies.yaml +++ b/config/crd/bases/config.nri_topologyawarepolicies.yaml @@ -119,12 +119,36 @@ spec: to expose Prometheus metrics among other things. example: :8891 type: string + metrics: + default: + enabled: + - policy + description: Metrics defines which metrics to collect. + properties: + enabled: + description: Enabled enables collection for metrics matched + by glob patterns. + example: + - '*' + items: + type: string + type: array + polled: + description: Polled forces polled collection for metrics matched + by glob patterns. + example: + - computationally-expensive-metrics + items: + type: string + type: array + type: object prometheusExport: description: PrometheusExport enables exporting /metrics for Prometheus. type: boolean reportPeriod: - description: ReportPeriod is the interval between reporting aggregated - metrics. + default: 30s + description: ReportPeriod is the interval between collecting + polled metrics. format: duration type: string samplingRatePerMillion: diff --git a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml index 84cf113a1..da3b852c2 100644 --- a/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml +++ b/deployment/helm/balloons/crds/config.nri_balloonspolicies.yaml @@ -320,12 +320,36 @@ spec: to expose Prometheus metrics among other things. example: :8891 type: string + metrics: + default: + enabled: + - policy + description: Metrics defines which metrics to collect. + properties: + enabled: + description: Enabled enables collection for metrics matched + by glob patterns. 
+ example: + - '*' + items: + type: string + type: array + polled: + description: Polled forces polled collection for metrics matched + by glob patterns. + example: + - computationally-expensive-metrics + items: + type: string + type: array + type: object prometheusExport: description: PrometheusExport enables exporting /metrics for Prometheus. type: boolean reportPeriod: - description: ReportPeriod is the interval between reporting aggregated - metrics. + default: 30s + description: ReportPeriod is the interval between collecting + polled metrics. format: duration type: string samplingRatePerMillion: diff --git a/deployment/helm/template/crds/config.nri_templatepolicies.yaml b/deployment/helm/template/crds/config.nri_templatepolicies.yaml index 7aa462501..2b1d4db9a 100644 --- a/deployment/helm/template/crds/config.nri_templatepolicies.yaml +++ b/deployment/helm/template/crds/config.nri_templatepolicies.yaml @@ -92,12 +92,36 @@ spec: to expose Prometheus metrics among other things. example: :8891 type: string + metrics: + default: + enabled: + - policy + description: Metrics defines which metrics to collect. + properties: + enabled: + description: Enabled enables collection for metrics matched + by glob patterns. + example: + - '*' + items: + type: string + type: array + polled: + description: Polled forces polled collection for metrics matched + by glob patterns. + example: + - computationally-expensive-metrics + items: + type: string + type: array + type: object prometheusExport: description: PrometheusExport enables exporting /metrics for Prometheus. type: boolean reportPeriod: - description: ReportPeriod is the interval between reporting aggregated - metrics. + default: 30s + description: ReportPeriod is the interval between collecting + polled metrics. 
format: duration type: string samplingRatePerMillion: diff --git a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml index df4944647..af76a8e9a 100644 --- a/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml +++ b/deployment/helm/topology-aware/crds/config.nri_topologyawarepolicies.yaml @@ -119,12 +119,36 @@ spec: to expose Prometheus metrics among other things. example: :8891 type: string + metrics: + default: + enabled: + - policy + description: Metrics defines which metrics to collect. + properties: + enabled: + description: Enabled enables collection for metrics matched + by glob patterns. + example: + - '*' + items: + type: string + type: array + polled: + description: Polled forces polled collection for metrics matched + by glob patterns. + example: + - computationally-expensive-metrics + items: + type: string + type: array + type: object prometheusExport: description: PrometheusExport enables exporting /metrics for Prometheus. type: boolean reportPeriod: - description: ReportPeriod is the interval between reporting aggregated - metrics. + default: 30s + description: ReportPeriod is the interval between collecting + polled metrics. format: duration type: string samplingRatePerMillion: diff --git a/docs/resource-policy/policy/balloons.md b/docs/resource-policy/policy/balloons.md index 8d550d036..b531a54cb 100644 --- a/docs/resource-policy/policy/balloons.md +++ b/docs/resource-policy/policy/balloons.md @@ -264,13 +264,13 @@ Balloons policy parameters: - `prometheusExport`: if set to True, balloons with their CPUs and assigned containers are readable through `/metrics` from the httpEndpoint. - - `reportPeriod`: `/metrics` aggregation interval. + - `reportPeriod`: `/metrics` aggregation interval for polled metrics. ### Example Example configuration that runs all pods in balloons of 1-4 CPUs. 
Instrumentation enables reading CPUs and containers in balloons -from `http://localhost:8891/metrics`. +from `http://$localhost_or_pod_IP:8891/metrics`. ```yaml apiVersion: config.nri/v1alpha1 @@ -413,9 +413,12 @@ nri-resource-policy global config: instrumentation: # The balloons policy exports containers running in each balloon, # and cpusets of balloons. Accessible in command line: - # curl --silent http://localhost:8891/metrics + # curl --silent http://$localhost_or_pod_IP:8891/metrics HTTPEndpoint: :8891 PrometheusExport: true + metrics: + enabled: # use '*' instead for all available metrics + - policy logger: Debug: policy ``` diff --git a/pkg/apis/config/v1alpha1/instrumentation/config.go b/pkg/apis/config/v1alpha1/instrumentation/config.go index 97e96251f..1e387dc20 100644 --- a/pkg/apis/config/v1alpha1/instrumentation/config.go +++ b/pkg/apis/config/v1alpha1/instrumentation/config.go @@ -15,10 +15,12 @@ package instrumentation import ( + "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/metrics" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) // Config provides runtime configuration for instrumentation. +// +k8s:deepcopy-gen=true type Config struct { // SamplingRatePerMillion is the number of samples to collect per million spans. // +optional @@ -33,9 +35,10 @@ type Config struct { // +optional // +kubebuilder:example="otlp-http://localhost:4318" TracingCollector string `json:"tracingCollector,omitempty"` - // ReportPeriod is the interval between reporting aggregated metrics. + // ReportPeriod is the interval between collecting polled metrics. // +optional // +kubebuilder:validation:Format="duration" + // +kubebuilder:default="30s" ReportPeriod metav1.Duration `json:"reportPeriod,omitempty"` // HTTPEndpoint is the address our HTTP server listens on. This endpoint is used // to expose Prometheus metrics among other things. @@ -45,4 +48,7 @@ type Config struct { // PrometheusExport enables exporting /metrics for Prometheus. 
// +optional PrometheusExport bool `json:"prometheusExport,omitempty"` + // Metrics defines which metrics to collect. + // +kubebuilder:default={"enabled": {"policy"}} + Metrics *metrics.Config `json:"metrics,omitempty"` } diff --git a/pkg/apis/config/v1alpha1/instrumentation/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha1/instrumentation/zz_generated.deepcopy.go new file mode 100644 index 000000000..d9887b1a2 --- /dev/null +++ b/pkg/apis/config/v1alpha1/instrumentation/zz_generated.deepcopy.go @@ -0,0 +1,44 @@ +//go:build !ignore_autogenerated + +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by controller-gen. DO NOT EDIT. + +package instrumentation + +import ( + "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/metrics" +) + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Config) DeepCopyInto(out *Config) { + *out = *in + out.ReportPeriod = in.ReportPeriod + if in.Metrics != nil { + in, out := &in.Metrics, &out.Metrics + *out = new(metrics.Config) + (*in).DeepCopyInto(*out) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. 
+func (in *Config) DeepCopy() *Config { + if in == nil { + return nil + } + out := new(Config) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/apis/config/v1alpha1/metrics/config.go b/pkg/apis/config/v1alpha1/metrics/config.go new file mode 100644 index 000000000..e9159c4d1 --- /dev/null +++ b/pkg/apis/config/v1alpha1/metrics/config.go @@ -0,0 +1,28 @@ +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package metrics + +// Config provides runtime configuration for metrics collection. +// +k8s:deepcopy-gen=true +type Config struct { + // Enabled enables collection for metrics matched by glob patterns. + // +optional + // +kubebuilder:example={"*"} + Enabled []string `json:"enabled,omitempty"` + // Polled forces polled collection for metrics matched by glob patterns. + // +optional + // +kubebuilder:example={"computationally-expensive-metrics"} + Polled []string `json:"polled,omitempty"` +} diff --git a/pkg/apis/config/v1alpha1/metrics/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha1/metrics/zz_generated.deepcopy.go new file mode 100644 index 000000000..c9c49723a --- /dev/null +++ b/pkg/apis/config/v1alpha1/metrics/zz_generated.deepcopy.go @@ -0,0 +1,46 @@ +//go:build !ignore_autogenerated + +// Copyright The NRI Plugins Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +// Code generated by controller-gen. DO NOT EDIT. + +package metrics + +import () + +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *Config) DeepCopyInto(out *Config) { + *out = *in + if in.Enabled != nil { + in, out := &in.Enabled, &out.Enabled + *out = make([]string, len(*in)) + copy(*out, *in) + } + if in.Polled != nil { + in, out := &in.Polled, &out.Polled + *out = make([]string, len(*in)) + copy(*out, *in) + } +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new Config. +func (in *Config) DeepCopy() *Config { + if in == nil { + return nil + } + out := new(Config) + in.DeepCopyInto(out) + return out +} diff --git a/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go b/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go index 608c046b7..2ca084794 100644 --- a/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go +++ b/pkg/apis/config/v1alpha1/zz_generated.deepcopy.go @@ -87,7 +87,7 @@ func (in *BalloonsPolicySpec) DeepCopyInto(out *BalloonsPolicySpec) { in.Config.DeepCopyInto(&out.Config) in.Control.DeepCopyInto(&out.Control) in.Log.DeepCopyInto(&out.Log) - out.Instrumentation = in.Instrumentation + in.Instrumentation.DeepCopyInto(&out.Instrumentation) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new BalloonsPolicySpec. 
@@ -105,7 +105,7 @@ func (in *CommonConfig) DeepCopyInto(out *CommonConfig) { *out = *in in.Control.DeepCopyInto(&out.Control) in.Log.DeepCopyInto(&out.Log) - out.Instrumentation = in.Instrumentation + in.Instrumentation.DeepCopyInto(&out.Instrumentation) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new CommonConfig. @@ -226,7 +226,7 @@ func (in *TemplatePolicySpec) DeepCopyInto(out *TemplatePolicySpec) { in.Config.DeepCopyInto(&out.Config) in.Control.DeepCopyInto(&out.Control) in.Log.DeepCopyInto(&out.Log) - out.Instrumentation = in.Instrumentation + in.Instrumentation.DeepCopyInto(&out.Instrumentation) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TemplatePolicySpec. @@ -304,7 +304,7 @@ func (in *TopologyAwarePolicySpec) DeepCopyInto(out *TopologyAwarePolicySpec) { in.Config.DeepCopyInto(&out.Config) in.Control.DeepCopyInto(&out.Control) in.Log.DeepCopyInto(&out.Log) - out.Instrumentation = in.Instrumentation + in.Instrumentation.DeepCopyInto(&out.Instrumentation) } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new TopologyAwarePolicySpec. 
diff --git a/pkg/instrumentation/instrumentation.go b/pkg/instrumentation/instrumentation.go index 67dd0d440..b2e461045 100644 --- a/pkg/instrumentation/instrumentation.go +++ b/pkg/instrumentation/instrumentation.go @@ -118,8 +118,7 @@ func start() error { metrics.WithNamespace("nri"), metrics.WithExporterDisabled(!cfg.PrometheusExport), metrics.WithReportPeriod(cfg.ReportPeriod.Duration), - // TODO(klihub): make this configurable via apis/config/.../instrumentation.Config - metrics.WithMetrics([]string{"misc/buildinfo"}, []string{"policy"}), + metrics.WithMetrics(cfg.Metrics), ); err != nil { return fmt.Errorf("failed to start metrics: %v", err) } diff --git a/pkg/instrumentation/metrics/metrics.go b/pkg/instrumentation/metrics/metrics.go index 0f23076c9..a036156ca 100644 --- a/pkg/instrumentation/metrics/metrics.go +++ b/pkg/instrumentation/metrics/metrics.go @@ -24,6 +24,8 @@ import ( "github.com/containers/nri-plugins/pkg/http" logger "github.com/containers/nri-plugins/pkg/log" "github.com/containers/nri-plugins/pkg/metrics" + + config "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/metrics" ) type ( @@ -62,10 +64,15 @@ func WithReportPeriod(v time.Duration) Option { } } -func WithMetrics(enable []string, poll []string) Option { +func WithMetrics(cfg *config.Config) Option { return func() error { - enabled = slices.Clone(enable) - polled = slices.Clone(poll) + if cfg != nil { + enabled = slices.Clone(cfg.Enabled) + polled = slices.Clone(cfg.Polled) + } else { + enabled = nil + polled = nil + } return nil } }