Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[stacked 3/5] metrics: simplify policy/backend metrics collection interface. #408

Merged
merged 2 commits into from
Nov 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 13 additions & 18 deletions cmd/plugins/balloons/policy/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -91,14 +91,7 @@ type BalloonMetrics struct {
ContainerReqMilliCpus int
}

// DescribeMetrics generates policy-specific prometheus metrics data
// descriptors.
func (p *balloons) DescribeMetrics() []*prometheus.Desc {
return descriptors
}

// PollMetrics provides policy metrics for monitoring.
func (p *balloons) PollMetrics() policy.Metrics {
func (p *balloons) GetMetrics() policy.Metrics {
policyMetrics := &Metrics{}
policyMetrics.Balloons = make([]*BalloonMetrics, len(p.balloons))
for index, bln := range p.balloons {
Expand Down Expand Up @@ -150,16 +143,19 @@ func (p *balloons) PollMetrics() policy.Metrics {
return policyMetrics
}

// CollectMetrics generates prometheus metrics from cached/polled
// policy-specific metrics data.
func (p *balloons) CollectMetrics(m policy.Metrics) ([]prometheus.Metric, error) {
metrics, ok := m.(*Metrics)
if !ok {
return nil, balloonsError("type mismatch in balloons metrics")
// Describe sends every entry of the package-level descriptors collection
// to ch. It does not read any state from the receiver.
func (m *Metrics) Describe(ch chan<- *prometheus.Desc) {
	for i := range descriptors {
		ch <- descriptors[i]
	}
}

func (m *Metrics) Collect(ch chan<- prometheus.Metric) {
if m == nil {
return
}
promMetrics := make([]prometheus.Metric, len(metrics.Balloons))
for index, bm := range metrics.Balloons {
promMetrics[index] = prometheus.MustNewConstMetric(

for _, bm := range m.Balloons {
ch <- prometheus.MustNewConstMetric(
descriptors[balloonsDesc],
prometheus.GaugeValue,
float64(bm.Cpus.Size()),
Expand All @@ -185,5 +181,4 @@ func (p *balloons) CollectMetrics(m policy.Metrics) ([]prometheus.Metric, error)
bm.ContainerNames,
strconv.Itoa(bm.ContainerReqMilliCpus))
}
return promMetrics, nil
}
26 changes: 13 additions & 13 deletions cmd/plugins/template/policy/template-policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -116,19 +116,9 @@ func (p *policy) HandleEvent(e *events.Policy) (bool, error) {
return true, nil
}

// DescribeMetrics generates policy-specific prometheus metrics data descriptors.
func (p *policy) DescribeMetrics() []*prometheus.Desc {
return nil
}

// PollMetrics provides policy metrics for monitoring.
func (p *policy) PollMetrics() policyapi.Metrics {
return nil
}

// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data.
func (p *policy) CollectMetrics(policyapi.Metrics) ([]prometheus.Metric, error) {
return nil, nil
// GetMetrics returns the policy-specific metrics collector.
func (p *policy) GetMetrics() policyapi.Metrics {
return &NoMetrics{}
}

// GetTopologyZones returns the policy/pool data for 'topology zone' CRDs.
Expand All @@ -145,3 +135,13 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string {
func (p *policy) initialize() error {
return nil
}

// NoMetrics is a metrics collector that describes nothing and collects
// nothing. The template policy returns it from GetMetrics.
type NoMetrics struct{}

// Describe sends no descriptors to the given channel.
func (*NoMetrics) Describe(chan<- *prometheus.Desc) {}

// Collect sends no metrics to the given channel.
func (*NoMetrics) Collect(chan<- prometheus.Metric) {}
39 changes: 22 additions & 17 deletions cmd/plugins/topology-aware/policy/topology-aware-policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,8 @@ import (
"fmt"

"github.com/containers/nri-plugins/pkg/utils/cpuset"
"k8s.io/apimachinery/pkg/api/resource"

"github.com/prometheus/client_golang/prometheus"
"k8s.io/apimachinery/pkg/api/resource"

cfgapi "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware"
"github.com/containers/nri-plugins/pkg/cpuallocator"
Expand Down Expand Up @@ -68,6 +67,7 @@ type policy struct {
cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy
memAllocator *libmem.Allocator
coldstartOff bool // coldstart forced off (have movable PMEM zones)
metrics *TopologyAwareMetrics
}

var opt = &cfgapi.Config{}
Expand Down Expand Up @@ -115,6 +115,8 @@ func (p *policy) Setup(opts *policyapi.BackendOptions) error {
return policyError("failed to initialize %s policy: %w", PolicyName, err)
}

p.metrics = p.NewTopologyAwareMetrics()

log.Info("***** default CPU priority is %s", defaultPrio)

return nil
Expand Down Expand Up @@ -306,21 +308,6 @@ func (p *policy) HandleEvent(e *events.Policy) (bool, error) {
return false, nil
}

// DescribeMetrics generates policy-specific prometheus metrics data descriptors.
func (p *policy) DescribeMetrics() []*prometheus.Desc {
return nil
}

// PollMetrics provides policy metrics for monitoring.
func (p *policy) PollMetrics() policyapi.Metrics {
return nil
}

// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data.
func (p *policy) CollectMetrics(policyapi.Metrics) ([]prometheus.Metric, error) {
return nil, nil
}

// GetTopologyZones returns the policy/pool data for 'topology zone' CRDs.
func (p *policy) GetTopologyZones() []*policyapi.TopologyZone {
zones := []*policyapi.TopologyZone{}
Expand Down Expand Up @@ -435,6 +422,24 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string {
return data
}

// GetMetrics returns the policy-specific metrics collector stored in
// p.metrics.
func (p *policy) GetMetrics() policyapi.Metrics {
	collector := p.metrics
	return collector
}

// NewTopologyAwareMetrics creates a new, empty TopologyAwareMetrics
// collector. It does not read any state from the receiver.
func (p *policy) NewTopologyAwareMetrics() *TopologyAwareMetrics {
	return new(TopologyAwareMetrics)
}

// TopologyAwareMetrics is the metrics collector of the topology-aware
// policy. It currently carries no state and exposes no metrics.
type TopologyAwareMetrics struct{}

// Describe currently sends no descriptors to ch.
func (*TopologyAwareMetrics) Describe(ch chan<- *prometheus.Desc) {}

// Collect currently sends no metrics to ch.
func (*TopologyAwareMetrics) Collect(ch chan<- prometheus.Metric) {}

// reallocateResources reallocates the given containers using the given pool hints
func (p *policy) reallocateResources(containers []cache.Container, pools map[string]string) error {
errs := []error{}
Expand Down
1 change: 1 addition & 0 deletions config/crd/bases/config.nri_balloonspolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ spec:
default:
enabled:
- policy
- buildinfo
description: Metrics defines which metrics to collect.
properties:
enabled:
Expand Down
1 change: 1 addition & 0 deletions config/crd/bases/config.nri_templatepolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ spec:
default:
enabled:
- policy
- buildinfo
description: Metrics defines which metrics to collect.
properties:
enabled:
Expand Down
1 change: 1 addition & 0 deletions config/crd/bases/config.nri_topologyawarepolicies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ spec:
default:
enabled:
- policy
- buildinfo
description: Metrics defines which metrics to collect.
properties:
enabled:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,7 @@ spec:
default:
enabled:
- policy
- buildinfo
description: Metrics defines which metrics to collect.
properties:
enabled:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ spec:
default:
enabled:
- policy
- buildinfo
description: Metrics defines which metrics to collect.
properties:
enabled:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ spec:
default:
enabled:
- policy
- buildinfo
description: Metrics defines which metrics to collect.
properties:
enabled:
Expand Down
2 changes: 1 addition & 1 deletion pkg/apis/config/v1alpha1/instrumentation/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,6 @@ type Config struct {
// +optional
PrometheusExport bool `json:"prometheusExport,omitempty"`
// Metrics defines which metrics to collect.
// +kubebuilder:default={"enabled": {"policy"}}
// +kubebuilder:default={"enabled": {"policy", "buildinfo"}}
Metrics *metrics.Config `json:"metrics,omitempty"`
}
66 changes: 66 additions & 0 deletions pkg/metrics/collectors/collectors.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Copyright The NRI Plugins Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package collectors

import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"

logger "github.com/containers/nri-plugins/pkg/log"
"github.com/containers/nri-plugins/pkg/metrics"
"github.com/containers/nri-plugins/pkg/version"
)

var (
log = logger.Get("metrics")
)

// NewVersionInfoCollector returns a gauge-function collector named
// "version_info". Its value function always returns 1, and its constant
// labels "version" and "build" are set to v and b respectively.
func NewVersionInfoCollector(v, b string) prometheus.Collector {
	labels := prometheus.Labels{
		"version": v,
		"build":   b,
	}
	opts := prometheus.GaugeOpts{
		Name:        "version_info",
		Help:        "A metric with constant '1' value labeled by version and build info.",
		ConstLabels: labels,
	}
	constantOne := func() float64 { return 1 }

	return prometheus.NewGaugeFunc(opts, constantOne)
}

// init registers the build info, Go runtime, process, and version info
// collectors with the metrics package. Every registration passes the
// WithGroup("standard") option together with the WithoutNamespace and
// WithoutSubsystem collector options. A failed registration is logged and
// the remaining collectors are still registered.
func init() {
	// The list is deliberately not called "collectors": that name would
	// shadow the imported collectors package for the rest of the function.
	// It is a slice rather than a map so that registration, and any error
	// logging, always happens in the same order.
	standard := []struct {
		name      string
		collector prometheus.Collector
	}{
		{name: "buildinfo", collector: collectors.NewBuildInfoCollector()},
		{name: "golang", collector: collectors.NewGoCollector()},
		{name: "process", collector: collectors.NewProcessCollector(collectors.ProcessCollectorOpts{})},
		{name: "versioninfo", collector: NewVersionInfoCollector(version.Version, version.Build)},
	}
	options := []metrics.RegisterOption{
		metrics.WithGroup("standard"),
		metrics.WithCollectorOptions(
			metrics.WithoutNamespace(),
			metrics.WithoutSubsystem(),
		),
	}

	for _, c := range standard {
		if err := metrics.Register(c.name, c.collector, options...); err != nil {
			log.Error("failed to register %s collector: %v", c.name, err)
		}
	}
}
1 change: 1 addition & 0 deletions pkg/resmgr/main/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (

"github.com/containers/nri-plugins/pkg/agent"
"github.com/containers/nri-plugins/pkg/instrumentation"
_ "github.com/containers/nri-plugins/pkg/metrics/collectors"
"github.com/containers/nri-plugins/pkg/resmgr"
"github.com/containers/nri-plugins/pkg/resmgr/policy"

Expand Down
16 changes: 2 additions & 14 deletions pkg/resmgr/policy/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,21 +35,9 @@ func (c *PolicyCollector) register() error {
}

func (c *PolicyCollector) Describe(ch chan<- *prometheus.Desc) {
for _, d := range c.policy.active.DescribeMetrics() {
ch <- d
}
c.policy.active.GetMetrics().Describe(ch)
}

func (c *PolicyCollector) Collect(ch chan<- prometheus.Metric) {
polled := c.policy.active.PollMetrics()

collected, err := c.policy.active.CollectMetrics(polled)
if err != nil {
log.Error("failed to collect metrics: %v", err)
return
}

for _, m := range collected {
ch <- m
}
c.policy.active.GetMetrics().Collect(ch)
}
14 changes: 7 additions & 7 deletions pkg/resmgr/policy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,8 @@ type Backend interface {
HandleEvent(*events.Policy) (bool, error)
// ExportResourceData provides resource data to export for the container.
ExportResourceData(cache.Container) map[string]string
// DescribeMetrics generates policy-specific prometheus metrics data descriptors.
DescribeMetrics() []*prometheus.Desc
// PollMetrics provides policy metrics for monitoring.
PollMetrics() Metrics
// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data.
CollectMetrics(Metrics) ([]prometheus.Metric, error)
// GetMetrics returns the policy-specific metrics collector.
GetMetrics() Metrics
// GetTopologyZones returns the policy/pool data for 'topology zone' CRDs.
GetTopologyZones() []*TopologyZone
}
Expand Down Expand Up @@ -151,7 +147,11 @@ type Policy interface {
GetTopologyZones() []*TopologyZone
}

type Metrics interface{}
// Metrics is the interface we expect policy-specific metrics to implement.
// Its method set is exactly that of prometheus.Collector:
// Describe(chan<- *prometheus.Desc) and Collect(chan<- prometheus.Metric).
type Metrics interface {
	prometheus.Collector
}

// Node resource topology resource and attribute names.
// XXX TODO(klihub): We'll probably need to add similar unified consts
Expand Down