From 5e7ffdebd9b34d3e085aa6d6525f146bb30a5232 Mon Sep 17 00:00:00 2001 From: Krisztian Litkey <krisztian.litkey@intel.com> Date: Tue, 12 Nov 2024 00:12:08 +0200 Subject: [PATCH] policy: rework policy/backend metrics interface. Simplify the policy-backend metrics collection interface, reducing it to a single GetMetrics() call and a returned Metrics interface which simply implements the collector- like Describe() and Collect() interfaces. Update policy implementations accordingly. Signed-off-by: Krisztian Litkey <krisztian.litkey@intel.com> --- cmd/plugins/balloons/policy/metrics.go | 31 +++++++-------- .../template/policy/template-policy.go | 26 ++++++------- .../policy/topology-aware-policy.go | 39 +++++++++++-------- pkg/resmgr/policy/metrics.go | 16 +------- pkg/resmgr/policy/policy.go | 14 +++---- 5 files changed, 57 insertions(+), 69 deletions(-) diff --git a/cmd/plugins/balloons/policy/metrics.go b/cmd/plugins/balloons/policy/metrics.go index 3a5868d09..566be8dee 100644 --- a/cmd/plugins/balloons/policy/metrics.go +++ b/cmd/plugins/balloons/policy/metrics.go @@ -91,14 +91,7 @@ type BalloonMetrics struct { ContainerReqMilliCpus int } -// DescribeMetrics generates policy-specific prometheus metrics data -// descriptors. -func (p *balloons) DescribeMetrics() []*prometheus.Desc { - return descriptors -} - -// PollMetrics provides policy metrics for monitoring. -func (p *balloons) PollMetrics() policy.Metrics { +func (p *balloons) GetMetrics() policy.Metrics { policyMetrics := &Metrics{} policyMetrics.Balloons = make([]*BalloonMetrics, len(p.balloons)) for index, bln := range p.balloons { @@ -150,16 +143,19 @@ func (p *balloons) PollMetrics() policy.Metrics { return policyMetrics } -// CollectMetrics generates prometheus metrics from cached/polled -// policy-specific metrics data. -func (p *balloons) CollectMetrics(m policy.Metrics) ([]prometheus.Metric, error) { - metrics, ok := m.(*Metrics) - if !ok { - return nil, balloonsError("type mismatch in balloons metrics") +func (m *Metrics) Describe(ch chan<- *prometheus.Desc) { + for _, d := range descriptors { + ch <- d + } +} + +func (m *Metrics) Collect(ch chan<- prometheus.Metric) { + if m == nil { + return } - promMetrics := make([]prometheus.Metric, len(metrics.Balloons)) - for index, bm := range metrics.Balloons { - promMetrics[index] = prometheus.MustNewConstMetric( + + for _, bm := range m.Balloons { + ch <- prometheus.MustNewConstMetric( descriptors[balloonsDesc], prometheus.GaugeValue, float64(bm.Cpus.Size()), @@ -185,5 +181,4 @@ func (p *balloons) CollectMetrics(m policy.Metrics) ([]prometheus.Metric, error) bm.ContainerNames, strconv.Itoa(bm.ContainerReqMilliCpus)) } - return promMetrics, nil } diff --git a/cmd/plugins/template/policy/template-policy.go b/cmd/plugins/template/policy/template-policy.go index bc74873ce..79464fbeb 100644 --- a/cmd/plugins/template/policy/template-policy.go +++ b/cmd/plugins/template/policy/template-policy.go @@ -116,19 +116,9 @@ func (p *policy) HandleEvent(e *events.Policy) (bool, error) { return true, nil } -// DescribeMetrics generates policy-specific prometheus metrics data descriptors. -func (p *policy) DescribeMetrics() []*prometheus.Desc { - return nil -} - -// PollMetrics provides policy metrics for monitoring. -func (p *policy) PollMetrics() policyapi.Metrics { - return nil -} - -// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data. -func (p *policy) CollectMetrics(policyapi.Metrics) ([]prometheus.Metric, error) { - return nil, nil +// GetMetrics returns the policy-specific metrics collector. +func (p *policy) GetMetrics() policyapi.Metrics { + return &NoMetrics{} } // GetTopologyZones returns the policy/pool data for 'topology zone' CRDs. @@ -145,3 +135,13 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string { func (p *policy) initialize() error { return nil } + +type NoMetrics struct{} + +func (*NoMetrics) Describe(chan<- *prometheus.Desc) { + return +} + +func (*NoMetrics) Collect(chan<- prometheus.Metric) { + return +} diff --git a/cmd/plugins/topology-aware/policy/topology-aware-policy.go b/cmd/plugins/topology-aware/policy/topology-aware-policy.go index c0b07ce14..6a3d4b16b 100644 --- a/cmd/plugins/topology-aware/policy/topology-aware-policy.go +++ b/cmd/plugins/topology-aware/policy/topology-aware-policy.go @@ -19,9 +19,8 @@ import ( "fmt" "github.com/containers/nri-plugins/pkg/utils/cpuset" - "k8s.io/apimachinery/pkg/api/resource" - "github.com/prometheus/client_golang/prometheus" + "k8s.io/apimachinery/pkg/api/resource" cfgapi "github.com/containers/nri-plugins/pkg/apis/config/v1alpha1/resmgr/policy/topologyaware" "github.com/containers/nri-plugins/pkg/cpuallocator" @@ -68,6 +67,7 @@ type policy struct { cpuAllocator cpuallocator.CPUAllocator // CPU allocator used by the policy memAllocator *libmem.Allocator coldstartOff bool // coldstart forced off (have movable PMEM zones) + metrics *TopologyAwareMetrics } var opt = &cfgapi.Config{} @@ -115,6 +115,8 @@ func (p *policy) Setup(opts *policyapi.BackendOptions) error { return policyError("failed to initialize %s policy: %w", PolicyName, err) } + p.metrics = p.NewTopologyAwareMetrics() + log.Info("***** default CPU priority is %s", defaultPrio) return nil @@ -306,21 +308,6 @@ func (p *policy) HandleEvent(e *events.Policy) (bool, error) { return false, nil } -// DescribeMetrics generates policy-specific prometheus metrics data descriptors. -func (p *policy) DescribeMetrics() []*prometheus.Desc { - return nil -} - -// PollMetrics provides policy metrics for monitoring. -func (p *policy) PollMetrics() policyapi.Metrics { - return nil -} - -// CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data. -func (p *policy) CollectMetrics(policyapi.Metrics) ([]prometheus.Metric, error) { - return nil, nil -} - // GetTopologyZones returns the policy/pool data for 'topology zone' CRDs. func (p *policy) GetTopologyZones() []*policyapi.TopologyZone { zones := []*policyapi.TopologyZone{} @@ -435,6 +422,24 @@ func (p *policy) ExportResourceData(c cache.Container) map[string]string { return data } +func (p *policy) GetMetrics() policyapi.Metrics { + return p.metrics +} + +func (p *policy) NewTopologyAwareMetrics() *TopologyAwareMetrics { + return &TopologyAwareMetrics{} +} + +type TopologyAwareMetrics struct{} + +func (*TopologyAwareMetrics) Describe(ch chan<- *prometheus.Desc) { + return +} + +func (*TopologyAwareMetrics) Collect(ch chan<- prometheus.Metric) { + return +} + // reallocateResources reallocates the given containers using the given pool hints func (p *policy) reallocateResources(containers []cache.Container, pools map[string]string) error { errs := []error{} diff --git a/pkg/resmgr/policy/metrics.go b/pkg/resmgr/policy/metrics.go index 19eb09169..12ff57d3e 100644 --- a/pkg/resmgr/policy/metrics.go +++ b/pkg/resmgr/policy/metrics.go @@ -35,21 +35,9 @@ func (c *PolicyCollector) register() error { } func (c *PolicyCollector) Describe(ch chan<- *prometheus.Desc) { - for _, d := range c.policy.active.DescribeMetrics() { - ch <- d - } + c.policy.active.GetMetrics().Describe(ch) } func (c *PolicyCollector) Collect(ch chan<- prometheus.Metric) { - polled := c.policy.active.PollMetrics() - - collected, err := c.policy.active.CollectMetrics(polled) - if err != nil { - log.Error("failed to collect metrics: %v", err) - return - } - - for _, m := range collected { - ch <- m - } + c.policy.active.GetMetrics().Collect(ch) } diff --git a/pkg/resmgr/policy/policy.go b/pkg/resmgr/policy/policy.go index 2ecdcb29a..5b7901e4f 100644 --- a/pkg/resmgr/policy/policy.go +++ b/pkg/resmgr/policy/policy.go @@ -115,12 +115,8 @@ type Backend interface { HandleEvent(*events.Policy) (bool, error) // ExportResourceData provides resource data to export for the container. ExportResourceData(cache.Container) map[string]string - // DescribeMetrics generates policy-specific prometheus metrics data descriptors. - DescribeMetrics() []*prometheus.Desc - // PollMetrics provides policy metrics for monitoring. - PollMetrics() Metrics - // CollectMetrics generates prometheus metrics from cached/polled policy-specific metrics data. - CollectMetrics(Metrics) ([]prometheus.Metric, error) + // GetMetrics returns the policy-specific metrics collector. + GetMetrics() Metrics // GetTopologyZones returns the policy/pool data for 'topology zone' CRDs. GetTopologyZones() []*TopologyZone } @@ -151,7 +147,11 @@ type Policy interface { GetTopologyZones() []*TopologyZone } -type Metrics interface{} +// Metrics is the interface we expect policy-specific metrics to implement. +type Metrics interface { + Describe(chan<- *prometheus.Desc) + Collect(chan<- prometheus.Metric) +} // Node resource topology resource and attribute names. // XXX TODO(klihub): We'll probably need to add similar unified consts