[receiver/hostmetrics] Collect cpu utilization metric #7130

Merged (9 commits) on Mar 7, 2022
CHANGELOG.md: 1 change (1 addition, 0 deletions)
@@ -13,6 +13,7 @@
- `sapmexporter`: Add validation for `sending_queue` setting (#8023)
- `signalfxexporter`: Add validation for `sending_queue` setting (#8026)
- `resourcedetectionprocessor`: Add confighttp.HTTPClientSettings To Resource Detection Config Fixes (#7397)
- `hostmetricsreceiver`: Add cpu.utilization metric to cpu scraper (#7130)
- `honeycombexporter`: Add validation for `sending_queue` setting (#8113)

### 🛑 Breaking changes 🛑
@@ -25,23 +25,26 @@ import (
"go.opentelemetry.io/collector/receiver/scrapererror"

"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper/internal/metadata"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper/ucal"
)

const metricsLen = 1
const metricsLen = 2

// scraper for CPU Metrics
type scraper struct {
config *Config
mb *metadata.MetricsBuilder
ucal *ucal.CPUUtilizationCalculator

// for mocking
bootTime func() (uint64, error)
times func(bool) ([]cpu.TimesStat, error)
now func() time.Time
}

// newCPUScraper creates a set of CPU related metrics
func newCPUScraper(_ context.Context, cfg *Config) *scraper {
return &scraper{config: cfg, bootTime: host.BootTime, times: cpu.Times}
return &scraper{config: cfg, bootTime: host.BootTime, times: cpu.Times, ucal: &ucal.CPUUtilizationCalculator{}, now: time.Now}
}

func (s *scraper) start(context.Context, component.Host) error {
@@ -57,7 +60,7 @@ func (s *scraper) scrape(_ context.Context) (pdata.Metrics, error) {
md := pdata.NewMetrics()
metrics := md.ResourceMetrics().AppendEmpty().InstrumentationLibraryMetrics().AppendEmpty().Metrics()

now := pdata.NewTimestampFromTime(time.Now())
now := pdata.NewTimestampFromTime(s.now())
cpuTimes, err := s.times( /*percpu=*/ true)
if err != nil {
return md, scrapererror.NewPartialScrapeError(err, metricsLen)
@@ -66,6 +69,12 @@ func (s *scraper) scrape(_ context.Context) (pdata.Metrics, error) {
for _, cpuTime := range cpuTimes {
s.recordCPUTimeStateDataPoints(now, cpuTime)
}

err = s.ucal.CalculateAndRecord(now, cpuTimes, s.recordCPUUtilization)
if err != nil {
return md, scrapererror.NewPartialScrapeError(err, metricsLen)
}

s.mb.Emit(metrics)
return md, nil
}
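
The utilization work is delegated to the new ucal package, whose implementation is not included in this diff. Judging from the call above and from the expectations in the tests further down (per-state deltas of the cumulative counters divided by the seconds elapsed between scrapes, and an error when no time has elapsed), a minimal sketch of the calculator could look like the following. Only CalculateAndRecord, the CPUUtilization fields read by recordCPUUtilization, and the callback shape are taken from the diff; every other name here is an assumption.

// Sketch of a possible ucal implementation; assumed, not the committed code.
package ucal

import (
    "fmt"

    "github.com/shirou/gopsutil/v3/cpu"
    "go.opentelemetry.io/collector/model/pdata"
)

// CPUUtilization carries the per-state utilization of one logical CPU as a
// fraction of the interval between two scrapes. The field names mirror the
// ones recordCPUUtilization reads above.
type CPUUtilization struct {
    CPU     string
    User    float64
    System  float64
    Idle    float64
    Irq     float64
    Nice    float64
    Softirq float64
    Steal   float64
    Iowait  float64
}

// CPUUtilizationCalculator remembers the previous scrape so the cumulative
// counters returned by cpu.Times can be turned into per-interval rates.
type CPUUtilizationCalculator struct {
    previousReadTime pdata.Timestamp
    previousTimes    []cpu.TimesStat
}

// CalculateAndRecord emits one utilization point per CPU once a baseline
// exists; the very first call only stores the baseline and records nothing.
func (c *CPUUtilizationCalculator) CalculateAndRecord(now pdata.Timestamp, times []cpu.TimesStat, record func(pdata.Timestamp, CPUUtilization)) error {
    if c.previousTimes != nil {
        elapsed := now.AsTime().Sub(c.previousReadTime.AsTime()).Seconds()
        if elapsed <= 0 {
            return fmt.Errorf("cannot calculate CPU utilization: elapsed time is %.2fs", elapsed)
        }
        for i, t := range times {
            if i >= len(c.previousTimes) {
                break // CPU set changed between scrapes; skip the extras
            }
            p := c.previousTimes[i]
            record(now, CPUUtilization{
                CPU:     t.CPU,
                User:    (t.User - p.User) / elapsed,
                System:  (t.System - p.System) / elapsed,
                Idle:    (t.Idle - p.Idle) / elapsed,
                Irq:     (t.Irq - p.Irq) / elapsed,
                Nice:    (t.Nice - p.Nice) / elapsed,
                Softirq: (t.Softirq - p.Softirq) / elapsed,
                Steal:   (t.Steal - p.Steal) / elapsed,
                Iowait:  (t.Iowait - p.Iowait) / elapsed,
            })
        }
    }
    c.previousReadTime = now
    c.previousTimes = times
    return nil
}

Handing in the recording callback, rather than the MetricsBuilder itself, keeps the calculator free of any platform-specific knowledge about which CPU states exist.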
@@ -22,6 +22,7 @@ import (
"go.opentelemetry.io/collector/model/pdata"

"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper/internal/metadata"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper/ucal"
)

func (s *scraper) recordCPUTimeStateDataPoints(now pdata.Timestamp, cpuTime cpu.TimesStat) {
@@ -34,3 +35,14 @@ func (s *scraper) recordCPUTimeStateDataPoints(now pdata.Timestamp, cpuTime cpu.
s.mb.RecordSystemCPUTimeDataPoint(now, cpuTime.Steal, cpuTime.CPU, metadata.AttributeState.Steal)
s.mb.RecordSystemCPUTimeDataPoint(now, cpuTime.Iowait, cpuTime.CPU, metadata.AttributeState.Wait)
}

func (s *scraper) recordCPUUtilization(now pdata.Timestamp, cpuUtilization ucal.CPUUtilization) {
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.User, cpuUtilization.CPU, metadata.AttributeState.User)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.System, cpuUtilization.CPU, metadata.AttributeState.System)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Idle, cpuUtilization.CPU, metadata.AttributeState.Idle)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Irq, cpuUtilization.CPU, metadata.AttributeState.Interrupt)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Nice, cpuUtilization.CPU, metadata.AttributeState.Nice)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Softirq, cpuUtilization.CPU, metadata.AttributeState.Softirq)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Steal, cpuUtilization.CPU, metadata.AttributeState.Steal)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Iowait, cpuUtilization.CPU, metadata.AttributeState.Wait)
}
@@ -22,6 +22,7 @@ import (
"go.opentelemetry.io/collector/model/pdata"

"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper/internal/metadata"
"github.com/open-telemetry/opentelemetry-collector-contrib/receiver/hostmetricsreceiver/internal/scraper/cpuscraper/ucal"
)

func (s *scraper) recordCPUTimeStateDataPoints(now pdata.Timestamp, cpuTime cpu.TimesStat) {
@@ -30,3 +31,10 @@ func (s *scraper) recordCPUTimeStateDataPoints(now pdata.Timestamp, cpuTime cpu.
s.mb.RecordSystemCPUTimeDataPoint(now, cpuTime.Idle, cpuTime.CPU, metadata.AttributeState.Idle)
s.mb.RecordSystemCPUTimeDataPoint(now, cpuTime.Irq, cpuTime.CPU, metadata.AttributeState.Interrupt)
}

func (s *scraper) recordCPUUtilization(now pdata.Timestamp, cpuUtilization ucal.CPUUtilization) {
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.User, cpuUtilization.CPU, metadata.AttributeState.User)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.System, cpuUtilization.CPU, metadata.AttributeState.System)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Idle, cpuUtilization.CPU, metadata.AttributeState.Idle)
s.mb.RecordSystemCPUUtilizationDataPoint(now, cpuUtilization.Irq, cpuUtilization.CPU, metadata.AttributeState.Interrupt)
}
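
The file names are not shown in this capture, but the two near-identical recorders above belong to platform-specific builds of the scraper: the first (with nice, softirq, steal and wait states) matches the Linux-specific assertions in the tests below, while the second records only the four states reported on other platforms.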
@@ -19,6 +19,7 @@ import (
"errors"
"runtime"
"testing"
"time"

"github.com/shirou/gopsutil/v3/cpu"
"github.com/stretchr/testify/assert"
@@ -105,7 +106,7 @@ func TestScrape(t *testing.T) {
isPartial := scrapererror.IsPartialScrapeError(err)
assert.True(t, isPartial)
if isPartial {
assert.Equal(t, 1, err.(scrapererror.PartialScrapeError).Failed)
assert.Equal(t, 2, err.(scrapererror.PartialScrapeError).Failed)
}

return
@@ -128,6 +129,216 @@ func TestScrape(t *testing.T) {
}
}

// TestScrape_CpuUtilization verifies the utilization metric. The scraper has to run at least twice
// so there is a previous sample to diff against; assertions are therefore made after the second scrape.
func TestScrape_CpuUtilization(t *testing.T) {
type testCase struct {
name string
metricsConfig metadata.MetricsSettings
expectedMetricCount int
times bool
utilization bool
utilizationIndex int
}

testCases := []testCase{
{
name: "Standard",
metricsConfig: metadata.DefaultMetricsSettings(),
expectedMetricCount: 1,
times: true,
utilization: false,
},
{
name: "SystemCPUTime metric is disabled",
times: false,
utilization: true,
expectedMetricCount: 1,
},
{
name: "all metrics are enabled",
times: true,
utilization: true,
expectedMetricCount: 2,
utilizationIndex: 1,
},
{
name: "all metrics are disabled",
times: false,
utilization: false,
expectedMetricCount: 0,
},
}

for _, test := range testCases {
test := test
t.Run(test.name, func(t *testing.T) {
t.Parallel()
settings := test.metricsConfig
if test.metricsConfig == (metadata.MetricsSettings{}) {
settings = metadata.MetricsSettings{
SystemCPUTime: metadata.MetricSettings{
Enabled: test.times,
},
SystemCPUUtilization: metadata.MetricSettings{
Enabled: test.utilization,
},
}
}

scraper := newCPUScraper(context.Background(), &Config{Metrics: settings})
err := scraper.start(context.Background(), componenttest.NewNopHost())
require.NoError(t, err, "Failed to initialize cpu scraper: %v", err)

_, err = scraper.scrape(context.Background())
require.NoError(t, err, "Failed to scrape metrics: %v", err)
//2nd scrape will trigger utilization metrics calculation
md, err := scraper.scrape(context.Background())
require.NoError(t, err, "Failed to scrape metrics: %v", err)

assert.Equal(t, test.expectedMetricCount, md.MetricCount())
metrics := md.ResourceMetrics().At(0).InstrumentationLibraryMetrics().At(0).Metrics()
internal.AssertSameTimeStampForAllMetrics(t, metrics)
if test.times {
timesMetrics := metrics.At(0)
assertCPUMetricValid(t, timesMetrics, 0)
if runtime.GOOS == "linux" {
assertCPUMetricHasLinuxSpecificStateLabels(t, timesMetrics)
}
}
if test.utilization {
utilizationMetrics := metrics.At(test.utilizationIndex)
assertCPUUtilizationMetricValid(t, utilizationMetrics, 0)
if runtime.GOOS == "linux" {
assertCPUUtilizationMetricHasLinuxSpecificStateLabels(t, utilizationMetrics)
}
}
})
}
}

func TestScrape_CpuUtilizationError(t *testing.T) {
scraper := newCPUScraper(context.Background(), &Config{Metrics: metadata.DefaultMetricsSettings()})
err := scraper.start(context.Background(), componenttest.NewNopHost())
require.NoError(t, err, "Failed to initialize cpu scraper: %v", err)

scraper.now = func() time.Time {
// Freeze the clock: both scrapes see the same timestamp, so the utilization
// calculation has no elapsed time to work with and is expected to fail.
now, _ := time.Parse(time.RFC3339, "2021-12-21T00:23:21Z")
return now
}

_, err = scraper.scrape(context.Background())
require.NoError(t, err, "Failed to scrape metrics: %v", err)
//2nd scrape will trigger utilization metrics calculation
md, err := scraper.scrape(context.Background())
var partialScrapeErr scrapererror.PartialScrapeError
assert.ErrorAs(t, err, &partialScrapeErr)
assert.Equal(t, 0, md.MetricCount())
}
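
Pinning s.now to a constant means the second scrape sees no elapsed time since the first, so the utilization calculation has no interval to divide by; the test asserts that this surfaces as a PartialScrapeError and that no metrics are emitted for that scrape.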

func TestScrape_CpuUtilizationStandard(t *testing.T) {
metricSettings := metadata.MetricsSettings{
SystemCPUUtilization: metadata.MetricSettings{
Enabled: true,
},
}

// expected value and attributes of a single data point
type dpData struct {
val float64
attrs map[string]string
}

scrapesData := []struct {
times []cpu.TimesStat
scrapeTime string
expectedDps []dpData
}{
{
times: []cpu.TimesStat{{CPU: "cpu0", User: 1.5, System: 2.7, Idle: 0.8}, {CPU: "cpu1", User: 2, System: 3, Idle: 1}},
scrapeTime: "2006-01-02T15:04:05Z",
expectedDps: []dpData{},
},
{
times: []cpu.TimesStat{{CPU: "cpu0", User: 2.8, System: 3.9, Idle: 3.3}, {CPU: "cpu1", User: 3.2, System: 5.2, Idle: 2.6}},
scrapeTime: "2006-01-02T15:04:10Z",
expectedDps: []dpData{
{val: 0.26, attrs: map[string]string{"cpu": "cpu0", "state": "user"}},
{val: 0.24, attrs: map[string]string{"cpu": "cpu0", "state": "system"}},
{val: 0.5, attrs: map[string]string{"cpu": "cpu0", "state": "idle"}},
{val: 0.24, attrs: map[string]string{"cpu": "cpu1", "state": "user"}},
{val: 0.44, attrs: map[string]string{"cpu": "cpu1", "state": "system"}},
{val: 0.32, attrs: map[string]string{"cpu": "cpu1", "state": "idle"}},
},
},
{
times: []cpu.TimesStat{{CPU: "cpu0", User: 3.4, System: 5.3, Idle: 6.3}, {CPU: "cpu1", User: 3.7, System: 7.1, Idle: 5.2}},
scrapeTime: "2006-01-02T15:04:15Z",
expectedDps: []dpData{
{val: 0.12, attrs: map[string]string{"cpu": "cpu0", "state": "user"}},
{val: 0.28, attrs: map[string]string{"cpu": "cpu0", "state": "system"}},
{val: 0.6, attrs: map[string]string{"cpu": "cpu0", "state": "idle"}},
{val: 0.1, attrs: map[string]string{"cpu": "cpu1", "state": "user"}},
{val: 0.38, attrs: map[string]string{"cpu": "cpu1", "state": "system"}},
{val: 0.52, attrs: map[string]string{"cpu": "cpu1", "state": "idle"}},
},
},
}

cpuScraper := newCPUScraper(context.Background(), &Config{Metrics: metricSettings})
for _, scrapeData := range scrapesData {
// mock cpu.Times and the scraper clock
cpuScraper.times = func(_ bool) ([]cpu.TimesStat, error) {
return scrapeData.times, nil
}
cpuScraper.now = func() time.Time {
now, _ := time.Parse(time.RFC3339, scrapeData.scrapeTime)
return now
}

err := cpuScraper.start(context.Background(), componenttest.NewNopHost())
require.NoError(t, err, "Failed to initialize cpu scraper: %v", err)

md, err := cpuScraper.scrape(context.Background())
require.NoError(t, err)
//no metrics in the first scrape
if len(scrapeData.expectedDps) == 0 {
assert.Equal(t, 0, md.ResourceMetrics().At(0).InstrumentationLibraryMetrics().At(0).Metrics().Len())
continue
}

assert.Equal(t, 1, md.ResourceMetrics().At(0).InstrumentationLibraryMetrics().At(0).Metrics().Len())
metric := md.ResourceMetrics().At(0).InstrumentationLibraryMetrics().At(0).Metrics().At(0)
assertCPUUtilizationMetricValid(t, metric, 0)
dp := metric.Gauge().DataPoints()

expectedDataPoints := 8
if runtime.GOOS == "linux" {
expectedDataPoints = 16
assertCPUUtilizationMetricHasLinuxSpecificStateLabels(t, metric)
}
assert.Equal(t, expectedDataPoints, dp.Len())

// drop zero-valued points (states absent from the mocked data) to keep the assertions simple
dp.RemoveIf(func(n pdata.NumberDataPoint) bool {
return n.DoubleVal() == 0.0
})

for idx, expectedDp := range scrapeData.expectedDps {
assertDatapointValueAndStringAttributes(t, dp.At(idx), expectedDp.val, expectedDp.attrs)
}
}
}
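
The expected values follow directly from delta over elapsed wall time: the mocked scrapes are five seconds apart, so for cpu0 in the second scrape user = (2.8 - 1.5) / 5 = 0.26, system = (3.9 - 2.7) / 5 = 0.24 and idle = (3.3 - 0.8) / 5 = 0.5, and the third scrape repeats the same calculation against the second.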

func assertDatapointValueAndStringAttributes(t *testing.T, dp pdata.NumberDataPoint, value float64, attrs map[string]string) {
assert.InDelta(t, value, dp.DoubleVal(), 0.0001)
for k, v := range attrs {
cpuAttribute, exists := dp.Attributes().Get(k)
assert.True(t, exists)
assert.Equal(t, v, cpuAttribute.StringVal())
}
}

func assertCPUMetricValid(t *testing.T, metric pdata.Metric, startTime pdata.Timestamp) {
expected := pdata.NewMetric()
expected.SetName("system.cpu.time")
@@ -152,3 +363,27 @@ func assertCPUMetricHasLinuxSpecificStateLabels(t *testing.T, metric pdata.Metri
internal.AssertSumMetricHasAttributeValue(t, metric, 6, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Steal))
internal.AssertSumMetricHasAttributeValue(t, metric, 7, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Wait))
}

func assertCPUUtilizationMetricValid(t *testing.T, metric pdata.Metric, startTime pdata.Timestamp) {
expected := pdata.NewMetric()
expected.SetName("system.cpu.utilization")
expected.SetDescription("Percentage of CPU time broken down by different states.")
expected.SetUnit("1")
expected.SetDataType(pdata.MetricDataTypeGauge)
internal.AssertDescriptorEqual(t, expected, metric)
if startTime != 0 {
internal.AssertGaugeMetricStartTimeEquals(t, metric, startTime)
}
internal.AssertGaugeMetricHasAttribute(t, metric, 0, metadata.Attributes.Cpu)
internal.AssertGaugeMetricHasAttributeValue(t, metric, 0, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.User))
internal.AssertGaugeMetricHasAttributeValue(t, metric, 1, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.System))
internal.AssertGaugeMetricHasAttributeValue(t, metric, 2, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Idle))
internal.AssertGaugeMetricHasAttributeValue(t, metric, 3, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Interrupt))
}

func assertCPUUtilizationMetricHasLinuxSpecificStateLabels(t *testing.T, metric pdata.Metric) {
internal.AssertGaugeMetricHasAttributeValue(t, metric, 4, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Nice))
internal.AssertGaugeMetricHasAttributeValue(t, metric, 5, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Softirq))
internal.AssertGaugeMetricHasAttributeValue(t, metric, 6, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Steal))
internal.AssertGaugeMetricHasAttributeValue(t, metric, 7, metadata.Attributes.State, pdata.NewAttributeValueString(metadata.AttributeState.Wait))
}
@@ -9,6 +9,7 @@ These are the metrics available for this scraper.
| Name | Description | Unit | Type | Attributes |
| ---- | ----------- | ---- | ---- | ---------- |
| **system.cpu.time** | Total CPU seconds broken down by different states. | s | Sum(Double) | <ul> <li>cpu</li> <li>state</li> </ul> |
| system.cpu.utilization | Percentage of CPU time broken down by different states. | 1 | Gauge(Double) | <ul> <li>cpu</li> <li>state</li> </ul> |
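
Despite the word "Percentage" in the description, the values are reported as a fraction of 1 (hence the unit 1), as the 0.26 and 0.5 expectations in the test above show.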

**Highlighted metrics** are emitted by default. Other metrics are optional and not emitted by default.
Any metric can be enabled or disabled with the following scraper configuration: