From b2424db7745d4924904eb65cac5e48002a873c4a Mon Sep 17 00:00:00 2001 From: Christos Markou Date: Mon, 12 Aug 2024 00:30:24 +0530 Subject: [PATCH] Add k8s.{pod,node}.cpu.{time,usage} metrics (#1320) Signed-off-by: ChrsMark --- .chloggen/add_k8s_cpu_metrics.yaml | 22 ++++ docs/system/k8s-metrics.md | 171 +++++++++++++++++++++++++++++ model/metrics/k8s.yaml | 40 +++++++ 3 files changed, 233 insertions(+) create mode 100755 .chloggen/add_k8s_cpu_metrics.yaml create mode 100644 docs/system/k8s-metrics.md create mode 100644 model/metrics/k8s.yaml diff --git a/.chloggen/add_k8s_cpu_metrics.yaml b/.chloggen/add_k8s_cpu_metrics.yaml new file mode 100755 index 0000000000..fe52d97d1c --- /dev/null +++ b/.chloggen/add_k8s_cpu_metrics.yaml @@ -0,0 +1,22 @@ +# Use this changelog template to create an entry for release notes. +# +# If your change doesn't affect end users you should instead start +# your pull request title with [chore] or use the "Skip Changelog" label. + +# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix' +change_type: enhancement + +# The name of the area of concern in the attributes-registry (e.g. http, cloud, db) +component: k8s + +# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`). +note: Add `k8s.pod.cpu.time`, `k8s.pod.cpu.usage`, `k8s.node.cpu.time`, `k8s.node.cpu.usage` metrics + +# Mandatory: One or more tracking issues related to the change. You can use the PR number here if no issue exists. +# The values here must be integers. +issues: [1320] + +# (Optional) One or more lines of additional information to render under the primary note. +# These lines will be padded with 2 spaces and then inserted directly into the document. +# Use pipe (|) for multiline entries. 
+subtext: diff --git a/docs/system/k8s-metrics.md b/docs/system/k8s-metrics.md new file mode 100644 index 0000000000..1f8e2bf8f9 --- /dev/null +++ b/docs/system/k8s-metrics.md @@ -0,0 +1,171 @@ + + +# Semantic Conventions for Kubernetes Metrics + +**Status**: [Experimental][DocumentStatus] + +## K8s Metrics + +This document describes instruments and attributes for common K8s level +metrics in OpenTelemetry. These metrics are collected from technology-specific, +well-defined APIs (e.g. Kubelet's API). + +Metrics in `k8s.` instruments SHOULD be attached to a [K8s Resource](/docs/resource/k8s.md) +and therefore inherit its attributes, like `k8s.pod.name` and `k8s.pod.uid`. + +### Metric: `k8s.pod.cpu.time` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.pod.cpu.time` | Counter | `s` | Total CPU time consumed [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + +**[1]:** Total CPU time consumed by the specific Pod on all available CPU cores + + + + + + + + + + + + + + + + + + + + + +### Metric: `k8s.pod.cpu.usage` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.pod.cpu.usage` | Gauge | `{cpu}` | Pod's CPU usage, measured in cpus. Range from 0 to the number of allocatable CPUs [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + +**[1]:** CPU usage of the specific Pod on all available CPU cores, averaged over the sample window + + + + + + + + + + + + + + + + + + + + + +### Metric: `k8s.node.cpu.time` + +This metric is [recommended][MetricRecommended]. 
+ + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.node.cpu.time` | Counter | `s` | Total CPU time consumed [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + +**[1]:** Total CPU time consumed by the specific Node on all available CPU cores + + + + + + + + + + + + + + + + + + + + + +### Metric: `k8s.node.cpu.usage` + +This metric is [recommended][MetricRecommended]. + + + + + + + + +| Name | Instrument Type | Unit (UCUM) | Description | Stability | +| -------- | --------------- | ----------- | -------------- | --------- | +| `k8s.node.cpu.usage` | Gauge | `{cpu}` | Node's CPU usage, measured in cpus. Range from 0 to the number of allocatable CPUs [1] | ![Experimental](https://img.shields.io/badge/-experimental-blue) | + + +**[1]:** CPU usage of the specific Node on all available CPU cores, averaged over the sample window + + + + + + + + + + + + + + + + + + + + + +[DocumentStatus]: https://opentelemetry.io/docs/specs/otel/document-status +[MetricRecommended]: /docs/general/metric-requirement-level.md#recommended diff --git a/model/metrics/k8s.yaml b/model/metrics/k8s.yaml new file mode 100644 index 0000000000..b11a6f68d4 --- /dev/null +++ b/model/metrics/k8s.yaml @@ -0,0 +1,40 @@ +groups: + # k8s.pod.cpu.* metrics + - id: metric.k8s.pod.cpu.time + type: metric + metric_name: k8s.pod.cpu.time + stability: experimental + brief: "Total CPU time consumed" + note: > + Total CPU time consumed by the specific Pod on all available CPU cores + instrument: counter + unit: "s" + - id: metric.k8s.pod.cpu.usage + type: metric + metric_name: k8s.pod.cpu.usage + stability: experimental + brief: "Pod's CPU usage, measured in cpus. 
Range from 0 to the number of allocatable CPUs" + note: > + CPU usage of the specific Pod on all available CPU cores, averaged over the sample window + instrument: gauge + unit: "{cpu}" + + # k8s.node.cpu.* metrics + - id: metric.k8s.node.cpu.time + type: metric + metric_name: k8s.node.cpu.time + stability: experimental + brief: "Total CPU time consumed" + note: > + Total CPU time consumed by the specific Node on all available CPU cores + instrument: counter + unit: "s" + - id: metric.k8s.node.cpu.usage + type: metric + metric_name: k8s.node.cpu.usage + stability: experimental + brief: "Node's CPU usage, measured in cpus. Range from 0 to the number of allocatable CPUs" + note: > + CPU usage of the specific Node on all available CPU cores, averaged over the sample window + instrument: gauge + unit: "{cpu}"