generated from onedr0p/cluster-template
-
Notifications
You must be signed in to change notification settings - Fork 0
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
feat(helm): update chart kube-prometheus-stack to 66.2.1 #1304
Merged
Conversation
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
github-actions
bot
added
the
area/kubernetes
Changes made in the kubernetes directory
label
Nov 16, 2024
--- kubernetes/apps/monitoring/kube-prometheus-stack/app Kustomization: flux-system/cluster-apps-kube-prometheus-stack HelmRelease: monitoring/kube-prometheus-stack
+++ kubernetes/apps/monitoring/kube-prometheus-stack/app Kustomization: flux-system/cluster-apps-kube-prometheus-stack HelmRelease: monitoring/kube-prometheus-stack
@@ -13,13 +13,13 @@
chart: kube-prometheus-stack
interval: 5m
sourceRef:
kind: HelmRepository
name: prometheus-community-charts
namespace: flux-system
- version: 66.0.0
+ version: 66.2.1
install:
remediation:
retries: 3
interval: 15m
timeout: 10m
upgrade: |
--- HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-alertmanager-overview
+++ HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-alertmanager-overview
@@ -10,37 +10,33 @@
app.kubernetes.io/managed-by: Helm
app.kubernetes.io/instance: kube-prometheus-stack
app.kubernetes.io/part-of: kube-prometheus-stack
release: kube-prometheus-stack
heritage: Helm
data:
- alertmanager-overview.json: '{"__inputs":[],"__requires":[],"annotations":{"list":[]},"editable":true,"gnetId":null,"graphTooltip":1,"hideControls":false,"id":null,"links":[],"refresh":"30s","rows":[{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"current
- set of alerts stored in the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":2,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"})
- by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}","refId":"A"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Alerts","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"none","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"none","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"rate
- of successful and invalid alerts received by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":3,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":null,"seriesOverrides":[],"spaceLength":10,"span":6,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval]))
- by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}
- Received","refId":"A"},{"expr":"sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval]))
- by (namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}
- Invalid","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"Alerts
- receive rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]}],"repeat":null,"repeatIteration":null,"repeatRowId":null,"showTitle":true,"title":"Alerts","titleSize":"h6","type":"row"},{"collapse":false,"collapsed":false,"panels":[{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"rate
- of successful and invalid notifications sent by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":4,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":"integration","seriesOverrides":[],"spaceLength":10,"stack":true,"steppedLine":false,"targets":[{"expr":"sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\",
- integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}
- Total","refId":"A"},{"expr":"sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\",
- integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}}
- Failed","refId":"B"}],"thresholds":[],"timeFrom":null,"timeShift":null,"title":"$integration:
- Notifications Send Rate","tooltip":{"shared":true,"sort":0,"value_type":"individual"},"type":"graph","xaxis":{"buckets":null,"mode":"time","name":null,"show":true,"values":[]},"yaxes":[{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true},{"format":"ops","label":null,"logBase":1,"max":null,"min":null,"show":true}]},{"aliasColors":{},"bars":false,"dashLength":10,"dashes":false,"datasource":"$datasource","description":"latency
- of notifications sent by the Alertmanager","fill":1,"fillGradient":0,"gridPos":{},"id":5,"legend":{"alignAsTable":false,"avg":false,"current":false,"max":false,"min":false,"rightSide":false,"show":false,"sideWidth":null,"total":false,"values":false},"lines":true,"linewidth":1,"links":[],"nullPointMode":"null","percentage":false,"pointradius":5,"points":false,"renderer":"flot","repeat":"integration","seriesOverrides":[],"spaceLength":10,"stack":false,"steppedLine":false,"targets":[{"expr":"histogram_quantile(0.99,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\",
- integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n)
- \n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} 99th
- Percentile","refId":"A"},{"expr":"histogram_quantile(0.50,\n sum(rate(alertmanager_notification_latency_seconds_bucket{namespace=~\"$namespace\",service=~\"$service\",
- integration=\"$integration\"}[$__rate_interval])) by (le,namespace,service,instance)\n)
- \n","format":"time_series","intervalFactor":2,"legendFormat":"{{instance}} Median","refId":"B"},{"expr":"sum(rate(alertmanager_notification_latency_seconds_sum{namespace=~\"$namespace\",service=~\"$service\",
+ alertmanager-overview.json: '{"graphTooltip":1,"panels":[{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":0},"id":1,"panels":[],"title":"Alerts","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"current
+ set of alerts stored in the Alertmanager","fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"none"}},"gridPos":{"h":7,"w":12,"x":0,"y":1},"id":2,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(alertmanager_alerts{namespace=~\"$namespace\",service=~\"$service\"})
+ by (namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}}"}],"title":"Alerts","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"rate
+ of successful and invalid alerts received by the Alertmanager","fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":12,"y":1},"id":3,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_alerts_received_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval]))
+ by (namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}}
+ Received"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_alerts_invalid_total{namespace=~\"$namespace\",service=~\"$service\"}[$__rate_interval]))
+ by (namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}}
+ Invalid"}],"title":"Alerts receive rate","type":"timeseries"},{"collapsed":false,"gridPos":{"h":1,"w":24,"x":0,"y":8},"id":4,"panels":[],"title":"Notifications","type":"row"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"rate
+ of successful and invalid notifications sent by the Alertmanager","fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","stacking":{"mode":"normal"}},"unit":"ops"}},"gridPos":{"h":7,"w":12,"x":0,"y":9},"id":5,"options":{"legend":{"showLegend":false},"tooltip":{"mode":"multi"}},"pluginVersion":"v11.1.0","repeat":"integration","targets":[{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_notifications_total{namespace=~\"$namespace\",service=~\"$service\",
+ integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}}
+ Total"},{"datasource":{"type":"prometheus","uid":"$datasource"},"expr":"sum(rate(alertmanager_notifications_failed_total{namespace=~\"$namespace\",service=~\"$service\",
+ integration=\"$integration\"}[$__rate_interval])) by (integration,namespace,service,instance)","intervalFactor":2,"legendFormat":"{{instance}}
+ Failed"}],"title":"$integration: Notifications Send Rate","type":"timeseries"},{"datasource":{"type":"prometheus","uid":"$datasource"},"description":"latency
[Diff truncated by flux-local]
--- HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-k8s-resources-multicluster
+++ HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-k8s-resources-multicluster
@@ -11,15 +11,15 @@
app.kubernetes.io/instance: kube-prometheus-stack
app.kubernetes.io/part-of: kube-prometheus-stack
release: kube-prometheus-stack
heritage: Helm
data:
k8s-resources-multicluster.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"--
- Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"cluster:node_cpu:ratio_rate5m","instant":true}],"title":"CPU
- Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"-- Mixed
- --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",
+ Mixed --"},"fieldConfig":{"defaults":{"unit":"none"}},"gridPos":{"h":3,"w":4,"x":0,"y":0},"id":1,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(cluster:node_cpu:ratio_rate5m)
+ / count(cluster:node_cpu:ratio_rate5m)","instant":true}],"title":"CPU Utilisation","type":"stat"},{"datasource":{"type":"datasource","uid":"--
+ Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":4,"y":0},"id":2,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_requests{job=\"kube-state-metrics\",
resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",
resource=\"cpu\"})","instant":true}],"title":"CPU Requests Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":8,"y":0},"id":3,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\",
resource=\"cpu\"}) / sum(kube_node_status_allocatable{job=\"kube-state-metrics\",
resource=\"cpu\"})","instant":true}],"title":"CPU Limits Commitment","type":"stat"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":4,"x":12,"y":0},"id":4,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"1
--- HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-k8s-resources-namespace
+++ HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-k8s-resources-namespace
@@ -30,16 +30,16 @@
Mixed --"},"fieldConfig":{"defaults":{"unit":"percentunit"}},"gridPos":{"h":3,"w":6,"x":18,"y":0},"id":4,"interval":"1m","options":{"colorMode":"none"},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\",
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\",
image!=\"\"}) / sum(kube_pod_container_resource_limits{job=\"kube-state-metrics\",
cluster=\"$cluster\", namespace=\"$namespace\", resource=\"memory\"})","instant":true}],"title":"Memory
Utilisation (from limits)","type":"stat"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":5,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\",
- namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"})","legendFormat":"quota
- - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"})","legendFormat":"quota
+ namespace=\"$namespace\"}) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=\"requests.cpu\"}))","legendFormat":"quota
+ - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=\"limits.cpu\"}))","legendFormat":"quota
- limits"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill
down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":6,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\",
namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\",
namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\",
namespace=\"$namespace\"}) by (pod) / sum(cluster:namespace:pod_cpu:active:kube_pod_container_resource_requests{cluster=\"$cluster\",
@@ -52,16 +52,16 @@
1":0,"Time 2":1,"Time 3":2,"Time 4":3,"Time 5":4,"Value #A":6,"Value #B":7,"Value
#C":8,"Value #D":9,"Value #E":10,"pod":5},"renameByName":{"Value #A":"CPU Usage","Value
#B":"CPU Requests","Value #C":"CPU Requests %","Value #D":"CPU Limits","Value
#E":"CPU Limits %","pod":"Pod"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":7,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\",
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",
- container!=\"\", image!=\"\"}) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"})","legendFormat":"quota
- - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"})","legendFormat":"quota
+ container!=\"\", image!=\"\"}) by (pod)","legendFormat":"__auto"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=\"requests.memory\"}))","legendFormat":"quota
+ - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=\"limits.memory\"}))","legendFormat":"quota
- limits"}],"title":"Memory Usage (w/o cache)","type":"timeseries"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Pod"},"properties":[{"id":"links","value":[{"title":"Drill
down to pods","url":"/d/6581e46e4e5c7ba40a07646395ef7b23/k8s-resources-pod?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-pod=${__data.fields.Pod}"}]}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":28},"id":8,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\",
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",container!=\"\",
image!=\"\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(cluster:namespace:pod_memory:active:kube_pod_container_resource_requests{cluster=\"$cluster\",
namespace=\"$namespace\"}) by (pod)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(container_memory_working_set_bytes{job=\"kubelet\",
--- HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-k8s-resources-workloads-namespace
+++ HelmRelease: monitoring/kube-prometheus-stack ConfigMap: monitoring/prometheus-k8s-resources-workloads-namespace
@@ -15,16 +15,16 @@
data:
k8s-resources-workloads-namespace.json: '{"editable":true,"links":[{"asDropdown":true,"includeVars":true,"keepTime":true,"tags":["kubernetes-mixin"],"targetBlank":false,"title":"Kubernetes","type":"dashboards"}],"panels":[{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true}},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":0},"id":1,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\",
namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type)
namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\", namespace=\"$namespace\",
workload_type=~\"$type\"}\n) by (workload, workload_type)\n","legendFormat":"{{workload}}
- - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"})","legendFormat":"quota
- - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"})","legendFormat":"quota
+ - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=~\"requests.cpu|cpu\"}))","legendFormat":"quota
+ - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=~\"limits.cpu\"}))","legendFormat":"quota
- limits"}],"title":"CPU Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill
down to workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]},{"matcher":{"id":"byName","options":"Running
Pods"},"properties":[{"id":"unit","value":"none"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":7},"id":2,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",
namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n node_namespace_pod_container:container_cpu_usage_seconds_total:sum_irate{cluster=\"$cluster\",
namespace=\"$namespace\"}\n* on(namespace,pod)\n group_left(workload, workload_type)
@@ -60,16 +60,16 @@
1":"Type"}}}],"type":"table"},{"datasource":{"type":"datasource","uid":"-- Mixed
--"},"fieldConfig":{"defaults":{"custom":{"fillOpacity":10,"showPoints":"never","spanNulls":true},"unit":"bytes"},"overrides":[{"matcher":{"id":"byFrameRefID","options":"B"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"red","mode":"fixed"}}]},{"matcher":{"id":"byFrameRefID","options":"C"},"properties":[{"id":"custom.lineStyle","value":{"fill":"dash"}},{"id":"custom.lineWidth","value":2},{"id":"color","value":{"fixedColor":"orange","mode":"fixed"}}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":14},"id":3,"interval":"1m","options":{"legend":{"asTable":true,"calcs":["lastNotNull"],"displayMode":"table","placement":"right","showLegend":true},"tooltip":{"mode":"single"}},"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\",
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",
container!=\"\", image!=\"\"}\n * on(namespace,pod)\n group_left(workload,
workload_type) namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",
namespace=\"$namespace\", workload_type=~\"$type\"}\n) by (workload, workload_type)\n","legendFormat":"{{workload}}
- - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"})","legendFormat":"quota
- - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(kube_resourcequota{cluster=\"$cluster\",
- namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"})","legendFormat":"quota
+ - {{workload_type}}"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=~\"requests.memory|memory\"}))","legendFormat":"quota
+ - requests"},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"scalar(max(kube_resourcequota{cluster=\"$cluster\",
+ namespace=\"$namespace\", type=\"hard\",resource=~\"limits.memory\"}))","legendFormat":"quota
- limits"}],"title":"Memory Usage","type":"timeseries"},{"datasource":{"type":"datasource","uid":"--
Mixed --"},"fieldConfig":{"defaults":{"unit":"bytes"},"overrides":[{"matcher":{"id":"byRegexp","options":"/%/"},"properties":[{"id":"unit","value":"percentunit"}]},{"matcher":{"id":"byName","options":"Workload"},"properties":[{"id":"links","value":[{"title":"Drill
down to workloads","url":"/d/a164a7f0339f99e89cea5cb47e9be617/k8s-resources-workload?${datasource:queryparam}&var-cluster=$cluster&var-namespace=$namespace&var-type=${__data.fields.Type}&var-workload=${__data.fields.Workload}"}]}]},{"matcher":{"id":"byName","options":"Running
Pods"},"properties":[{"id":"unit","value":"none"}]}]},"gridPos":{"h":7,"w":24,"x":0,"y":21},"id":4,"pluginVersion":"v11.1.0","targets":[{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"count(namespace_workload_pod:kube_pod_owner:relabel{cluster=\"$cluster\",
namespace=\"$namespace\", workload_type=~\"$type\"}) by (workload, workload_type)","format":"table","instant":true},{"datasource":{"type":"prometheus","uid":"${datasource}"},"expr":"sum(\n container_memory_working_set_bytes{job=\"kubelet\",
metrics_path=\"/metrics/cadvisor\", cluster=\"$cluster\", namespace=\"$namespace\",
--- HelmRelease: monitoring/kube-prometheus-stack Deployment: monitoring/grafana
+++ HelmRelease: monitoring/kube-prometheus-stack Deployment: monitoring/grafana
@@ -19,13 +19,13 @@
template:
metadata:
labels:
app.kubernetes.io/name: grafana
app.kubernetes.io/instance: kube-prometheus-stack
annotations:
- checksum/dashboards-json-config: b269b6458d3745112dff220bc31a9f072bf8ae92cba67c94d1c8f8cc1c5853fd
+ checksum/dashboards-json-config: 1760b74a8eaf96927e54af013d249da6e37cd462d943c74ecd016fde35342c77
checksum/sc-dashboard-provider-config: 81f722b8194d01e4a84ce12a0cbbe9b705b2d9145719c803ac264b4786c64a09
checksum/secret: 032056e9c62bbe9d1daa41ee49cd3d9524c076f51ca4c65adadf4ef08ef28712
kubectl.kubernetes.io/default-container: grafana
spec:
serviceAccountName: grafana
automountServiceAccountToken: true
--- HelmRelease: monitoring/kube-prometheus-stack Deployment: monitoring/kube-state-metrics
+++ HelmRelease: monitoring/kube-prometheus-stack Deployment: monitoring/kube-state-metrics
@@ -49,13 +49,13 @@
- --custom-resource-state-config-file=/etc/customresourcestate/config.yaml
volumeMounts:
- name: customresourcestate-config
mountPath: /etc/customresourcestate
readOnly: true
imagePullPolicy: IfNotPresent
- image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.13.0
+ image: registry.k8s.io/kube-state-metrics/kube-state-metrics:v2.14.0
ports:
- containerPort: 8080
name: http
livenessProbe:
failureThreshold: 3
httpGet:
@@ -69,13 +69,13 @@
timeoutSeconds: 5
readinessProbe:
failureThreshold: 3
httpGet:
httpHeaders: null
path: /readyz
- port: 8080
+ port: 8081
scheme: HTTP
initialDelaySeconds: 5
periodSeconds: 10
successThreshold: 1
timeoutSeconds: 5
resources: {}
--- HelmRelease: monitoring/kube-prometheus-stack Prometheus: monitoring/prometheus-prometheus
+++ HelmRelease: monitoring/kube-prometheus-stack Prometheus: monitoring/prometheus-prometheus
@@ -17,14 +17,14 @@
alertmanagers:
- namespace: monitoring
name: prometheus-alertmanager
port: http-web
pathPrefix: /
apiVersion: v2
- image: quay.io/prometheus/prometheus:v2.55.0
- version: v2.55.0
+ image: quay.io/prometheus/prometheus:v2.55.1
+ version: v2.55.1
externalUrl: http://prometheus...PLACEHOLDER_SECRET_DOMAIN../
paused: false
replicas: 1
shards: 1
logLevel: info
logFormat: logfmt
--- HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kube-apiserver-availability.rules
+++ HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kube-apiserver-availability.rules
@@ -24,22 +24,22 @@
verb: read
record: code:apiserver_request_total:increase30d
- expr: sum by (cluster, code) (code_verb:apiserver_request_total:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
labels:
verb: write
record: code:apiserver_request_total:increase30d
- - expr: sum by (cluster, verb, scope) (increase(apiserver_request_sli_duration_seconds_count{job="apiserver"}[1h]))
- record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
- - expr: sum by (cluster, verb, scope) (avg_over_time(cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h[30d])
- * 24 * 30)
- record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
- expr: sum by (cluster, verb, scope, le) (increase(apiserver_request_sli_duration_seconds_bucket[1h]))
record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h
- expr: sum by (cluster, verb, scope, le) (avg_over_time(cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h[30d])
* 24 * 30)
record: cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d
+ - expr: sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase1h{le="+Inf"})
+ record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase1h
+ - expr: sum by (cluster, verb, scope) (cluster_verb_scope_le:apiserver_request_sli_duration_seconds_bucket:increase30d{le="+Inf"}
+ * 24 * 30)
+ record: cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d
- expr: |-
1 - (
(
# write too slow
sum by (cluster) (cluster_verb_scope:apiserver_request_sli_duration_seconds_count:increase30d{verb=~"POST|PUT|PATCH|DELETE"})
-
--- HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kubernetes-apps
+++ HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kubernetes-apps
@@ -180,19 +180,19 @@
)
for: 15m
labels:
severity: warning
- alert: KubeContainerWaiting
annotations:
- description: pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on
+ description: 'pod/{{ $labels.pod }} in namespace {{ $labels.namespace }} on
container {{ $labels.container}} has been in waiting state for longer than
- 1 hour.
+ 1 hour. (reason: "{{ $labels.reason }}").'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubecontainerwaiting
summary: Pod container waiting longer than 1 hour
- expr: sum by (namespace, pod, container, cluster) (kube_pod_container_status_waiting_reason{job="kube-state-metrics",
- namespace=~".*"}) > 0
+ expr: kube_pod_container_status_waiting_reason{reason!="CrashLoopBackOff", job="kube-state-metrics",
+ namespace=~".*"} > 0
for: 1h
labels:
severity: warning
- alert: KubeDaemonSetNotScheduled
annotations:
description: '{{ $value }} Pods of DaemonSet {{ $labels.namespace }}/{{ $labels.daemonset
--- HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kubernetes-resources
+++ HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kubernetes-resources
@@ -117,14 +117,14 @@
description: '{{ $value | humanizePercentage }} throttling of CPU in namespace
{{ $labels.namespace }} for container {{ $labels.container }} in pod {{
$labels.pod }}.'
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/cputhrottlinghigh
summary: Processes experience elevated CPU throttling.
expr: |-
- sum(increase(container_cpu_cfs_throttled_periods_total{container!="", }[5m])) by (cluster, container, pod, namespace)
+ sum(increase(container_cpu_cfs_throttled_periods_total{container!="", job="kubelet", metrics_path="/metrics/cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
/
- sum(increase(container_cpu_cfs_periods_total{}[5m])) by (cluster, container, pod, namespace)
+ sum(increase(container_cpu_cfs_periods_total{job="kubelet", metrics_path="/metrics/cadvisor", }[5m])) without (id, metrics_path, name, image, endpoint, job, node)
> ( 25 / 100 )
for: 15m
labels:
severity: info
--- HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kubernetes-system-apiserver
+++ HelmRelease: monitoring/kube-prometheus-stack PrometheusRule: monitoring/prometheus-kubernetes-system-apiserver
@@ -18,29 +18,29 @@
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to kubernetes apiserver
is expiring in less than 7.0 days.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
- expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"}
- > 0 and on (cluster, job) histogram_quantile(0.01, sum by (cluster, job, le)
- (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m])))
- < 604800
+ expr: |-
+ histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 604800
+ and
+ on (job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0
for: 5m
labels:
severity: warning
- alert: KubeClientCertificateExpiration
annotations:
description: A client certificate used to authenticate to kubernetes apiserver
is expiring in less than 24.0 hours.
runbook_url: https://runbooks.prometheus-operator.dev/runbooks/kubernetes/kubeclientcertificateexpiration
summary: Client certificate is about to expire.
- expr: apiserver_client_certificate_expiration_seconds_count{job="apiserver"}
- > 0 and on (cluster, job) histogram_quantile(0.01, sum by (cluster, job, le)
- (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m])))
- < 86400
+ expr: |-
+ histogram_quantile(0.01, sum without (namespace, service, endpoint) (rate(apiserver_client_certificate_expiration_seconds_bucket{job="apiserver"}[5m]))) < 86400
+ and
+ on (job, cluster, instance) apiserver_client_certificate_expiration_seconds_count{job="apiserver"} > 0
for: 5m
labels:
severity: critical
- alert: KubeAggregatedAPIErrors
annotations:
description: Kubernetes aggregated API {{ $labels.name }}/{{ $labels.namespace |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Labels
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
This PR contains the following updates:
66.0.0
->66.2.1
Release Notes
prometheus-community/helm-charts (kube-prometheus-stack)
v66.2.1
Compare Source
kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator.
What's Changed
Full Changelog: prometheus-community/helm-charts@kube-prometheus-stack-66.2.0...kube-prometheus-stack-66.2.1
v66.2.0
Compare Source
kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator.
What's Changed
Full Changelog: prometheus-community/helm-charts@kube-prometheus-stack-66.1.1...kube-prometheus-stack-66.2.0
v66.1.1
Compare Source
kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator.
What's Changed
New Contributors
Full Changelog: prometheus-community/helm-charts@prometheus-25.30.0...kube-prometheus-stack-66.1.1
v66.1.0
Compare Source
kube-prometheus-stack collects Kubernetes manifests, Grafana dashboards, and Prometheus rules combined with documentation and scripts to provide easy to operate end-to-end Kubernetes cluster monitoring with Prometheus using the Prometheus Operator.
What's Changed
Full Changelog: prometheus-community/helm-charts@kube-state-metrics-5.27.0...kube-prometheus-stack-66.1.0
Configuration
📅 Schedule: Branch creation - "on saturday" (UTC), Automerge - At any time (no schedule defined).
🚦 Automerge: Disabled by config. Please merge this manually once you are satisfied.
♻ Rebasing: Whenever PR becomes conflicted, or you tick the rebase/retry checkbox.
🔕 Ignore: Close this PR and you won't be reminded about this update again.
This PR was generated by Mend Renovate. View the repository job log.