From 73acb5809585cd4ca2bd646cea5475c60ea63569 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Th=C3=A9o=20Brigitte?= Date: Thu, 23 May 2024 10:40:10 +0200 Subject: [PATCH] Add datasource to Prometheus dashboard (#544) * add datasource variable to Prometheus dashboard * update CHANGELOG * Update helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json Co-authored-by: Quentin Bisson * use default datasource * add allValues=.* for cluster variable --------- Co-authored-by: Quentin Bisson --- CHANGELOG.md | 4 + .../dashboards/shared/public/prometheus.json | 98 +++++++++++-------- 2 files changed, 63 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 875590d5..35fd0c71 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- Add datasource variable to Prometheus dashboard + ## [3.14.2] - 2024-05-20 ### Changed diff --git a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json index 2e277805..de6aa548 100644 --- a/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json +++ b/helm/dashboards/charts/public_dashboards/dashboards/shared/public/prometheus.json @@ -22,7 +22,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "enable": true, "expr": "increase(kube_pod_container_status_restarts_total{container=\"prometheus\", pod=~\"prometheus-($cluster)-[0-9]+\", cluster_type=~\"management_cluster\"}[$__interval]) > 0", @@ -143,7 +143,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "5 minute rate of samples appended", "fieldConfig": { @@ -230,7 +230,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "rate(prometheus_tsdb_head_samples_appended_total{type!=\"histogram\", cluster_id=~\"($cluster)\"}[5m])", @@ -248,7 +248,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Total number of time series in prometheus", "fieldConfig": { @@ -317,7 +317,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "exemplar": false, @@ -337,7 +337,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Number of restarts during the current time range", "fieldConfig": { @@ -391,7 +391,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "exemplar": false, @@ -408,7 +408,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Number of restarts during the current time range", "fieldConfig": { @@ -462,7 +462,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "time() - max(kube_pod_start_time{pod=~\"prometheus-($cluster)-0\"}) by (pod)", @@ -478,7 +478,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Time take for rule evaluation", "fieldConfig": { @@ -565,7 +565,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "prometheus_rule_evaluation_duration_seconds{app=\"prometheus\",quantile=\"0.99\", cluster_id=~\"($cluster)\"}", @@ -579,7 +579,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "prometheus_rule_evaluation_duration_seconds{app=\"prometheus\",quantile=\"0.9\", cluster_id=~\"($cluster)\"}", @@ -593,7 +593,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "prometheus_rule_evaluation_duration_seconds{app=\"prometheus\",quantile=\"0.5\", cluster_id=~\"($cluster)\"}", @@ -611,7 +611,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "TIme taken to send notifications", "fieldConfig": { @@ -698,7 +698,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "prometheus_notifications_latency_seconds{app=\"prometheus\",quantile=\"0.99\", cluster_id=~\"($cluster)\"}", @@ -712,7 +712,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "prometheus_notifications_latency_seconds{app=\"prometheus\",quantile=\"0.9\", cluster_id=~\"($cluster)\"}", @@ -726,7 +726,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "prometheus_notifications_latency_seconds{app=\"prometheus\",quantile=\"0.5\", cluster_id=~\"($cluster)\"}", @@ -757,7 +757,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Memory usage by Prometheus instances.\n\nDashed lines show pod limit and node capacity.", "fieldConfig": { @@ -898,7 +898,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "max(\n container_memory_working_set_bytes{container=\"prometheus\", pod=~\"prometheus-($cluster).*\", cluster_type=~\"management_cluster\"}\n ) by (container, pod, cluster_type)", @@ -912,7 +912,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "max(\n container_memory_usage_bytes{container=\"prometheus\", pod=~\"prometheus-($cluster).*\", cluster_type=~\"management_cluster\"}\n) by (container, pod, cluster_type)\n", @@ -928,7 +928,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "max(\n kube_pod_container_resource_limits{container=\"prometheus\",resource=\"memory\", unit=\"byte\", pod=~\"prometheus-($cluster).*\", cluster_type=~\"management_cluster\"}\n) by (container, pod, cluster_type)", @@ -940,7 +940,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "max (node_memory_MemTotal_bytes{node=~\".+\", cluster_type=\"management_cluster\"} * on (node) group_right(pod) (kube_pod_info{pod=~\"prometheus-.*-0\",cluster_type=\"management_cluster\", namespace=~\"($cluster)-prometheus\"})) by (namespace)", @@ -956,7 +956,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "CPU usage by Prometheus instances.\n\nDashed lines show pod limit and node capacity.", "fieldConfig": { @@ -1097,7 +1097,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "sum(rate(container_cpu_usage_seconds_total{container=\"prometheus\", pod=~\"prometheus-($cluster)-.*\", cluster_type=~\"management_cluster\"}[3m])) by (pod)", @@ -1112,7 +1112,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "max(\n kube_pod_container_resource_limits{container=\"prometheus\",resource=\"cpu\", unit=\"core\", pod=~\"prometheus-($cluster)-.*\", cluster_type=~\"management_cluster\"}\n) by (container, pod, cluster_type)", @@ -1124,7 +1124,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "exemplar": false, @@ -1142,7 +1142,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "CPU throttling percentage\n\nHow much CPU had to be throttled down by CFS scheduler. No worries as long as youre <100%.", "fieldConfig": { @@ -1283,7 +1283,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "sum(irate(container_cpu_cfs_throttled_periods_total{pod=~\"prometheus-($cluster)-0\", container=\"prometheus\"}[$__rate_interval])) by (pod, container)\n/\nsum(irate(container_cpu_cfs_periods_total{pod=~\"prometheus-($cluster)-0\", container=\"prometheus\"}[$__rate_interval])) by (pod,container)", @@ -1302,7 +1302,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "", "fieldConfig": { @@ -1386,7 +1386,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "expr": "max (\n (\n kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n - \n kubelet_volume_stats_available_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n )\n / kubelet_volume_stats_capacity_bytes{cluster_type=~\"management_cluster\", app=\"kubelet\", persistentvolumeclaim=~\"prometheus-($cluster)-db-prometheus-($cluster)-.*\"}\n * 100\n) by (persistentvolumeclaim)", @@ -1414,7 +1414,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Number of samples scraped by app / job.\n\nShows which jobs scrape the most data.\n\nMetric: `scrape_samples_scraped`", "fieldConfig": { @@ -1471,7 +1471,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "exemplar": false, @@ -1488,7 +1488,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Top number metrics and associated job.\n\nShows which metrics have the highest instant cardinality.", "fieldConfig": { @@ -1545,7 +1545,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "exemplar": false, @@ -1562,7 +1562,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Top number of metrics per job.\n\nShows which metrics add new labels, increasing cardinality over time.\n\nMetric: `scrape_series_added`", "fieldConfig": { @@ -1619,7 +1619,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "exemplar": false, @@ -1649,7 +1649,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "description": "Duration of rules evaluation per group / cluster.\n\nShows which ones use most CPU.", "fieldConfig": { @@ -1706,7 +1706,7 @@ { "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "editorMode": "code", "exemplar": false, @@ -1731,6 +1731,26 @@ "templating": { "list": [ { + "current": { + "selected": false, + "text": "default", + "value": "default" + }, + "hide": 0, + "includeAll": false, + "multi": false, + "name": "datasource", + "options": [], + "label": "Data source", + "query": "prometheus", + "queryValue": "", + "refresh": 1, + "regex": "", + "skipUrlSync": false, + "type": "datasource" + }, + { + "allValue": ".*", "current": { "selected": true, "text": [ @@ -1742,7 +1762,7 @@ }, "datasource": { "type": "prometheus", - "uid": "PBFA97CFB590B2093" + "uid": "${datasource}" }, "definition": "label_values(container_cpu_usage_seconds_total{container=\"prometheus\",cluster_type=~\"management_cluster\", pod=~\".*prometheus.*\"}, pod)", "hide": 0,