From 0c8c88e766fc225cf7dff3bf0288c4443e459f49 Mon Sep 17 00:00:00 2001 From: Orfeas Kourkakis Date: Tue, 9 Apr 2024 12:33:31 +0300 Subject: [PATCH] backport(fix): Fix dashboard panels not working from #157 (#160) * Add `ckf` tag to argo-controller's grafana dashboard. * Fix dashboard panels not working by: * Replacing unavailable metrics with available ones * Using a 2-minute range instead of 1 minute wherever rate() is used, since rate() requires more than one scrape data point. * Remove rate() from panel that shows total number of log messages. Ref canonical/bundle-kubeflow#856 Ref canonical/bundle-kubeflow#834 --- .../src/grafana_dashboards/basic.json.tmpl | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/charms/argo-controller/src/grafana_dashboards/basic.json.tmpl b/charms/argo-controller/src/grafana_dashboards/basic.json.tmpl index bcf5671..9341693 100644 --- a/charms/argo-controller/src/grafana_dashboards/basic.json.tmpl +++ b/charms/argo-controller/src/grafana_dashboards/basic.json.tmpl @@ -980,7 +980,7 @@ { "datasource": "${prometheusds}", "exemplar": true, - "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"cron_wf_queue\",kubernetes_namespace=~\"^$ns$\"}[1m])\n/\n rate(argo_workflows_queue_latency_count{queue_name=\"cron_wf_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[1m])", + "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"cron_wf_queue\",kubernetes_namespace=~\"^$ns$\"}[2m])\n/\n rate(argo_workflows_queue_latency_count{queue_name=\"cron_wf_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[2m])", "interval": "1m", "legendFormat": "{{origin_prometheus}} : {{app}} : {{kubernetes_namespace}} : {{queue_name}}", "queryType": "randomWalk", @@ -989,7 +989,7 @@ { "datasource": "${prometheusds}", "exemplar": true, - "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"pod_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[1m])\n/\n 
rate(argo_workflows_queue_latency_count{queue_name=\"pod_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[1m])", + "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"pod_cleanup_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[2m])\n/\n rate(argo_workflows_queue_latency_count{queue_name=\"pod_cleanup_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[2m])", "interval": "1m", "legendFormat": "{{origin_prometheus}} : {{app}} : {{kubernetes_namespace}} : {{queue_name}}", "refId": "B" @@ -997,7 +997,7 @@ { "datasource": "${prometheusds}", "exemplar": true, - "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"wf_cron_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[1m])\n/\n rate(argo_workflows_queue_latency_count{queue_name=\"wf_cron_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[1m])", + "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"cron_wf_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[2m])\n/\n rate(argo_workflows_queue_latency_count{queue_name=\"cron_wf_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[2m])", "hide": false, "interval": "1m", "legendFormat": "{{origin_prometheus}} : {{app}} : {{kubernetes_namespace}} : {{queue_name}}", @@ -1006,7 +1006,7 @@ { "datasource": "${prometheusds}", "exemplar": true, - "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"workflow_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[1m])\n/\n rate(argo_workflows_queue_latency_count{queue_name=\"workflow_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[1m])", + "expr": " rate(argo_workflows_queue_latency_sum{queue_name=\"workflow_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[2m])\n/\n rate(argo_workflows_queue_latency_count{queue_name=\"workflow_queue\",kubernetes_namespace=~\"^$ns$\",origin_prometheus=~\"^$dc$\"}[2m])", 
"hide": false, "interval": "1m", "legendFormat": "{{origin_prometheus}} : {{app}} : {{kubernetes_namespace}} : {{queue_name}}", @@ -1112,7 +1112,7 @@ { "datasource": "${prometheusds}", "exemplar": true, - "expr": "rate(log_messages{kubernetes_namespace=~\"$ns\",origin_prometheus=~\"^$dc$\"}[1m])", + "expr": "log_messages{kubernetes_namespace=~\"$ns\",origin_prometheus=~\"^$dc$\"}", "interval": "1m", "legendFormat": "{{origin_prometheus}} : {{app}} : {{kubernetes_namespace}} : {{level}}", "queryType": "randomWalk", @@ -1159,6 +1159,7 @@ "schemaVersion": 33, "style": "dark", "tags": [ + "ckf", "argo" ], "templating": {